//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

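// Move the obsolete declaration aside by appending ".old" to its name, so the
// upgraded declaration can take over the original name.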
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
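//
// Illustrative (hand-written, not taken from a test):
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)  ; old
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)      ; new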
static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
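//
// Illustrative example using sse41.insertps (other operands unchanged):
//   declare <4 x float>
//       @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)  ; old
//   declare <4 x float>
//       @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)   ; new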
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of fp compare intrinsics whose return type changed
// from a scalar to a vXi1 mask.
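//
// Illustrative example using avx512.mask.cmp.pd.128 (other operands elided):
//   declare i8 @llvm.x86.avx512.mask.cmp.pd.128(...)        ; old: scalar
//   declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(...)  ; new: vXi1 mask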
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // If the return type is already a vector, this is the new version.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

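// Upgrade the declarations of AVX512 bf16 intrinsics whose return type changed
// from a vector of i16 to a vector of bfloat. Illustrative example
// (hand-written):
//   declare <8 x i16>
//       @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>)     ; old
//   declare <8 x bfloat>
//       @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>)     ; new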
static bool UpgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

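// Upgrade the declarations of AVX512 bf16 dot-product intrinsics whose source
// operands changed from vectors of i32 to vectors of bfloat (the check below
// inspects the second parameter). Illustrative example (hand-written):
//   declare <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(
//       <4 x float>, <4 x i32>, <4 x i32>)           ; old
//   declare <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(
//       <4 x float>, <8 x bfloat>, <8 x bfloat>)     ; new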
static bool UpgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||        // Added in 3.7
            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.pd.256" ||            // Added in 3.9
            Name == "cvtdq2.ps.256" ||            // Added in 7.0
            Name.starts_with("movnt.") ||         // Added in 3.2
            Name.starts_with("sqrt.p") ||         // Added in 7.0
            Name.starts_with("storeu.") ||        // Added in 3.9
            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") ||  // Added in 3.7
            Name.starts_with("vinsertf128.") ||   // Added in 3.7
            Name.starts_with("vperm2f128.") ||    // Added in 6.0
            Name.starts_with("vpermil."));        // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||             // Added in 5.0
            Name.starts_with("pabs.") ||      // Added in 6.0
            Name.starts_with("padds.") ||     // Added in 8.0
            Name.starts_with("paddus.") ||    // Added in 8.0
            Name.starts_with("pblendd.") ||   // Added in 3.7
            Name == "pblendw" ||              // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") ||    // Added in 3.1
            Name.starts_with("pcmpgt.") ||    // Added in 3.1
            Name.starts_with("pmax") ||       // Added in 3.9
            Name.starts_with("pmin") ||       // Added in 3.9
            Name.starts_with("pmovsx") ||     // Added in 3.9
            Name.starts_with("pmovzx") ||     // Added in 3.9
            Name == "pmul.dq" ||              // Added in 7.0
            Name == "pmulu.dq" ||             // Added in 7.0
            Name.starts_with("psll.dq") ||    // Added in 3.7
            Name.starts_with("psrl.dq") ||    // Added in 3.7
            Name.starts_with("psubs.") ||     // Added in 8.0
            Name.starts_with("psubus.") ||    // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" ||       // Added in 3.7
            Name == "vextracti128" ||         // Added in 3.7
            Name == "vinserti128" ||          // Added in 3.7
            Name == "vperm2i128");            // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||        // Added in 3.9
              Name.starts_with("andn.") ||       // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") ||           // Added in 5.0
              Name.starts_with("cmp.d") ||           // Added in 5.0
              Name.starts_with("cmp.q") ||           // Added in 5.0
              Name.starts_with("cmp.w") ||           // Added in 5.0
              Name.starts_with("compress.b") ||      // Added in 9.0
              Name.starts_with("compress.d") ||      // Added in 9.0
              Name.starts_with("compress.p") ||      // Added in 9.0
              Name.starts_with("compress.q") ||      // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") ||      // Added in 9.0
              Name.starts_with("conflict.") ||       // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||              // Added in 7.0
              Name == "cvtpd2ps.256" ||              // Added in 7.0
              Name == "cvtps2pd.128" ||              // Added in 7.0
              Name == "cvtps2pd.256" ||              // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||              // Added in 9.0
              Name == "cvtqq2ps.512" ||              // Added in 9.0
              Name == "cvttpd2dq.256" ||             // Added in 7.0
              Name == "cvttps2dq.128" ||             // Added in 7.0
              Name == "cvttps2dq.256" ||             // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||             // Added in 9.0
              Name == "cvtuqq2ps.512" ||             // Added in 9.0
              Name.starts_with("dbpsadbw.") ||       // Added in 7.0
              Name.starts_with("div.p") ||    // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") ||     // Added in 9.0
              Name.starts_with("expand.q") ||     // Added in 9.0
              Name.starts_with("expand.w") ||     // Added in 9.0
              Name.starts_with("fpclass.p") ||    // Added in 7.0
              Name.starts_with("insert") ||       // Added in 4.0
              Name.starts_with("load.") ||        // Added in 3.9
              Name.starts_with("loadu.") ||       // Added in 3.9
              Name.starts_with("lzcnt.") ||       // Added in 5.0
              Name.starts_with("max.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||     // Added in 3.9
              Name.starts_with("move.s") ||      // Added in 4.0
              Name.starts_with("movshdup") ||    // Added in 3.9
              Name.starts_with("movsldup") ||    // Added in 3.9
              Name.starts_with("mul.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||         // Added in 3.9
              Name.starts_with("pabs.") ||       // Added in 6.0
              Name.starts_with("packssdw.") ||   // Added in 5.0
              Name.starts_with("packsswb.") ||   // Added in 5.0
              Name.starts_with("packusdw.") ||   // Added in 5.0
              Name.starts_with("packuswb.") ||   // Added in 5.0
              Name.starts_with("padd.") ||       // Added in 4.0
              Name.starts_with("padds.") ||      // Added in 8.0
              Name.starts_with("paddus.") ||     // Added in 8.0
              Name.starts_with("palignr.") ||    // Added in 3.9
              Name.starts_with("pand.") ||       // Added in 3.9
              Name.starts_with("pandn.") ||      // Added in 3.9
              Name.starts_with("pavg") ||        // Added in 6.0
              Name.starts_with("pbroadcast") ||  // Added in 6.0
              Name.starts_with("pcmpeq.") ||     // Added in 3.9
              Name.starts_with("pcmpgt.") ||     // Added in 3.9
              Name.starts_with("perm.df.") ||    // Added in 3.9
              Name.starts_with("perm.di.") ||    // Added in 3.9
              Name.starts_with("permvar.") ||    // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") ||   // Added in 7.0
              Name.starts_with("pmax") ||        // Added in 4.0
              Name.starts_with("pmin") ||        // Added in 4.0
              Name == "pmov.qd.256" ||           // Added in 9.0
              Name == "pmov.qd.512" ||           // Added in 9.0
              Name == "pmov.wb.256" ||           // Added in 9.0
              Name == "pmov.wb.512" ||           // Added in 9.0
              Name.starts_with("pmovsx") ||      // Added in 4.0
              Name.starts_with("pmovzx") ||      // Added in 4.0
              Name.starts_with("pmul.dq.") ||    // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") ||    // Added in 7.0
              Name.starts_with("pmulhu.w.") ||   // Added in 7.0
              Name.starts_with("pmull.") ||      // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") ||       // Added in 4.0
              Name.starts_with("por.") ||            // Added in 3.9
              Name.starts_with("prol.") ||           // Added in 8.0
              Name.starts_with("prolv.") ||          // Added in 8.0
              Name.starts_with("pror.") ||           // Added in 8.0
              Name.starts_with("prorv.") ||          // Added in 8.0
              Name.starts_with("pshuf.b.") ||        // Added in 4.0
              Name.starts_with("pshuf.d.") ||        // Added in 3.9
              Name.starts_with("pshufh.w.") ||       // Added in 3.9
              Name.starts_with("pshufl.w.") ||       // Added in 3.9
              Name.starts_with("psll.d") ||          // Added in 4.0
              Name.starts_with("psll.q") ||          // Added in 4.0
              Name.starts_with("psll.w") ||          // Added in 4.0
              Name.starts_with("pslli") ||           // Added in 4.0
              Name.starts_with("psllv") ||           // Added in 4.0
              Name.starts_with("psra.d") ||          // Added in 4.0
              Name.starts_with("psra.q") ||          // Added in 4.0
              Name.starts_with("psra.w") ||          // Added in 4.0
              Name.starts_with("psrai") ||           // Added in 4.0
              Name.starts_with("psrav") ||           // Added in 4.0
              Name.starts_with("psrl.d") ||          // Added in 4.0
              Name.starts_with("psrl.q") ||          // Added in 4.0
              Name.starts_with("psrl.w") ||          // Added in 4.0
              Name.starts_with("psrli") ||           // Added in 4.0
              Name.starts_with("psrlv") ||           // Added in 4.0
              Name.starts_with("psub.") ||           // Added in 4.0
              Name.starts_with("psubs.") ||          // Added in 8.0
              Name.starts_with("psubus.") ||         // Added in 8.0
              Name.starts_with("pternlog.") ||       // Added in 7.0
              Name.starts_with("punpckh") ||         // Added in 3.9
              Name.starts_with("punpckl") ||         // Added in 3.9
              Name.starts_with("pxor.") ||           // Added in 3.9
              Name.starts_with("shuf.f") ||          // Added in 6.0
              Name.starts_with("shuf.i") ||          // Added in 6.0
              Name.starts_with("shuf.p") ||          // Added in 4.0
              Name.starts_with("sqrt.p") ||          // Added in 7.0
              Name.starts_with("store.b.") ||        // Added in 3.9
              Name.starts_with("store.d.") ||        // Added in 3.9
              Name.starts_with("store.p") ||         // Added in 3.9
              Name.starts_with("store.q.") ||        // Added in 3.9
              Name.starts_with("store.w.") ||        // Added in 3.9
              Name == "store.ss" ||                  // Added in 7.0
              Name.starts_with("storeu.") ||         // Added in 3.9
              Name.starts_with("sub.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") ||       // Added in 5.0
              Name.starts_with("unpckh.") ||     // Added in 3.9
              Name.starts_with("unpckl.") ||     // Added in 3.9
              Name.starts_with("valign.") ||     // Added in 4.0
              Name == "vcvtph2ps.128" ||         // Added in 11.0
              Name == "vcvtph2ps.256" ||         // Added in 11.0
              Name.starts_with("vextract") ||    // Added in 4.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vfnmadd.") ||    // Added in 7.0
              Name.starts_with("vfnmsub.") ||    // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") ||   // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshld.") ||     // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrd.") ||     // Added in 7.0
              Name.starts_with("vpshrdv.") ||    // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor."));           // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") ||    // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub."));    // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrdv."));     // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" ||               // Added in 5.0
            Name == "pmul.dq.512" ||            // Added in 7.0
            Name == "pmulu.dq.512" ||           // Added in 7.0
            Name.starts_with("broadcastm") ||   // Added in 6.0
            Name.starts_with("cmp.p") ||        // Added in 12.0
            Name.starts_with("cvtb2mask.") ||   // Added in 7.0
            Name.starts_with("cvtd2mask.") ||   // Added in 7.0
            Name.starts_with("cvtmask2") ||     // Added in 5.0
            Name.starts_with("cvtq2mask.") ||   // Added in 7.0
            Name == "cvtusi2sd" ||              // Added in 7.0
            Name.starts_with("cvtw2mask.") ||   // Added in 7.0
            Name == "kand.w" ||                 // Added in 7.0
            Name == "kandn.w" ||                // Added in 7.0
            Name == "knot.w" ||                 // Added in 7.0
            Name == "kor.w" ||                  // Added in 7.0
            Name == "kortestc.w" ||             // Added in 7.0
            Name == "kortestz.w" ||             // Added in 7.0
            Name.starts_with("kunpck") ||       // Added in 6.0
            Name == "kxnor.w" ||                // Added in 7.0
            Name == "kxor.w" ||                 // Added in 7.0
            Name.starts_with("padds.") ||       // Added in 8.0
            Name.starts_with("pbroadcast") ||   // Added in 3.9
            Name.starts_with("prol") ||         // Added in 8.0
            Name.starts_with("pror") ||         // Added in 8.0
            Name.starts_with("psll.dq") ||      // Added in 3.9
            Name.starts_with("psrl.dq") ||      // Added in 3.9
            Name.starts_with("psubs.") ||       // Added in 8.0
            Name.starts_with("ptestm") ||       // Added in 6.0
            Name.starts_with("ptestnm") ||      // Added in 6.0
            Name.starts_with("storent.") ||     // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") ||      // Added in 8.0
            Name.starts_with("vpshrd."));       // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
            Name.starts_with("vfmsub.") ||    // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") ||   // Added in 7.0
            Name.starts_with("vfnmsub."));    // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||            // Added in 4.0
            Name == "cvtsi2ss" ||          // Added in 7.0
            Name == "cvtsi642ss" ||        // Added in 7.0
            Name == "div.ss" ||            // Added in 4.0
            Name == "mul.ss" ||            // Added in 4.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.ss" ||           // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss");             // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||            // Added in 4.0
            Name == "cvtdq2pd" ||          // Added in 3.9
            Name == "cvtdq2ps" ||          // Added in 7.0
            Name == "cvtps2pd" ||          // Added in 3.9
            Name == "cvtsi2sd" ||          // Added in 7.0
            Name == "cvtsi642sd" ||        // Added in 7.0
            Name == "cvtss2sd" ||          // Added in 7.0
            Name == "div.sd" ||            // Added in 4.0
            Name == "mul.sd" ||            // Added in 4.0
            Name.starts_with("padds.") ||  // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" ||           // Added in 3.9
            Name == "pmaxu.b" ||           // Added in 3.9
            Name == "pmins.w" ||           // Added in 3.9
            Name == "pminu.b" ||           // Added in 3.9
            Name == "pmulu.dq" ||          // Added in 7.0
            Name.starts_with("pshuf") ||   // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") ||  // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.sd" ||           // Added in 7.0
            Name == "storel.dq" ||         // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd");             // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" ||         // Added in 5.0
            Name == "pblendw" ||          // Added in 3.7
            Name == "pmaxsb" ||           // Added in 3.9
            Name == "pmaxsd" ||           // Added in 3.9
            Name == "pmaxud" ||           // Added in 3.9
            Name == "pmaxuw" ||           // Added in 3.9
            Name == "pminsb" ||           // Added in 3.9
            Name == "pminsd" ||           // Added in 3.9
            Name == "pminud" ||           // Added in 3.9
            Name == "pminuw" ||           // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq");            // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128");  // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||          // Added in 3.8
            Name == "vpcmov.256" ||      // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot"));  // Added in 8.0

  return (Name == "addcarry.u32" ||        // Added in 8.0
          Name == "addcarry.u64" ||        // Added in 8.0
          Name == "addcarryx.u32" ||       // Added in 8.0
          Name == "addcarryx.u64" ||       // Added in 8.0
          Name == "subborrow.u32" ||       // Added in 8.0
          Name == "subborrow.u64" ||       // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return UpgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return UpgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return UpgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return UpgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return UpgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
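      // Illustrative: a <2 x double> index selects x86.xop.vpermil2pd, whose
      // current declaration takes a <2 x i64> index operand instead.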
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

static Intrinsic::ID ShouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.starts_with("arm.rbit") || Name.starts_with("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.starts_with("aarch64.neon.frintn")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.starts_with("aarch64.neon.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name == "aarch64.sve.bfdot.lane") {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::aarch64_sve_bfdot_lane_v2);
      return true;
    }
    if (Name == "aarch64.sve.bfmlalb.lane") {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::aarch64_sve_bfmlalb_lane_v2);
      return true;
    }
    if (Name == "aarch64.sve.bfmlalt.lane") {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::aarch64_sve_bfmlalt_lane_v2);
      return true;
    }
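    // SVE structured loads were changed to the "sret" variants, which return a
    // struct of vectors instead of one wide vector; illustrative (hand-written
    // names):
    //   llvm.aarch64.sve.ld2.nxv32i8 (one <vscale x 32 x i8>) becomes
    //   llvm.aarch64.sve.ld2.sret.nxv16i8 (two <vscale x 16 x i8> values).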
    static const Regex LdRegex("^aarch64\\.sve\\.ld[234](.nxv[a-z0-9]+|$)");
    if (LdRegex.match(Name)) {
      Type *ScalarTy =
          cast<VectorType>(F->getReturnType())->getElementType();
      ElementCount EC =
          cast<VectorType>(F->arg_begin()->getType())->getElementCount();
      Type *Ty = VectorType::get(ScalarTy, EC);
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("aarch64.sve.ld2", Intrinsic::aarch64_sve_ld2_sret)
              .StartsWith("aarch64.sve.ld3", Intrinsic::aarch64_sve_ld3_sret)
              .StartsWith("aarch64.sve.ld4", Intrinsic::aarch64_sve_ld4_sret)
              .Default(Intrinsic::not_intrinsic);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty);
      return true;
    }
    if (Name.starts_with("aarch64.sve.tuple.get")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::vector_extract, Tys);
      return true;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[2], Args[1]};
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::vector_insert, Tys);
      return true;
    }
    static const Regex CreateTupleRegex(
        "^aarch64\\.sve\\.tuple\\.create[234](.nxv[a-z0-9]+|$)");
    if (CreateTupleRegex.match(Name)) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {F->getReturnType(), Args[1]};
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::vector_insert, Tys);
      return true;
    }
    if (Name.starts_with("arm.neon.vclz")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.starts_with("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    static const Regex vstRegex(
        "^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
          Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
          Intrinsic::arm_neon_vst4lane};

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (!Name.contains("lane"))
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::thread_pointer);
      return true;
    }
    if (Name.starts_with("arm.neon.vqadds.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.starts_with("arm.neon.vqaddu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.starts_with("arm.neon.vqsubs.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.starts_with("arm.neon.vqsubu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.starts_with("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
      if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, Ty);
        return true;
      }
    }

    // Changed in 12.0: bfdot now accepts v4bf16 and v8bf16 instead of v8i8
    // and v16i8, respectively.
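    // Illustrative (hand-written, not taken from a test):
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(
    //       <2 x float>, <8 x i8>, <8 x i8>)             ; old
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(
    //       <2 x float>, <4 x bfloat>, <4 x bfloat>)     ; new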
    if ((Name.starts_with("arm.neon.bfdot.") ||
         Name.starts_with("aarch64.neon.bfdot.")) &&
        Name.ends_with("i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("arm.neon.bfdot.v2f32.v8i8",
                     "arm.neon.bfdot.v4f32.v16i8",
                     Intrinsic::arm_neon_bfdot)
              .Cases("aarch64.neon.bfdot.v2f32.v8i8",
                     "aarch64.neon.bfdot.v4f32.v16i8",
                     Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      LLVMContext &Ctx = F->getParent()->getContext();
      std::array<Type *, 2> Tys{{
          F->getReturnType(),
          FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
      }};
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are no longer polymorphic
    // and accept v8bf16 instead of v16i8.
    if ((Name.starts_with("arm.neon.bfm") ||
         Name.starts_with("aarch64.neon.bfm")) &&
        Name.ends_with(".v4f32.v16i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Case("arm.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmmla)
              .Case("arm.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalb)
              .Case("arm.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalt)
              .Case("aarch64.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmmla)
              .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalb)
              .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalt)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      std::array<Type *, 0> Tys;
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    if (Name == "arm.mve.vctp64" &&
        cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
      // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
      // function and deal with it below in UpgradeIntrinsicCall.
      rename(F);
      return true;
    }
    // These too are changed to accept a v2i1 instead of the old v4i1.
    if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
        Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
        Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
        Name ==
            "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
        Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
        Name ==
            "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
        Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
      return true;

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
                                          {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.starts_with("ldexp.")) {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name.equals("coro.end")) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("extract.", Intrinsic::vector_extract)
                             .StartsWith("insert.", Intrinsic::vector_insert)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }

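      // The remaining 'experimental.vector.reduce.*' intrinsics were renamed
      // to 'vector.reduce.*'; illustrative (names hand-written):
      //   llvm.experimental.vector.reduce.add.v4i32
      //     -> llvm.vector.reduce.add.v4i32
      //   llvm.experimental.vector.reduce.v2.fadd.f32.v4f32
      //     -> llvm.vector.reduce.fadd.v4f32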
      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn =
              Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn =
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Upgrade the memory intrinsics (memcpy/memmove/memset) that take an
    // explicit alignment parameter to the overloads that encode the alignment
    // as an attribute on the pointer arguments.
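    // Illustrative (alignment value chosen for the example):
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
    //                                        i32 4, i1 false)            ; old
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s,
    //                                    i64 %n, i1 false)               ; new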
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len.
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
1109   case 'n': {
1110     if (Name.consume_front("nvvm.")) {
1111       // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1112       if (F->arg_size() == 1) {
1113         Intrinsic::ID IID =
1114             StringSwitch<Intrinsic::ID>(Name)
1115                 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1116                 .Case("clz.i", Intrinsic::ctlz)
1117                 .Case("popc.i", Intrinsic::ctpop)
1118                 .Default(Intrinsic::not_intrinsic);
1119         if (IID != Intrinsic::not_intrinsic) {
1120           NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1121                                             {F->getReturnType()});
1122           return true;
1123         }
1124       }
1125 
1126       // Check for nvvm intrinsics that need a return type adjustment.
1127       if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1128         Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
1129         if (IID != Intrinsic::not_intrinsic) {
1130           NewFn = nullptr;
1131           return true;
1132         }
1133       }
1134 
1135       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1136       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
1137       //
1138       // TODO: We could add lohi.i2d.
1139       bool Expand = false;
1140       if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
1142         Expand = Name == "i" || Name == "ll";
1143       else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1144         Expand = true;
1145       else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{i,ll,ui,ull}
1147         Expand = Name == "i" || Name == "ll" || Name == "ui" || Name == "ull";
1148       else if (Name.consume_front("atomic.load.add."))
1149         // nvvm.atomic.load.add.{f32.p,f64.p}
1150         Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1151       else
1152         Expand = false;
1153 
1154       if (Expand) {
1155         NewFn = nullptr;
1156         return true;
1157       }
1158       break; // No other 'nvvm.*'.
1159     }
1160     break;
1161   }
1162   case 'o':
    // We only need to change the name to match the mangling, including the
    // address space.
1165     if (Name.starts_with("objectsize.")) {
1166       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1167       if (F->arg_size() == 2 || F->arg_size() == 3 ||
1168           F->getName() !=
1169               Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1170         rename(F);
1171         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1172                                           Tys);
1173         return true;
1174       }
1175     }
1176     break;
1177 
1178   case 'p':
1179     if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1180       rename(F);
1181       NewFn = Intrinsic::getDeclaration(
1182           F->getParent(), Intrinsic::ptr_annotation,
1183           {F->arg_begin()->getType(), F->getArg(1)->getType()});
1184       return true;
1185     }
1186     break;
1187 
1188   case 'r': {
1189     if (Name.consume_front("riscv.")) {
1190       Intrinsic::ID ID;
1191       ID = StringSwitch<Intrinsic::ID>(Name)
1192                .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1193                .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1194                .Case("aes32esi", Intrinsic::riscv_aes32esi)
1195                .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1196                .Default(Intrinsic::not_intrinsic);
1197       if (ID != Intrinsic::not_intrinsic) {
1198         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1199           rename(F);
1200           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1201           return true;
1202         }
1203         break; // No other applicable upgrades.
1204       }
1205 
1206       ID = StringSwitch<Intrinsic::ID>(Name)
1207                .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1208                .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1209                .Default(Intrinsic::not_intrinsic);
1210       if (ID != Intrinsic::not_intrinsic) {
1211         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1212             F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1213           rename(F);
1214           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1215           return true;
1216         }
1217         break; // No other applicable upgrades.
1218       }
1219 
1220       ID = StringSwitch<Intrinsic::ID>(Name)
1221                .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1222                .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1223                .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1224                .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1225                .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1226                .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1227                .Default(Intrinsic::not_intrinsic);
1228       if (ID != Intrinsic::not_intrinsic) {
1229         if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1230           rename(F);
1231           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1232           return true;
1233         }
1234         break; // No other applicable upgrades.
1235       }
1236       break; // No other 'riscv.*' intrinsics
1237     }
1238   } break;
1239 
1240   case 's':
1241     if (Name == "stackprotectorcheck") {
1242       NewFn = nullptr;
1243       return true;
1244     }
1245     break;
1246 
1247   case 'v': {
1248     if (Name == "var.annotation" && F->arg_size() == 4) {
1249       rename(F);
1250       NewFn = Intrinsic::getDeclaration(
1251           F->getParent(), Intrinsic::var_annotation,
1252           {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1253       return true;
1254     }
1255     break;
1256   }
1257 
1258   case 'w':
1259     if (Name.consume_front("wasm.")) {
1260       Intrinsic::ID ID =
1261           StringSwitch<Intrinsic::ID>(Name)
1262               .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1263               .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1264               .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1265               .Default(Intrinsic::not_intrinsic);
1266       if (ID != Intrinsic::not_intrinsic) {
1267         rename(F);
1268         NewFn =
1269             Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
1270         return true;
1271       }
1272 
1273       if (Name.consume_front("dot.i8x16.i7x16.")) {
1274         ID = StringSwitch<Intrinsic::ID>(Name)
1275                  .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1276                  .Case("add.signed",
1277                        Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1278                  .Default(Intrinsic::not_intrinsic);
1279         if (ID != Intrinsic::not_intrinsic) {
1280           rename(F);
1281           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1282           return true;
1283         }
1284         break; // No other 'wasm.dot.i8x16.i7x16.*'.
1285       }
1286       break; // No other 'wasm.*'.
1287     }
1288     break;
1289 
1290   case 'x':
1291     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
1292       return true;
1293   }
1294 
1295   auto *ST = dyn_cast<StructType>(F->getReturnType());
1296   if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1297       F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1298     // Replace return type with literal non-packed struct. Only do this for
1299     // intrinsics declared to return a struct, not for intrinsics with
1300     // overloaded return type, in which case the exact struct type will be
1301     // mangled into the name.
1302     SmallVector<Intrinsic::IITDescriptor> Desc;
1303     Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1304     if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1305       auto *FT = F->getFunctionType();
1306       auto *NewST = StructType::get(ST->getContext(), ST->elements());
1307       auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1308       std::string Name = F->getName().str();
1309       rename(F);
1310       NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1311                                Name, F->getParent());
1312 
1313       // The new function may also need remangling.
1314       if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1315         NewFn = *Result;
1316       return true;
1317     }
1318   }
1319 
1320   // Remangle our intrinsic since we upgrade the mangling
1321   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1322   if (Result != std::nullopt) {
1323     NewFn = *Result;
1324     return true;
1325   }
1326 
1327   //  This may not belong here. This function is effectively being overloaded
1328   //  to both detect an intrinsic which needs upgrading, and to provide the
1329   //  upgraded form of the intrinsic. We should perhaps have two separate
1330   //  functions for this.
1331   return false;
1332 }
1333 
1334 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1335   NewFn = nullptr;
1336   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
1337   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1338 
1339   // Upgrade intrinsic attributes.  This does not change the function.
1340   if (NewFn)
1341     F = NewFn;
1342   if (Intrinsic::ID id = F->getIntrinsicID())
1343     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1344   return Upgraded;
1345 }
1346 
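// Upgrade two-element llvm.global_ctors / llvm.global_dtors entries of the
// form { i32, ptr } to the current three-element form by appending a null
// pointer for the associated-data field of each entry.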
1347 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1348   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1349                           GV->getName() == "llvm.global_dtors")) ||
1350       !GV->hasInitializer())
1351     return nullptr;
1352   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1353   if (!ATy)
1354     return nullptr;
1355   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1356   if (!STy || STy->getNumElements() != 2)
1357     return nullptr;
1358 
1359   LLVMContext &C = GV->getContext();
1360   IRBuilder<> IRB(C);
1361   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1362                                IRB.getPtrTy());
1363   Constant *Init = GV->getInitializer();
1364   unsigned N = Init->getNumOperands();
1365   std::vector<Constant *> NewCtors(N);
1366   for (unsigned i = 0; i != N; ++i) {
1367     auto Ctor = cast<Constant>(Init->getOperand(i));
1368     NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1369                                       Ctor->getAggregateElement(1),
1370                                       Constant::getNullValue(IRB.getPtrTy()));
1371   }
1372   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1373 
1374   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1375                             NewInit, GV->getName());
1376 }
1377 
1378 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1379 // to byte shuffles.
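// For example, with a single 16-byte lane and Shift == 4, the shuffle mask is
// <12,13,14,15, 16,17,...,27>: the first four indices select zero bytes and
// the rest select the low twelve bytes of Op, i.e. a left shift by four bytes.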
1380 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
1381                                          Value *Op, unsigned Shift) {
1382   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1383   unsigned NumElts = ResultTy->getNumElements() * 8;
1384 
1385   // Bitcast from a 64-bit element type to a byte element type.
1386   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1387   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1388 
1389   // We'll be shuffling in zeroes.
1390   Value *Res = Constant::getNullValue(VecTy);
1391 
1392   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1393   // we'll just return the zero vector.
1394   if (Shift < 16) {
1395     int Idxs[64];
1396     // 256/512-bit version is split into 2/4 16-byte lanes.
1397     for (unsigned l = 0; l != NumElts; l += 16)
1398       for (unsigned i = 0; i != 16; ++i) {
1399         unsigned Idx = NumElts + i - Shift;
1400         if (Idx < NumElts)
1401           Idx -= NumElts - 16; // end of lane, switch operand.
1402         Idxs[l + i] = Idx + l;
1403       }
1404 
1405     Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1406   }
1407 
1408   // Bitcast back to a 64-bit element type.
1409   return Builder.CreateBitCast(Res, ResultTy, "cast");
1410 }
1411 
1412 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1413 // to byte shuffles.
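// For example, with a single 16-byte lane and Shift == 4, the shuffle mask is
// <4,5,...,15, 16,17,18,19>: the first twelve indices select the upper twelve
// bytes of Op and the last four select zero bytes, i.e. a right shift by four
// bytes.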
1414 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1415                                          unsigned Shift) {
1416   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1417   unsigned NumElts = ResultTy->getNumElements() * 8;
1418 
1419   // Bitcast from a 64-bit element type to a byte element type.
1420   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1421   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1422 
1423   // We'll be shuffling in zeroes.
1424   Value *Res = Constant::getNullValue(VecTy);
1425 
1426   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1427   // we'll just return the zero vector.
1428   if (Shift < 16) {
1429     int Idxs[64];
1430     // 256/512-bit version is split into 2/4 16-byte lanes.
1431     for (unsigned l = 0; l != NumElts; l += 16)
1432       for (unsigned i = 0; i != 16; ++i) {
1433         unsigned Idx = i + Shift;
1434         if (Idx >= 16)
1435           Idx += NumElts - 16; // end of lane, switch operand.
1436         Idxs[l + i] = Idx + l;
1437       }
1438 
1439     Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1440   }
1441 
1442   // Bitcast back to a 64-bit element type.
1443   return Builder.CreateBitCast(Res, ResultTy, "cast");
1444 }
1445 
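// Bitcast an x86 integer mask argument to a vector of i1, one bit per vector
// element, extracting just the low NumElts bits when fewer than 8 elements
// are in use.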
1446 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1447                             unsigned NumElts) {
1448   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1449   llvm::VectorType *MaskTy = FixedVectorType::get(
1450       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1451   Mask = Builder.CreateBitCast(Mask, MaskTy);
1452 
  // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was
  // an i8 and we need to extract down to the right number of elements.
1455   if (NumElts <= 4) {
1456     int Indices[4];
1457     for (unsigned i = 0; i != NumElts; ++i)
1458       Indices[i] = i;
1459     Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1460                                        "extract");
1461   }
1462 
1463   return Mask;
1464 }
1465 
1466 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
1467                             Value *Op0, Value *Op1) {
1468   // If the mask is all ones just emit the first operation.
1469   if (const auto *C = dyn_cast<Constant>(Mask))
1470     if (C->isAllOnesValue())
1471       return Op0;
1472 
1473   Mask = getX86MaskVec(Builder, Mask,
1474                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1475   return Builder.CreateSelect(Mask, Op0, Op1);
1476 }
1477 
1478 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1479                                   Value *Op0, Value *Op1) {
1480   // If the mask is all ones just emit the first operation.
1481   if (const auto *C = dyn_cast<Constant>(Mask))
1482     if (C->isAllOnesValue())
1483       return Op0;
1484 
1485   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1486                                       Mask->getType()->getIntegerBitWidth());
1487   Mask = Builder.CreateBitCast(Mask, MaskTy);
1488   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1489   return Builder.CreateSelect(Mask, Op0, Op1);
1490 }
1491 
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting, while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
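// For example, a 128-bit PALIGNR with an immediate of 20 becomes a shift in
// of zeroes: since 20 > 16, Op1 takes Op0's value, Op0 becomes zero, and the
// effective shift is 4.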
1495 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1496                                         Value *Op1, Value *Shift,
1497                                         Value *Passthru, Value *Mask,
1498                                         bool IsVALIGN) {
1499   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1500 
1501   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1502   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1503   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1504   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1505 
1506   // Mask the immediate for VALIGN.
1507   if (IsVALIGN)
1508     ShiftVal &= (NumElts - 1);
1509 
1510   // If palignr is shifting the pair of vectors more than the size of two
1511   // lanes, emit zero.
1512   if (ShiftVal >= 32)
1513     return llvm::Constant::getNullValue(Op0->getType());
1514 
1515   // If palignr is shifting the pair of input vectors more than one lane,
1516   // but less than two lanes, convert to shifting in zeroes.
1517   if (ShiftVal > 16) {
1518     ShiftVal -= 16;
1519     Op1 = Op0;
1520     Op0 = llvm::Constant::getNullValue(Op0->getType());
1521   }
1522 
1523   int Indices[64];
1524   // 256-bit palignr operates on 128-bit lanes so we need to handle that
1525   for (unsigned l = 0; l < NumElts; l += 16) {
1526     for (unsigned i = 0; i != 16; ++i) {
1527       unsigned Idx = ShiftVal + i;
1528       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1529         Idx += NumElts - 16; // End of lane, switch operand.
1530       Indices[l + i] = Idx + l;
1531     }
1532   }
1533 
1534   Value *Align = Builder.CreateShuffleVector(
1535       Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1536 
1537   return EmitX86Select(Builder, Mask, Align, Passthru);
1538 }
1539 
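// Upgrade the avx512 masked two-source permute intrinsics to the unmasked
// vpermi2var intrinsics plus an explicit select on the mask operand. ZeroMask
// selects a zero passthrough; for the non-index form the first two arguments
// are swapped before the call.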
1540 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1541                                           bool ZeroMask, bool IndexForm) {
1542   Type *Ty = CI.getType();
1543   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1544   unsigned EltWidth = Ty->getScalarSizeInBits();
1545   bool IsFloat = Ty->isFPOrFPVectorTy();
1546   Intrinsic::ID IID;
1547   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1548     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1549   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1550     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1551   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1552     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1553   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1554     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1555   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1556     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1557   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1558     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1559   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1560     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1561   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1562     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1563   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1564     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1565   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1566     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1567   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1568     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1569   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1570     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1571   else if (VecWidth == 128 && EltWidth == 16)
1572     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1573   else if (VecWidth == 256 && EltWidth == 16)
1574     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1575   else if (VecWidth == 512 && EltWidth == 16)
1576     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1577   else if (VecWidth == 128 && EltWidth == 8)
1578     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1579   else if (VecWidth == 256 && EltWidth == 8)
1580     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1581   else if (VecWidth == 512 && EltWidth == 8)
1582     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1583   else
1584     llvm_unreachable("Unexpected intrinsic");
1585 
1586   Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1587                     CI.getArgOperand(2) };
1588 
  // If this isn't the index form, we need to swap operands 0 and 1.
1590   if (!IndexForm)
1591     std::swap(Args[0], Args[1]);
1592 
1593   Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1594                                 Args);
1595   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1596                              : Builder.CreateBitCast(CI.getArgOperand(1),
1597                                                      Ty);
1598   return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1599 }
1600 
1601 static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1602                                          Intrinsic::ID IID) {
1603   Type *Ty = CI.getType();
1604   Value *Op0 = CI.getOperand(0);
1605   Value *Op1 = CI.getOperand(1);
1606   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1607   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1608 
1609   if (CI.arg_size() == 4) { // For masked intrinsics.
1610     Value *VecSrc = CI.getOperand(2);
1611     Value *Mask = CI.getOperand(3);
1612     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1613   }
1614   return Res;
1615 }
1616 
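// Upgrade the x86 rotate intrinsics to the generic funnel-shift intrinsics:
// fshl/fshr with both value operands equal to Src is exactly a rotate
// left/right by the given amount.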
1617 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1618                                bool IsRotateRight) {
1619   Type *Ty = CI.getType();
1620   Value *Src = CI.getArgOperand(0);
1621   Value *Amt = CI.getArgOperand(1);
1622 
  // The amount may be a scalar immediate, in which case create a splat vector.
  // Funnel-shift amounts are treated as modulo, and the types are all
  // power-of-2 sized, so we only care about the lowest log2 bits anyway.
1626   if (Amt->getType() != Ty) {
1627     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1628     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1629     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1630   }
1631 
1632   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1633   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1634   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1635 
1636   if (CI.arg_size() == 4) { // For masked intrinsics.
1637     Value *VecSrc = CI.getOperand(2);
1638     Value *Mask = CI.getOperand(3);
1639     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1640   }
1641   return Res;
1642 }
1643 
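// Upgrade the XOP vpcom/vpcomu intrinsics, whose immediate encodes one of
// eight predicates, to a plain icmp plus sign extension. Immediates 6 and 7
// encode constant false and constant true respectively.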
1644 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1645                               bool IsSigned) {
1646   Type *Ty = CI.getType();
1647   Value *LHS = CI.getArgOperand(0);
1648   Value *RHS = CI.getArgOperand(1);
1649 
1650   CmpInst::Predicate Pred;
1651   switch (Imm) {
1652   case 0x0:
1653     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1654     break;
1655   case 0x1:
1656     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1657     break;
1658   case 0x2:
1659     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1660     break;
1661   case 0x3:
1662     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1663     break;
1664   case 0x4:
1665     Pred = ICmpInst::ICMP_EQ;
1666     break;
1667   case 0x5:
1668     Pred = ICmpInst::ICMP_NE;
1669     break;
1670   case 0x6:
1671     return Constant::getNullValue(Ty); // FALSE
1672   case 0x7:
1673     return Constant::getAllOnesValue(Ty); // TRUE
1674   default:
1675     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1676   }
1677 
1678   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1679   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1680   return Ext;
1681 }
1682 
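// Upgrade the x86 concat-shift intrinsics to the generic fshl/fshr funnel
// shifts, swapping the two vector operands for the right-shift forms and
// splatting a scalar shift amount when necessary.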
1683 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1684                                     bool IsShiftRight, bool ZeroMask) {
1685   Type *Ty = CI.getType();
1686   Value *Op0 = CI.getArgOperand(0);
1687   Value *Op1 = CI.getArgOperand(1);
1688   Value *Amt = CI.getArgOperand(2);
1689 
1690   if (IsShiftRight)
1691     std::swap(Op0, Op1);
1692 
  // The amount may be a scalar immediate, in which case create a splat vector.
  // Funnel-shift amounts are treated as modulo, and the types are all
  // power-of-2 sized, so we only care about the lowest log2 bits anyway.
1696   if (Amt->getType() != Ty) {
1697     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1698     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1699     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1700   }
1701 
1702   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1703   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1704   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1705 
1706   unsigned NumArgs = CI.arg_size();
1707   if (NumArgs >= 4) { // For masked intrinsics.
1708     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1709                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1710                                    CI.getArgOperand(0);
1711     Value *Mask = CI.getOperand(NumArgs - 1);
1712     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1713   }
1714   return Res;
1715 }
1716 
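// Upgrade an x86 masked store: emit a plain (optionally aligned) store when
// the mask is known all-ones, otherwise an llvm.masked.store with the integer
// mask converted to a vector of i1.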
1717 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1718                                  Value *Ptr, Value *Data, Value *Mask,
1719                                  bool Aligned) {
1720   // Cast the pointer to the right type.
1721   Ptr = Builder.CreateBitCast(Ptr,
1722                               llvm::PointerType::getUnqual(Data->getType()));
1723   const Align Alignment =
1724       Aligned
1725           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1726           : Align(1);
1727 
1728   // If the mask is all ones just emit a regular store.
1729   if (const auto *C = dyn_cast<Constant>(Mask))
1730     if (C->isAllOnesValue())
1731       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1732 
1733   // Convert the mask from an integer type to a vector of i1.
1734   unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1735   Mask = getX86MaskVec(Builder, Mask, NumElts);
1736   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1737 }
1738 
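// Upgrade an x86 masked load: emit a plain (optionally aligned) load when the
// mask is known all-ones, otherwise an llvm.masked.load with the integer mask
// converted to a vector of i1 and Passthru supplying the inactive lanes.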
1739 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1740                                 Value *Ptr, Value *Passthru, Value *Mask,
1741                                 bool Aligned) {
1742   Type *ValTy = Passthru->getType();
1743   // Cast the pointer to the right type.
1744   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1745   const Align Alignment =
1746       Aligned
1747           ? Align(
1748                 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1749                 8)
1750           : Align(1);
1751 
  // If the mask is all ones just emit a regular load.
1753   if (const auto *C = dyn_cast<Constant>(Mask))
1754     if (C->isAllOnesValue())
1755       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1756 
1757   // Convert the mask from an integer type to a vector of i1.
1758   unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1759   Mask = getX86MaskVec(Builder, Mask, NumElts);
1760   return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1761 }
1762 
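// Upgrade x86 vector absolute-value intrinsics to the generic llvm.abs
// intrinsic, with the int-min-is-poison flag set to false, plus a select for
// the masked forms.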
1763 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1764   Type *Ty = CI.getType();
1765   Value *Op0 = CI.getArgOperand(0);
1766   Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1767   Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1768   if (CI.arg_size() == 3)
1769     Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1770   return Res;
1771 }
1772 
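// Upgrade x86 PMULDQ/PMULUDQ style intrinsics: the operands are reinterpreted
// as vXi64, the low 32 bits of each element are sign- or zero-extended in
// place, and the products are formed with a plain 64-bit multiply.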
1773 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1774   Type *Ty = CI.getType();
1775 
1776   // Arguments have a vXi32 type so cast to vXi64.
1777   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1778   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1779 
1780   if (IsSigned) {
1781     // Shift left then arithmetic shift right.
1782     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1783     LHS = Builder.CreateShl(LHS, ShiftAmt);
1784     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1785     RHS = Builder.CreateShl(RHS, ShiftAmt);
1786     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1787   } else {
1788     // Clear the upper bits.
1789     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1790     LHS = Builder.CreateAnd(LHS, Mask);
1791     RHS = Builder.CreateAnd(RHS, Mask);
1792   }
1793 
1794   Value *Res = Builder.CreateMul(LHS, RHS);
1795 
1796   if (CI.arg_size() == 4)
1797     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1798 
1799   return Res;
1800 }
1801 
// Apply a mask to a vector of i1s and make sure the result is at least 8
// bits wide.
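// For example, a v4i1 result is padded to v8i1 by shuffling in four zero bits
// before the final bitcast to i8.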
1803 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1804                                      Value *Mask) {
1805   unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1806   if (Mask) {
1807     const auto *C = dyn_cast<Constant>(Mask);
1808     if (!C || !C->isAllOnesValue())
1809       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1810   }
1811 
1812   if (NumElts < 8) {
1813     int Indices[8];
1814     for (unsigned i = 0; i != NumElts; ++i)
1815       Indices[i] = i;
1816     for (unsigned i = NumElts; i != 8; ++i)
1817       Indices[i] = NumElts + i % NumElts;
1818     Vec = Builder.CreateShuffleVector(Vec,
1819                                       Constant::getNullValue(Vec->getType()),
1820                                       Indices);
1821   }
1822   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1823 }
1824 
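// Upgrade avx512 masked integer compare intrinsics: condition codes 3 and 7
// fold to all-zeros and all-ones respectively, the remaining codes map to an
// icmp predicate, and the result is masked and widened to at least 8 bits.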
1825 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1826                                    unsigned CC, bool Signed) {
1827   Value *Op0 = CI.getArgOperand(0);
1828   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1829 
1830   Value *Cmp;
1831   if (CC == 3) {
1832     Cmp = Constant::getNullValue(
1833         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1834   } else if (CC == 7) {
1835     Cmp = Constant::getAllOnesValue(
1836         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1837   } else {
1838     ICmpInst::Predicate Pred;
1839     switch (CC) {
1840     default: llvm_unreachable("Unknown condition code");
1841     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1842     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1843     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1844     case 4: Pred = ICmpInst::ICMP_NE;  break;
1845     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1846     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1847     }
1848     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1849   }
1850 
1851   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1852 
1853   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1854 }
1855 
1856 // Replace a masked intrinsic with an older unmasked intrinsic.
1857 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1858                                     Intrinsic::ID IID) {
1859   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1860   Value *Rep = Builder.CreateCall(Intrin,
1861                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1862   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1863 }
1864 
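// Upgrade avx512 masked scalar move intrinsics: bit 0 of the mask chooses
// between the low elements of B and Src, and the chosen value is inserted
// back into A's low element.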
1865 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1866   Value* A = CI.getArgOperand(0);
1867   Value* B = CI.getArgOperand(1);
1868   Value* Src = CI.getArgOperand(2);
1869   Value* Mask = CI.getArgOperand(3);
1870 
1871   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1872   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1873   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1874   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1875   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1876   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1877 }
1878 
1879 
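// Upgrade avx512 mask-to-vector intrinsics: convert the integer mask argument
// to a vector of i1 and sign-extend it to the full vector result type.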
1880 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1881   Value* Op = CI.getArgOperand(0);
1882   Type* ReturnOp = CI.getType();
1883   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1884   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1885   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1886 }
1887 
1888 // Replace intrinsic with unmasked version and a select.
1889 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1890                                       CallBase &CI, Value *&Rep) {
1891   Name = Name.substr(12); // Remove avx512.mask.
1892 
1893   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1894   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1895   Intrinsic::ID IID;
1896   if (Name.starts_with("max.p")) {
1897     if (VecWidth == 128 && EltWidth == 32)
1898       IID = Intrinsic::x86_sse_max_ps;
1899     else if (VecWidth == 128 && EltWidth == 64)
1900       IID = Intrinsic::x86_sse2_max_pd;
1901     else if (VecWidth == 256 && EltWidth == 32)
1902       IID = Intrinsic::x86_avx_max_ps_256;
1903     else if (VecWidth == 256 && EltWidth == 64)
1904       IID = Intrinsic::x86_avx_max_pd_256;
1905     else
1906       llvm_unreachable("Unexpected intrinsic");
1907   } else if (Name.starts_with("min.p")) {
1908     if (VecWidth == 128 && EltWidth == 32)
1909       IID = Intrinsic::x86_sse_min_ps;
1910     else if (VecWidth == 128 && EltWidth == 64)
1911       IID = Intrinsic::x86_sse2_min_pd;
1912     else if (VecWidth == 256 && EltWidth == 32)
1913       IID = Intrinsic::x86_avx_min_ps_256;
1914     else if (VecWidth == 256 && EltWidth == 64)
1915       IID = Intrinsic::x86_avx_min_pd_256;
1916     else
1917       llvm_unreachable("Unexpected intrinsic");
1918   } else if (Name.starts_with("pshuf.b.")) {
1919     if (VecWidth == 128)
1920       IID = Intrinsic::x86_ssse3_pshuf_b_128;
1921     else if (VecWidth == 256)
1922       IID = Intrinsic::x86_avx2_pshuf_b;
1923     else if (VecWidth == 512)
1924       IID = Intrinsic::x86_avx512_pshuf_b_512;
1925     else
1926       llvm_unreachable("Unexpected intrinsic");
1927   } else if (Name.starts_with("pmul.hr.sw.")) {
1928     if (VecWidth == 128)
1929       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1930     else if (VecWidth == 256)
1931       IID = Intrinsic::x86_avx2_pmul_hr_sw;
1932     else if (VecWidth == 512)
1933       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1934     else
1935       llvm_unreachable("Unexpected intrinsic");
1936   } else if (Name.starts_with("pmulh.w.")) {
1937     if (VecWidth == 128)
1938       IID = Intrinsic::x86_sse2_pmulh_w;
1939     else if (VecWidth == 256)
1940       IID = Intrinsic::x86_avx2_pmulh_w;
1941     else if (VecWidth == 512)
1942       IID = Intrinsic::x86_avx512_pmulh_w_512;
1943     else
1944       llvm_unreachable("Unexpected intrinsic");
1945   } else if (Name.starts_with("pmulhu.w.")) {
1946     if (VecWidth == 128)
1947       IID = Intrinsic::x86_sse2_pmulhu_w;
1948     else if (VecWidth == 256)
1949       IID = Intrinsic::x86_avx2_pmulhu_w;
1950     else if (VecWidth == 512)
1951       IID = Intrinsic::x86_avx512_pmulhu_w_512;
1952     else
1953       llvm_unreachable("Unexpected intrinsic");
1954   } else if (Name.starts_with("pmaddw.d.")) {
1955     if (VecWidth == 128)
1956       IID = Intrinsic::x86_sse2_pmadd_wd;
1957     else if (VecWidth == 256)
1958       IID = Intrinsic::x86_avx2_pmadd_wd;
1959     else if (VecWidth == 512)
1960       IID = Intrinsic::x86_avx512_pmaddw_d_512;
1961     else
1962       llvm_unreachable("Unexpected intrinsic");
1963   } else if (Name.starts_with("pmaddubs.w.")) {
1964     if (VecWidth == 128)
1965       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1966     else if (VecWidth == 256)
1967       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1968     else if (VecWidth == 512)
1969       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1970     else
1971       llvm_unreachable("Unexpected intrinsic");
1972   } else if (Name.starts_with("packsswb.")) {
1973     if (VecWidth == 128)
1974       IID = Intrinsic::x86_sse2_packsswb_128;
1975     else if (VecWidth == 256)
1976       IID = Intrinsic::x86_avx2_packsswb;
1977     else if (VecWidth == 512)
1978       IID = Intrinsic::x86_avx512_packsswb_512;
1979     else
1980       llvm_unreachable("Unexpected intrinsic");
1981   } else if (Name.starts_with("packssdw.")) {
1982     if (VecWidth == 128)
1983       IID = Intrinsic::x86_sse2_packssdw_128;
1984     else if (VecWidth == 256)
1985       IID = Intrinsic::x86_avx2_packssdw;
1986     else if (VecWidth == 512)
1987       IID = Intrinsic::x86_avx512_packssdw_512;
1988     else
1989       llvm_unreachable("Unexpected intrinsic");
1990   } else if (Name.starts_with("packuswb.")) {
1991     if (VecWidth == 128)
1992       IID = Intrinsic::x86_sse2_packuswb_128;
1993     else if (VecWidth == 256)
1994       IID = Intrinsic::x86_avx2_packuswb;
1995     else if (VecWidth == 512)
1996       IID = Intrinsic::x86_avx512_packuswb_512;
1997     else
1998       llvm_unreachable("Unexpected intrinsic");
1999   } else if (Name.starts_with("packusdw.")) {
2000     if (VecWidth == 128)
2001       IID = Intrinsic::x86_sse41_packusdw;
2002     else if (VecWidth == 256)
2003       IID = Intrinsic::x86_avx2_packusdw;
2004     else if (VecWidth == 512)
2005       IID = Intrinsic::x86_avx512_packusdw_512;
2006     else
2007       llvm_unreachable("Unexpected intrinsic");
2008   } else if (Name.starts_with("vpermilvar.")) {
2009     if (VecWidth == 128 && EltWidth == 32)
2010       IID = Intrinsic::x86_avx_vpermilvar_ps;
2011     else if (VecWidth == 128 && EltWidth == 64)
2012       IID = Intrinsic::x86_avx_vpermilvar_pd;
2013     else if (VecWidth == 256 && EltWidth == 32)
2014       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2015     else if (VecWidth == 256 && EltWidth == 64)
2016       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2017     else if (VecWidth == 512 && EltWidth == 32)
2018       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2019     else if (VecWidth == 512 && EltWidth == 64)
2020       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2021     else
2022       llvm_unreachable("Unexpected intrinsic");
2023   } else if (Name == "cvtpd2dq.256") {
2024     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2025   } else if (Name == "cvtpd2ps.256") {
2026     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2027   } else if (Name == "cvttpd2dq.256") {
2028     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2029   } else if (Name == "cvttps2dq.128") {
2030     IID = Intrinsic::x86_sse2_cvttps2dq;
2031   } else if (Name == "cvttps2dq.256") {
2032     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2033   } else if (Name.starts_with("permvar.")) {
2034     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2035     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2036       IID = Intrinsic::x86_avx2_permps;
2037     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2038       IID = Intrinsic::x86_avx2_permd;
2039     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2040       IID = Intrinsic::x86_avx512_permvar_df_256;
2041     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2042       IID = Intrinsic::x86_avx512_permvar_di_256;
2043     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2044       IID = Intrinsic::x86_avx512_permvar_sf_512;
2045     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2046       IID = Intrinsic::x86_avx512_permvar_si_512;
2047     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2048       IID = Intrinsic::x86_avx512_permvar_df_512;
2049     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2050       IID = Intrinsic::x86_avx512_permvar_di_512;
2051     else if (VecWidth == 128 && EltWidth == 16)
2052       IID = Intrinsic::x86_avx512_permvar_hi_128;
2053     else if (VecWidth == 256 && EltWidth == 16)
2054       IID = Intrinsic::x86_avx512_permvar_hi_256;
2055     else if (VecWidth == 512 && EltWidth == 16)
2056       IID = Intrinsic::x86_avx512_permvar_hi_512;
2057     else if (VecWidth == 128 && EltWidth == 8)
2058       IID = Intrinsic::x86_avx512_permvar_qi_128;
2059     else if (VecWidth == 256 && EltWidth == 8)
2060       IID = Intrinsic::x86_avx512_permvar_qi_256;
2061     else if (VecWidth == 512 && EltWidth == 8)
2062       IID = Intrinsic::x86_avx512_permvar_qi_512;
2063     else
2064       llvm_unreachable("Unexpected intrinsic");
2065   } else if (Name.starts_with("dbpsadbw.")) {
2066     if (VecWidth == 128)
2067       IID = Intrinsic::x86_avx512_dbpsadbw_128;
2068     else if (VecWidth == 256)
2069       IID = Intrinsic::x86_avx512_dbpsadbw_256;
2070     else if (VecWidth == 512)
2071       IID = Intrinsic::x86_avx512_dbpsadbw_512;
2072     else
2073       llvm_unreachable("Unexpected intrinsic");
2074   } else if (Name.starts_with("pmultishift.qb.")) {
2075     if (VecWidth == 128)
2076       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2077     else if (VecWidth == 256)
2078       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2079     else if (VecWidth == 512)
2080       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2081     else
2082       llvm_unreachable("Unexpected intrinsic");
2083   } else if (Name.starts_with("conflict.")) {
2084     if (Name[9] == 'd' && VecWidth == 128)
2085       IID = Intrinsic::x86_avx512_conflict_d_128;
2086     else if (Name[9] == 'd' && VecWidth == 256)
2087       IID = Intrinsic::x86_avx512_conflict_d_256;
2088     else if (Name[9] == 'd' && VecWidth == 512)
2089       IID = Intrinsic::x86_avx512_conflict_d_512;
2090     else if (Name[9] == 'q' && VecWidth == 128)
2091       IID = Intrinsic::x86_avx512_conflict_q_128;
2092     else if (Name[9] == 'q' && VecWidth == 256)
2093       IID = Intrinsic::x86_avx512_conflict_q_256;
2094     else if (Name[9] == 'q' && VecWidth == 512)
2095       IID = Intrinsic::x86_avx512_conflict_q_512;
2096     else
2097       llvm_unreachable("Unexpected intrinsic");
2098   } else if (Name.starts_with("pavg.")) {
2099     if (Name[5] == 'b' && VecWidth == 128)
2100       IID = Intrinsic::x86_sse2_pavg_b;
2101     else if (Name[5] == 'b' && VecWidth == 256)
2102       IID = Intrinsic::x86_avx2_pavg_b;
2103     else if (Name[5] == 'b' && VecWidth == 512)
2104       IID = Intrinsic::x86_avx512_pavg_b_512;
2105     else if (Name[5] == 'w' && VecWidth == 128)
2106       IID = Intrinsic::x86_sse2_pavg_w;
2107     else if (Name[5] == 'w' && VecWidth == 256)
2108       IID = Intrinsic::x86_avx2_pavg_w;
2109     else if (Name[5] == 'w' && VecWidth == 512)
2110       IID = Intrinsic::x86_avx512_pavg_w_512;
2111     else
2112       llvm_unreachable("Unexpected intrinsic");
2113   } else
2114     return false;
2115 
2116   SmallVector<Value *, 4> Args(CI.args());
2117   Args.pop_back();
2118   Args.pop_back();
2119   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2120                            Args);
2121   unsigned NumArgs = CI.arg_size();
2122   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2123                       CI.getArgOperand(NumArgs - 2));
2124   return true;
2125 }
2126 
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
2129 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2130   size_t Pos;
2131   if (AsmStr->find("mov\tfp") == 0 &&
2132       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2133       (Pos = AsmStr->find("# marker")) != std::string::npos) {
2134     AsmStr->replace(Pos, 1, ";");
2135   }
2136 }
2137 
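// Upgrade old ARM MVE/CDE intrinsics that modelled 64-bit-element predicates
// as v4i1 to the current v2i1 forms, inserting arm.mve.pred.{v2i,i2v} casts
// around any 1-bit predicate operands.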
2138 static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2139                                       IRBuilder<> &Builder) {
2140   if (Name == "mve.vctp64.old") {
2141     // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
2142     // correct type.
2143     Value *VCTP = Builder.CreateCall(
2144         Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
2145         CI->getArgOperand(0), CI->getName());
2146     Value *C1 = Builder.CreateCall(
2147         Intrinsic::getDeclaration(
2148             F->getParent(), Intrinsic::arm_mve_pred_v2i,
2149             {VectorType::get(Builder.getInt1Ty(), 2, false)}),
2150         VCTP);
2151     return Builder.CreateCall(
2152         Intrinsic::getDeclaration(
2153             F->getParent(), Intrinsic::arm_mve_pred_i2v,
2154             {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2155         C1);
2156   } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
2157              Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
2158              Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
2159              Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
2160              Name ==
2161                  "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
2162              Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
2163              Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
2164              Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
2165              Name ==
2166                  "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
2167              Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
2168              Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
2169              Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
2170              Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
2171              Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
2172              Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
2173              Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
2174     std::vector<Type *> Tys;
2175     unsigned ID = CI->getIntrinsicID();
2176     Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
2177     switch (ID) {
2178     case Intrinsic::arm_mve_mull_int_predicated:
2179     case Intrinsic::arm_mve_vqdmull_predicated:
2180     case Intrinsic::arm_mve_vldr_gather_base_predicated:
2181       Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
2182       break;
2183     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
2184     case Intrinsic::arm_mve_vstr_scatter_base_predicated:
2185     case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
2186       Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
2187              V2I1Ty};
2188       break;
2189     case Intrinsic::arm_mve_vldr_gather_offset_predicated:
2190       Tys = {CI->getType(), CI->getOperand(0)->getType(),
2191              CI->getOperand(1)->getType(), V2I1Ty};
2192       break;
2193     case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
2194       Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
2195              CI->getOperand(2)->getType(), V2I1Ty};
2196       break;
2197     case Intrinsic::arm_cde_vcx1q_predicated:
2198     case Intrinsic::arm_cde_vcx1qa_predicated:
2199     case Intrinsic::arm_cde_vcx2q_predicated:
2200     case Intrinsic::arm_cde_vcx2qa_predicated:
2201     case Intrinsic::arm_cde_vcx3q_predicated:
2202     case Intrinsic::arm_cde_vcx3qa_predicated:
2203       Tys = {CI->getOperand(1)->getType(), V2I1Ty};
2204       break;
2205     default:
2206       llvm_unreachable("Unhandled Intrinsic!");
2207     }
2208 
2209     std::vector<Value *> Ops;
2210     for (Value *Op : CI->args()) {
2211       Type *Ty = Op->getType();
2212       if (Ty->getScalarSizeInBits() == 1) {
2213         Value *C1 = Builder.CreateCall(
2214             Intrinsic::getDeclaration(
2215                 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2216                 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2217             Op);
2218         Op = Builder.CreateCall(
2219             Intrinsic::getDeclaration(F->getParent(),
2220                                       Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
2221             C1);
2222       }
2223       Ops.push_back(Op);
2224     }
2225 
2226     Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
2227     return Builder.CreateCall(Fn, Ops, CI->getName());
2228   }
2229   llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2230 }
2231 
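// Upgrade the amdgcn atomic.inc/atomic.dec intrinsics to atomicrmw
// uinc_wrap/udec_wrap instructions. Invalid, not-atomic or unordered ordering
// operands fall back to seq_cst, the sync scope is always "agent", and the
// result is volatile unless the volatile operand is a known zero.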
2232 static Value *UpgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
2233                                          Function *F, IRBuilder<> &Builder) {
2234   const bool IsInc = Name.starts_with("atomic.inc.");
2235   if (IsInc || Name.starts_with("atomic.dec.")) {
2236     if (CI->getNumOperands() != 6) // Malformed bitcode.
2237       return nullptr;
2238 
2239     AtomicRMWInst::BinOp RMWOp =
2240         IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2241 
2242     Value *Ptr = CI->getArgOperand(0);
2243     Value *Val = CI->getArgOperand(1);
2244     ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2245     ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
2246 
2247     AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2248     if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2249       Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2250     if (Order == AtomicOrdering::NotAtomic ||
2251         Order == AtomicOrdering::Unordered)
2252       Order = AtomicOrdering::SequentiallyConsistent;
2253 
2254     // The scope argument never really worked correctly. Use agent as the most
2255     // conservative option which should still always produce the instruction.
2256     SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
2257     AtomicRMWInst *RMW =
2258         Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2259 
2260     if (!VolatileArg || !VolatileArg->isZero())
2261       RMW->setVolatile(true);
2262     return RMW;
2263   }
2264 
2265   llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2266 }
2267 
2268 /// Upgrade a call to an old intrinsic. All argument and return casting must be
2269 /// provided to seamlessly integrate with existing context.
2270 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
2271   // Note dyn_cast to Function is not quite the same as getCalledFunction, which
2272   // checks the callee's function type matches. It's likely we need to handle
2273   // type changes here.
2274   Function *F = dyn_cast<Function>(CI->getCalledOperand());
2275   if (!F)
2276     return;
2277 
2278   LLVMContext &C = CI->getContext();
2279   IRBuilder<> Builder(C);
2280   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
2281 
2282   if (!NewFn) {
2283     // Get the Function's name.
2284     StringRef Name = F->getName();
2285 
2286     assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2287     Name = Name.substr(5);
2288 
2289     bool IsX86 = Name.starts_with("x86.");
2290     if (IsX86)
2291       Name = Name.substr(4);
2292     bool IsNVVM = Name.starts_with("nvvm.");
2293     if (IsNVVM)
2294       Name = Name.substr(5);
2295     bool IsARM = Name.starts_with("arm.");
2296     if (IsARM)
2297       Name = Name.substr(4);
2298     bool IsAMDGCN = Name.starts_with("amdgcn.");
2299     if (IsAMDGCN)
2300       Name = Name.substr(7);
2301 
2302     if (IsX86 && Name.starts_with("sse4a.movnt.")) {
2303       SmallVector<Metadata *, 1> Elts;
2304       Elts.push_back(
2305           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2306       MDNode *Node = MDNode::get(C, Elts);
2307 
2308       Value *Arg0 = CI->getArgOperand(0);
2309       Value *Arg1 = CI->getArgOperand(1);
2310 
      // Nontemporal (unaligned) store of the 0th element of the float/double
      // vector.
2313       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2314       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2315       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2316       Value *Extract =
2317           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2318 
2319       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2320       SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2321 
2322       // Remove intrinsic.
2323       CI->eraseFromParent();
2324       return;
2325     }
2326 
2327     if (IsX86 && (Name.starts_with("avx.movnt.") ||
2328                   Name.starts_with("avx512.storent."))) {
2329       SmallVector<Metadata *, 1> Elts;
2330       Elts.push_back(
2331           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2332       MDNode *Node = MDNode::get(C, Elts);
2333 
2334       Value *Arg0 = CI->getArgOperand(0);
2335       Value *Arg1 = CI->getArgOperand(1);
2336 
2337       // Convert the type of the pointer to a pointer to the stored type.
2338       Value *BC = Builder.CreateBitCast(Arg0,
2339                                         PointerType::getUnqual(Arg1->getType()),
2340                                         "cast");
2341       StoreInst *SI = Builder.CreateAlignedStore(
2342           Arg1, BC,
2343           Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2344       SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2345 
2346       // Remove intrinsic.
2347       CI->eraseFromParent();
2348       return;
2349     }
2350 
2351     if (IsX86 && Name == "sse2.storel.dq") {
2352       Value *Arg0 = CI->getArgOperand(0);
2353       Value *Arg1 = CI->getArgOperand(1);
2354 
2355       auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2356       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2357       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2358       Value *BC = Builder.CreateBitCast(Arg0,
2359                                         PointerType::getUnqual(Elt->getType()),
2360                                         "cast");
2361       Builder.CreateAlignedStore(Elt, BC, Align(1));
2362 
2363       // Remove intrinsic.
2364       CI->eraseFromParent();
2365       return;
2366     }
2367 
2368     if (IsX86 && (Name.starts_with("sse.storeu.") ||
2369                   Name.starts_with("sse2.storeu.") ||
2370                   Name.starts_with("avx.storeu."))) {
2371       Value *Arg0 = CI->getArgOperand(0);
2372       Value *Arg1 = CI->getArgOperand(1);
2373 
2374       Arg0 = Builder.CreateBitCast(Arg0,
2375                                    PointerType::getUnqual(Arg1->getType()),
2376                                    "cast");
2377       Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2378 
2379       // Remove intrinsic.
2380       CI->eraseFromParent();
2381       return;
2382     }
2383 
2384     if (IsX86 && Name == "avx512.mask.store.ss") {
2385       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2386       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2387                          Mask, false);
2388 
2389       // Remove intrinsic.
2390       CI->eraseFromParent();
2391       return;
2392     }
2393 
2394     if (IsX86 && (Name.starts_with("avx512.mask.store"))) {
2395       // "avx512.mask.storeu." or "avx512.mask.store."
2396       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2397       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2398                          CI->getArgOperand(2), Aligned);
2399 
2400       // Remove intrinsic.
2401       CI->eraseFromParent();
2402       return;
2403     }
2404 
2405     Value *Rep;
2406     // Upgrade packed integer vector compare intrinsics to compare instructions.
2407     if (IsX86 && (Name.starts_with("sse2.pcmp") ||
2408                   Name.starts_with("avx2.pcmp"))) {
2409       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2410       bool CmpEq = Name[9] == 'e';
2411       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2412                                CI->getArgOperand(0), CI->getArgOperand(1));
2413       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
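           // e.g. llvm.x86.sse2.pcmpeq.b becomes, roughly:
           //   %cmp = icmp eq <16 x i8> %a, %b
           //   %res = sext <16 x i1> %cmp to <16 x i8>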
2414     } else if (IsX86 && (Name.starts_with("avx512.broadcastm"))) {
2415       Type *ExtTy = Type::getInt32Ty(C);
2416       if (CI->getOperand(0)->getType()->isIntegerTy(8))
2417         ExtTy = Type::getInt64Ty(C);
2418       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2419                          ExtTy->getPrimitiveSizeInBits();
2420       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2421       Rep = Builder.CreateVectorSplat(NumElts, Rep);
2422     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2423                          Name == "sse2.sqrt.sd")) {
2424       Value *Vec = CI->getArgOperand(0);
2425       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2426       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2427                                                  Intrinsic::sqrt, Elt0->getType());
2428       Elt0 = Builder.CreateCall(Intr, Elt0);
2429       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
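           // Only lane 0 is computed via llvm.sqrt; the remaining lanes of the
           // source vector pass through unchanged.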
2430     } else if (IsX86 && (Name.starts_with("avx.sqrt.p") ||
2431                          Name.starts_with("sse2.sqrt.p") ||
2432                          Name.starts_with("sse.sqrt.p"))) {
2433       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2434                                                          Intrinsic::sqrt,
2435                                                          CI->getType()),
2436                                {CI->getArgOperand(0)});
2437     } else if (IsX86 && (Name.starts_with("avx512.mask.sqrt.p"))) {
2438       if (CI->arg_size() == 4 &&
2439           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2440            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2441         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2442                                             : Intrinsic::x86_avx512_sqrt_pd_512;
2443 
2444         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2445         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2446                                                            IID), Args);
2447       } else {
2448         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2449                                                            Intrinsic::sqrt,
2450                                                            CI->getType()),
2451                                  {CI->getArgOperand(0)});
2452       }
2453       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2454                           CI->getArgOperand(1));
2455     } else if (IsX86 && (Name.starts_with("avx512.ptestm") ||
2456                          Name.starts_with("avx512.ptestnm"))) {
2457       Value *Op0 = CI->getArgOperand(0);
2458       Value *Op1 = CI->getArgOperand(1);
2459       Value *Mask = CI->getArgOperand(2);
2460       Rep = Builder.CreateAnd(Op0, Op1);
2461       llvm::Type *Ty = Op0->getType();
2462       Value *Zero = llvm::Constant::getNullValue(Ty);
2463       ICmpInst::Predicate Pred =
2464         Name.starts_with("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2465       Rep = Builder.CreateICmp(Pred, Rep, Zero);
2466       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
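           // ptestm sets a mask bit when (Op0 & Op1) != 0 for that element;
           // ptestnm tests for == 0 instead.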
2467     } else if (IsX86 && (Name.starts_with("avx512.mask.pbroadcast"))){
2468       unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2469                              ->getNumElements();
2470       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2471       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2472                           CI->getArgOperand(1));
2473     } else if (IsX86 && (Name.starts_with("avx512.kunpck"))) {
2474       unsigned NumElts = CI->getType()->getScalarSizeInBits();
2475       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2476       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2477       int Indices[64];
2478       for (unsigned i = 0; i != NumElts; ++i)
2479         Indices[i] = i;
2480 
2481       // First extract half of each vector. This gives better codegen than
2482       // doing it in a single shuffle.
2483       LHS =
2484           Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2485       RHS =
2486           Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2487       // Concat the vectors.
2488       // NOTE: Operands have to be swapped to match intrinsic definition.
2489       Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2490       Rep = Builder.CreateBitCast(Rep, CI->getType());
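           // e.g. for avx512.kunpck.bw (NumElts == 16) the result mask is
           // (Arg0[7:0] << 8) | Arg1[7:0], hence the swapped shuffle operands.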
2491     } else if (IsX86 && Name == "avx512.kand.w") {
2492       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2493       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2494       Rep = Builder.CreateAnd(LHS, RHS);
2495       Rep = Builder.CreateBitCast(Rep, CI->getType());
2496     } else if (IsX86 && Name == "avx512.kandn.w") {
2497       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2498       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2499       LHS = Builder.CreateNot(LHS);
2500       Rep = Builder.CreateAnd(LHS, RHS);
2501       Rep = Builder.CreateBitCast(Rep, CI->getType());
2502     } else if (IsX86 && Name == "avx512.kor.w") {
2503       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2504       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2505       Rep = Builder.CreateOr(LHS, RHS);
2506       Rep = Builder.CreateBitCast(Rep, CI->getType());
2507     } else if (IsX86 && Name == "avx512.kxor.w") {
2508       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2509       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2510       Rep = Builder.CreateXor(LHS, RHS);
2511       Rep = Builder.CreateBitCast(Rep, CI->getType());
2512     } else if (IsX86 && Name == "avx512.kxnor.w") {
2513       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2514       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2515       LHS = Builder.CreateNot(LHS);
2516       Rep = Builder.CreateXor(LHS, RHS);
2517       Rep = Builder.CreateBitCast(Rep, CI->getType());
2518     } else if (IsX86 && Name == "avx512.knot.w") {
2519       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2520       Rep = Builder.CreateNot(Rep);
2521       Rep = Builder.CreateBitCast(Rep, CI->getType());
2522     } else if (IsX86 &&
2523                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2524       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2525       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2526       Rep = Builder.CreateOr(LHS, RHS);
2527       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2528       Value *Ref;
2529       if (Name[14] == 'c')
2530         Ref = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2531       else
2532         Ref = ConstantInt::getNullValue(Builder.getInt16Ty());
2533       Rep = Builder.CreateICmpEQ(Rep, Ref);
2534       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
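           // kortestz returns 1 iff (LHS | RHS) is all zeros; kortestc returns
           // 1 iff it is all ones, hence the compare against 0 or 0xffff.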
2535     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2536                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2537                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2538                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2539       Type *I32Ty = Type::getInt32Ty(C);
2540       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2541                                                  ConstantInt::get(I32Ty, 0));
2542       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2543                                                  ConstantInt::get(I32Ty, 0));
2544       Value *EltOp;
2545       if (Name.contains(".add."))
2546         EltOp = Builder.CreateFAdd(Elt0, Elt1);
2547       else if (Name.contains(".sub."))
2548         EltOp = Builder.CreateFSub(Elt0, Elt1);
2549       else if (Name.contains(".mul."))
2550         EltOp = Builder.CreateFMul(Elt0, Elt1);
2551       else
2552         EltOp = Builder.CreateFDiv(Elt0, Elt1);
2553       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2554                                         ConstantInt::get(I32Ty, 0));
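           // e.g. llvm.x86.sse.add.ss becomes, roughly:
           //   %a0 = extractelement <4 x float> %a, i32 0
           //   %b0 = extractelement <4 x float> %b, i32 0
           //   %s = fadd float %a0, %b0
           //   %res = insertelement <4 x float> %a, float %s, i32 0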
2555     } else if (IsX86 && Name.starts_with("avx512.mask.pcmp")) {
2556       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2557       bool CmpEq = Name[16] == 'e';
2558       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2559     } else if (IsX86 && Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2560       Type *OpTy = CI->getArgOperand(0)->getType();
2561       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2562       Intrinsic::ID IID;
2563       switch (VecWidth) {
2564       default: llvm_unreachable("Unexpected intrinsic");
2565       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2566       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2567       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2568       }
2569 
2570       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2571                                { CI->getOperand(0), CI->getArgOperand(1) });
2572       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2573     } else if (IsX86 && Name.starts_with("avx512.mask.fpclass.p")) {
2574       Type *OpTy = CI->getArgOperand(0)->getType();
2575       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2576       unsigned EltWidth = OpTy->getScalarSizeInBits();
2577       Intrinsic::ID IID;
2578       if (VecWidth == 128 && EltWidth == 32)
2579         IID = Intrinsic::x86_avx512_fpclass_ps_128;
2580       else if (VecWidth == 256 && EltWidth == 32)
2581         IID = Intrinsic::x86_avx512_fpclass_ps_256;
2582       else if (VecWidth == 512 && EltWidth == 32)
2583         IID = Intrinsic::x86_avx512_fpclass_ps_512;
2584       else if (VecWidth == 128 && EltWidth == 64)
2585         IID = Intrinsic::x86_avx512_fpclass_pd_128;
2586       else if (VecWidth == 256 && EltWidth == 64)
2587         IID = Intrinsic::x86_avx512_fpclass_pd_256;
2588       else if (VecWidth == 512 && EltWidth == 64)
2589         IID = Intrinsic::x86_avx512_fpclass_pd_512;
2590       else
2591         llvm_unreachable("Unexpected intrinsic");
2592 
2593       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2594                                { CI->getOperand(0), CI->getArgOperand(1) });
2595       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2596     } else if (IsX86 && Name.starts_with("avx512.cmp.p")) {
2597       SmallVector<Value *, 4> Args(CI->args());
2598       Type *OpTy = Args[0]->getType();
2599       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2600       unsigned EltWidth = OpTy->getScalarSizeInBits();
2601       Intrinsic::ID IID;
2602       if (VecWidth == 128 && EltWidth == 32)
2603         IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2604       else if (VecWidth == 256 && EltWidth == 32)
2605         IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2606       else if (VecWidth == 512 && EltWidth == 32)
2607         IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2608       else if (VecWidth == 128 && EltWidth == 64)
2609         IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2610       else if (VecWidth == 256 && EltWidth == 64)
2611         IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2612       else if (VecWidth == 512 && EltWidth == 64)
2613         IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2614       else
2615         llvm_unreachable("Unexpected intrinsic");
2616 
2617       Value *Mask = Constant::getAllOnesValue(CI->getType());
2618       if (VecWidth == 512)
2619         std::swap(Mask, Args.back());
2620       Args.push_back(Mask);
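           // For 512-bit types the trailing rounding (sae) operand must stay
           // last, so the all-ones mask is swapped in before it; otherwise the
           // mask is simply appended.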
2621 
2622       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2623                                Args);
2624     } else if (IsX86 && Name.starts_with("avx512.mask.cmp.")) {
2625       // Integer compare intrinsics.
2626       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2627       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2628     } else if (IsX86 && Name.starts_with("avx512.mask.ucmp.")) {
2629       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2630       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2631     } else if (IsX86 && (Name.starts_with("avx512.cvtb2mask.") ||
2632                          Name.starts_with("avx512.cvtw2mask.") ||
2633                          Name.starts_with("avx512.cvtd2mask.") ||
2634                          Name.starts_with("avx512.cvtq2mask."))) {
2635       Value *Op = CI->getArgOperand(0);
2636       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2637       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2638       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2639     } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
2640                          Name == "ssse3.pabs.w.128" ||
2641                          Name == "ssse3.pabs.d.128" ||
2642                          Name.starts_with("avx2.pabs") ||
2643                          Name.starts_with("avx512.mask.pabs"))) {
2644       Rep = upgradeAbs(Builder, *CI);
2645     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2646                          Name == "sse2.pmaxs.w" ||
2647                          Name == "sse41.pmaxsd" ||
2648                          Name.starts_with("avx2.pmaxs") ||
2649                          Name.starts_with("avx512.mask.pmaxs"))) {
2650       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2651     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2652                          Name == "sse41.pmaxuw" ||
2653                          Name == "sse41.pmaxud" ||
2654                          Name.starts_with("avx2.pmaxu") ||
2655                          Name.starts_with("avx512.mask.pmaxu"))) {
2656       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2657     } else if (IsX86 && (Name == "sse41.pminsb" ||
2658                          Name == "sse2.pmins.w" ||
2659                          Name == "sse41.pminsd" ||
2660                          Name.starts_with("avx2.pmins") ||
2661                          Name.starts_with("avx512.mask.pmins"))) {
2662       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2663     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2664                          Name == "sse41.pminuw" ||
2665                          Name == "sse41.pminud" ||
2666                          Name.starts_with("avx2.pminu") ||
2667                          Name.starts_with("avx512.mask.pminu"))) {
2668       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2669     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2670                          Name == "avx2.pmulu.dq" ||
2671                          Name == "avx512.pmulu.dq.512" ||
2672                          Name.starts_with("avx512.mask.pmulu.dq."))) {
2673       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2674     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2675                          Name == "avx2.pmul.dq" ||
2676                          Name == "avx512.pmul.dq.512" ||
2677                          Name.starts_with("avx512.mask.pmul.dq."))) {
2678       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2679     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2680                          Name == "sse2.cvtsi2sd" ||
2681                          Name == "sse.cvtsi642ss" ||
2682                          Name == "sse2.cvtsi642sd")) {
2683       Rep = Builder.CreateSIToFP(
2684           CI->getArgOperand(1),
2685           cast<VectorType>(CI->getType())->getElementType());
2686       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2687     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2688       Rep = Builder.CreateUIToFP(
2689           CI->getArgOperand(1),
2690           cast<VectorType>(CI->getType())->getElementType());
2691       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2692     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2693       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2694       Rep = Builder.CreateFPExt(
2695           Rep, cast<VectorType>(CI->getType())->getElementType());
2696       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2697     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2698                          Name == "sse2.cvtdq2ps" ||
2699                          Name == "avx.cvtdq2.pd.256" ||
2700                          Name == "avx.cvtdq2.ps.256" ||
2701                          Name.starts_with("avx512.mask.cvtdq2pd.") ||
2702                          Name.starts_with("avx512.mask.cvtudq2pd.") ||
2703                          Name.starts_with("avx512.mask.cvtdq2ps.") ||
2704                          Name.starts_with("avx512.mask.cvtudq2ps.") ||
2705                          Name.starts_with("avx512.mask.cvtqq2pd.") ||
2706                          Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2707                          Name == "avx512.mask.cvtqq2ps.256" ||
2708                          Name == "avx512.mask.cvtqq2ps.512" ||
2709                          Name == "avx512.mask.cvtuqq2ps.256" ||
2710                          Name == "avx512.mask.cvtuqq2ps.512" ||
2711                          Name == "sse2.cvtps2pd" ||
2712                          Name == "avx.cvt.ps2.pd.256" ||
2713                          Name == "avx512.mask.cvtps2pd.128" ||
2714                          Name == "avx512.mask.cvtps2pd.256")) {
2715       auto *DstTy = cast<FixedVectorType>(CI->getType());
2716       Rep = CI->getArgOperand(0);
2717       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2718 
2719       unsigned NumDstElts = DstTy->getNumElements();
2720       if (NumDstElts < SrcTy->getNumElements()) {
2721         assert(NumDstElts == 2 && "Unexpected vector size");
2722         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2723       }
2724 
2725       bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2726       bool IsUnsigned = Name.contains("cvtu");
2727       if (IsPS2PD)
2728         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2729       else if (CI->arg_size() == 4 &&
2730                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2731                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2732         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2733                                        : Intrinsic::x86_avx512_sitofp_round;
2734         Function *CvtFn = Intrinsic::getDeclaration(CI->getModule(), IID,
2735                                                     { DstTy, SrcTy });
2736         Rep = Builder.CreateCall(CvtFn, { Rep, CI->getArgOperand(3) });
2737       } else {
2738         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2739                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2740       }
2741 
2742       if (CI->arg_size() >= 3)
2743         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2744                             CI->getArgOperand(1));
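           // e.g. llvm.x86.sse2.cvtdq2pd: shuffle out the low two lanes of the
           // <4 x i32> source, then sitofp them to <2 x double>.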
2745     } else if (IsX86 && (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2746                          Name.starts_with("vcvtph2ps."))) {
2747       auto *DstTy = cast<FixedVectorType>(CI->getType());
2748       Rep = CI->getArgOperand(0);
2749       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2750       unsigned NumDstElts = DstTy->getNumElements();
2751       if (NumDstElts != SrcTy->getNumElements()) {
2752         assert(NumDstElts == 4 && "Unexpected vector size");
2753         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2754       }
2755       Rep = Builder.CreateBitCast(
2756           Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2757       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2758       if (CI->arg_size() >= 3)
2759         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2760                             CI->getArgOperand(1));
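           // The i16 lanes are reinterpreted as half and widened, e.g. for
           // vcvtph2ps.128: low 4 lanes of <8 x i16> -> <4 x half> -> <4 x float>.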
2761     } else if (IsX86 && Name.starts_with("avx512.mask.load")) {
2762       // "avx512.mask.loadu." or "avx512.mask.load."
2763       bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2764       Rep =
2765           UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2766                             CI->getArgOperand(2), Aligned);
2767     } else if (IsX86 && Name.starts_with("avx512.mask.expand.load.")) {
2768       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2769       Type *PtrTy = ResultTy->getElementType();
2770 
2771       // Cast the pointer to element type.
2772       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2773                                          llvm::PointerType::getUnqual(PtrTy));
2774 
2775       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2776                                      ResultTy->getNumElements());
2777 
2778       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2779                                                 Intrinsic::masked_expandload,
2780                                                 ResultTy);
2781       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
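           // llvm.masked.expandload reads consecutive elements from Ptr into
           // the enabled mask lanes; disabled lanes take the passthru value.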
2782     } else if (IsX86 && Name.starts_with("avx512.mask.compress.store.")) {
2783       auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2784       Type *PtrTy = ResultTy->getElementType();
2785 
2786       // Cast the pointer to element type.
2787       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2788                                          llvm::PointerType::getUnqual(PtrTy));
2789 
2790       Value *MaskVec =
2791           getX86MaskVec(Builder, CI->getArgOperand(2),
2792                         cast<FixedVectorType>(ResultTy)->getNumElements());
2793 
2794       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2795                                                 Intrinsic::masked_compressstore,
2796                                                 ResultTy);
2797       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2798     } else if (IsX86 && (Name.starts_with("avx512.mask.compress.") ||
2799                          Name.starts_with("avx512.mask.expand."))) {
2800       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2801 
2802       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2803                                      ResultTy->getNumElements());
2804 
2805       bool IsCompress = Name[12] == 'c';
2806       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2807                                      : Intrinsic::x86_avx512_mask_expand;
2808       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2809       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2810                                        MaskVec });
2811     } else if (IsX86 && Name.starts_with("xop.vpcom")) {
2812       bool IsSigned;
2813       if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2814           Name.ends_with("uq"))
2815         IsSigned = false;
2816       else if (Name.ends_with("b") || Name.ends_with("w") || Name.ends_with("d") ||
2817                Name.ends_with("q"))
2818         IsSigned = true;
2819       else
2820         llvm_unreachable("Unknown suffix");
2821 
2822       unsigned Imm;
2823       if (CI->arg_size() == 3) {
2824         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2825       } else {
2826         Name = Name.substr(9); // strip off "xop.vpcom"
2827         if (Name.starts_with("lt"))
2828           Imm = 0;
2829         else if (Name.starts_with("le"))
2830           Imm = 1;
2831         else if (Name.starts_with("gt"))
2832           Imm = 2;
2833         else if (Name.starts_with("ge"))
2834           Imm = 3;
2835         else if (Name.starts_with("eq"))
2836           Imm = 4;
2837         else if (Name.starts_with("ne"))
2838           Imm = 5;
2839         else if (Name.starts_with("false"))
2840           Imm = 6;
2841         else if (Name.starts_with("true"))
2842           Imm = 7;
2843         else
2844           llvm_unreachable("Unknown condition");
2845       }
2846 
2847       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2848     } else if (IsX86 && Name.starts_with("xop.vpcmov")) {
2849       Value *Sel = CI->getArgOperand(2);
2850       Value *NotSel = Builder.CreateNot(Sel);
2851       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2852       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2853       Rep = Builder.CreateOr(Sel0, Sel1);
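           // vpcmov is a bitwise select: (Op0 & Sel) | (Op1 & ~Sel).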
2854     } else if (IsX86 && (Name.starts_with("xop.vprot") ||
2855                          Name.starts_with("avx512.prol") ||
2856                          Name.starts_with("avx512.mask.prol"))) {
2857       Rep = upgradeX86Rotate(Builder, *CI, false);
2858     } else if (IsX86 && (Name.starts_with("avx512.pror") ||
2859                          Name.starts_with("avx512.mask.pror"))) {
2860       Rep = upgradeX86Rotate(Builder, *CI, true);
2861     } else if (IsX86 && (Name.starts_with("avx512.vpshld.") ||
2862                          Name.starts_with("avx512.mask.vpshld") ||
2863                          Name.starts_with("avx512.maskz.vpshld"))) {
2864       bool ZeroMask = Name[11] == 'z';
2865       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2866     } else if (IsX86 && (Name.starts_with("avx512.vpshrd.") ||
2867                          Name.starts_with("avx512.mask.vpshrd") ||
2868                          Name.starts_with("avx512.maskz.vpshrd"))) {
2869       bool ZeroMask = Name[11] == 'z';
2870       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2871     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2872       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2873                                                Intrinsic::x86_sse42_crc32_32_8);
2874       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2875       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2876       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2877     } else if (IsX86 && (Name.starts_with("avx.vbroadcast.s") ||
2878                          Name.starts_with("avx512.vbroadcast.s"))) {
2879       // Replace broadcasts with a series of insertelements.
2880       auto *VecTy = cast<FixedVectorType>(CI->getType());
2881       Type *EltTy = VecTy->getElementType();
2882       unsigned EltNum = VecTy->getNumElements();
2883       Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2884       Type *I32Ty = Type::getInt32Ty(C);
2885       Rep = PoisonValue::get(VecTy);
2886       for (unsigned I = 0; I < EltNum; ++I)
2887         Rep = Builder.CreateInsertElement(Rep, Load,
2888                                           ConstantInt::get(I32Ty, I));
2889     } else if (IsX86 && (Name.starts_with("sse41.pmovsx") ||
2890                          Name.starts_with("sse41.pmovzx") ||
2891                          Name.starts_with("avx2.pmovsx") ||
2892                          Name.starts_with("avx2.pmovzx") ||
2893                          Name.starts_with("avx512.mask.pmovsx") ||
2894                          Name.starts_with("avx512.mask.pmovzx"))) {
2895       auto *DstTy = cast<FixedVectorType>(CI->getType());
2896       unsigned NumDstElts = DstTy->getNumElements();
2897 
2898       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2899       SmallVector<int, 8> ShuffleMask(NumDstElts);
2900       for (unsigned i = 0; i != NumDstElts; ++i)
2901         ShuffleMask[i] = i;
2902 
2903       Value *SV =
2904           Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2905 
2906       bool DoSext = Name.contains("pmovsx");
2907       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2908                    : Builder.CreateZExt(SV, DstTy);
2909       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2910       if (CI->arg_size() == 3)
2911         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2912                             CI->getArgOperand(1));
2913     } else if (Name == "avx512.mask.pmov.qd.256" ||
2914                Name == "avx512.mask.pmov.qd.512" ||
2915                Name == "avx512.mask.pmov.wb.256" ||
2916                Name == "avx512.mask.pmov.wb.512") {
2917       Type *Ty = CI->getArgOperand(1)->getType();
2918       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2919       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2920                           CI->getArgOperand(1));
2921     } else if (IsX86 && (Name.starts_with("avx.vbroadcastf128") ||
2922                          Name == "avx2.vbroadcasti128")) {
2923       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2924       Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2925       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2926       auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2927       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2928                                             PointerType::getUnqual(VT));
2929       Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2930       if (NumSrcElts == 2)
2931         Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2932       else
2933         Rep = Builder.CreateShuffleVector(
2934             Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
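           // e.g. llvm.x86.avx.vbroadcastf128.ps.256: load <4 x float>, then
           // shuffle <0, 1, 2, 3, 0, 1, 2, 3> to fill <8 x float>.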
2935     } else if (IsX86 && (Name.starts_with("avx512.mask.shuf.i") ||
2936                          Name.starts_with("avx512.mask.shuf.f"))) {
2937       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2938       Type *VT = CI->getType();
2939       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2940       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2941       unsigned ControlBitsMask = NumLanes - 1;
2942       unsigned NumControlBits = NumLanes / 2;
2943       SmallVector<int, 8> ShuffleMask(0);
2944 
2945       for (unsigned l = 0; l != NumLanes; ++l) {
2946         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2947         // We actually need the other source.
2948         if (l >= NumLanes / 2)
2949           LaneMask += NumLanes;
2950         for (unsigned i = 0; i != NumElementsInLane; ++i)
2951           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2952       }
2953       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2954                                         CI->getArgOperand(1), ShuffleMask);
2955       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2956                           CI->getArgOperand(3));
2957     } else if (IsX86 && (Name.starts_with("avx512.mask.broadcastf") ||
2958                          Name.starts_with("avx512.mask.broadcasti"))) {
2959       unsigned NumSrcElts =
2960           cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2961               ->getNumElements();
2962       unsigned NumDstElts =
2963           cast<FixedVectorType>(CI->getType())->getNumElements();
2964 
2965       SmallVector<int, 8> ShuffleMask(NumDstElts);
2966       for (unsigned i = 0; i != NumDstElts; ++i)
2967         ShuffleMask[i] = i % NumSrcElts;
2968 
2969       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2970                                         CI->getArgOperand(0),
2971                                         ShuffleMask);
2972       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2973                           CI->getArgOperand(1));
2974     } else if (IsX86 && (Name.starts_with("avx2.pbroadcast") ||
2975                          Name.starts_with("avx2.vbroadcast") ||
2976                          Name.starts_with("avx512.pbroadcast") ||
2977                          Name.starts_with("avx512.mask.broadcast.s"))) {
2978       // Replace vp?broadcasts with a vector shuffle.
2979       Value *Op = CI->getArgOperand(0);
2980       ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2981       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2982       SmallVector<int, 8> M;
2983       ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2984       Rep = Builder.CreateShuffleVector(Op, M);
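           // A zeroinitializer shuffle mask replicates lane 0 into every
           // result lane, which is exactly a broadcast.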
2985 
2986       if (CI->arg_size() == 3)
2987         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2988                             CI->getArgOperand(1));
2989     } else if (IsX86 && (Name.starts_with("sse2.padds.") ||
2990                          Name.starts_with("avx2.padds.") ||
2991                          Name.starts_with("avx512.padds.") ||
2992                          Name.starts_with("avx512.mask.padds."))) {
2993       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2994     } else if (IsX86 && (Name.starts_with("sse2.psubs.") ||
2995                          Name.starts_with("avx2.psubs.") ||
2996                          Name.starts_with("avx512.psubs.") ||
2997                          Name.starts_with("avx512.mask.psubs."))) {
2998       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2999     } else if (IsX86 && (Name.starts_with("sse2.paddus.") ||
3000                          Name.starts_with("avx2.paddus.") ||
3001                          Name.starts_with("avx512.mask.paddus."))) {
3002       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3003     } else if (IsX86 && (Name.starts_with("sse2.psubus.") ||
3004                          Name.starts_with("avx2.psubus.") ||
3005                          Name.starts_with("avx512.mask.psubus."))) {
3006       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3007     } else if (IsX86 && Name.starts_with("avx512.mask.palignr.")) {
3008       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3009                                       CI->getArgOperand(1),
3010                                       CI->getArgOperand(2),
3011                                       CI->getArgOperand(3),
3012                                       CI->getArgOperand(4),
3013                                       false);
3014     } else if (IsX86 && Name.starts_with("avx512.mask.valign.")) {
3015       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3016                                       CI->getArgOperand(1),
3017                                       CI->getArgOperand(2),
3018                                       CI->getArgOperand(3),
3019                                       CI->getArgOperand(4),
3020                                       true);
3021     } else if (IsX86 && (Name == "sse2.psll.dq" ||
3022                          Name == "avx2.psll.dq")) {
3023       // 128/256-bit shift left specified in bits.
3024       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3025       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3026                                        Shift / 8); // Shift is in bits.
3027     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
3028                          Name == "avx2.psrl.dq")) {
3029       // 128/256-bit shift right specified in bits.
3030       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3031       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3032                                        Shift / 8); // Shift is in bits.
3033     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
3034                          Name == "avx2.psll.dq.bs" ||
3035                          Name == "avx512.psll.dq.512")) {
3036       // 128/256/512-bit shift left specified in bytes.
3037       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3038       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3039     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
3040                          Name == "avx2.psrl.dq.bs" ||
3041                          Name == "avx512.psrl.dq.512")) {
3042       // 128/256/512-bit shift right specified in bytes.
3043       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3044       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3045     } else if (IsX86 && (Name == "sse41.pblendw" ||
3046                          Name.starts_with("sse41.blendp") ||
3047                          Name.starts_with("avx.blend.p") ||
3048                          Name == "avx2.pblendw" ||
3049                          Name.starts_with("avx2.pblendd."))) {
3050       Value *Op0 = CI->getArgOperand(0);
3051       Value *Op1 = CI->getArgOperand(1);
3052       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3053       auto *VecTy = cast<FixedVectorType>(CI->getType());
3054       unsigned NumElts = VecTy->getNumElements();
3055 
3056       SmallVector<int, 16> Idxs(NumElts);
3057       for (unsigned i = 0; i != NumElts; ++i)
3058         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
3059 
3060       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
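           // Bit i (mod 8) of the immediate selects element i from Op1 when
           // set and from Op0 when clear, so the blend is a single shuffle.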
3061     } else if (IsX86 && (Name.starts_with("avx.vinsertf128.") ||
3062                          Name == "avx2.vinserti128" ||
3063                          Name.starts_with("avx512.mask.insert"))) {
3064       Value *Op0 = CI->getArgOperand(0);
3065       Value *Op1 = CI->getArgOperand(1);
3066       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3067       unsigned DstNumElts =
3068           cast<FixedVectorType>(CI->getType())->getNumElements();
3069       unsigned SrcNumElts =
3070           cast<FixedVectorType>(Op1->getType())->getNumElements();
3071       unsigned Scale = DstNumElts / SrcNumElts;
3072 
3073       // Mask off the high bits of the immediate value; hardware ignores those.
3074       Imm = Imm % Scale;
3075 
3076       // Extend the second operand into a vector the size of the destination.
3077       SmallVector<int, 8> Idxs(DstNumElts);
3078       for (unsigned i = 0; i != SrcNumElts; ++i)
3079         Idxs[i] = i;
3080       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3081         Idxs[i] = SrcNumElts;
3082       Rep = Builder.CreateShuffleVector(Op1, Idxs);
3083 
3084       // Insert the second operand into the first operand.
3085 
3086       // Note that there is no guarantee that instruction lowering will actually
3087       // produce a vinsertf128 instruction for the created shuffles. In
3088       // particular, the 0 immediate case involves no lane changes, so it can
3089       // be handled as a blend.
3090 
3091       // Example of shuffle mask for 32-bit elements:
3092       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
3093       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
3094 
3095       // First fill with the identity mask.
3096       for (unsigned i = 0; i != DstNumElts; ++i)
3097         Idxs[i] = i;
3098       // Then replace the elements where we need to insert.
3099       for (unsigned i = 0; i != SrcNumElts; ++i)
3100         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3101       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3102 
3103       // If the intrinsic has a mask operand, handle that.
3104       if (CI->arg_size() == 5)
3105         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
3106                             CI->getArgOperand(3));
3107     } else if (IsX86 && (Name.starts_with("avx.vextractf128.") ||
3108                          Name == "avx2.vextracti128" ||
3109                          Name.starts_with("avx512.mask.vextract"))) {
3110       Value *Op0 = CI->getArgOperand(0);
3111       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3112       unsigned DstNumElts =
3113           cast<FixedVectorType>(CI->getType())->getNumElements();
3114       unsigned SrcNumElts =
3115           cast<FixedVectorType>(Op0->getType())->getNumElements();
3116       unsigned Scale = SrcNumElts / DstNumElts;
3117 
3118       // Mask off the high bits of the immediate value; hardware ignores those.
3119       Imm = Imm % Scale;
3120 
3121       // Get indexes for the subvector of the input vector.
3122       SmallVector<int, 8> Idxs(DstNumElts);
3123       for (unsigned i = 0; i != DstNumElts; ++i) {
3124         Idxs[i] = i + (Imm * DstNumElts);
3125       }
3126       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
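           // e.g. avx.vextractf128 with Imm == 1 on <8 x float> produces the
           // shuffle mask <4, 5, 6, 7>.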
3127 
3128       // If the intrinsic has a mask operand, handle that.
3129       if (CI->arg_size() == 4)
3130         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3131                             CI->getArgOperand(2));
3132     } else if (!IsX86 && Name == "stackprotectorcheck") {
3133       Rep = nullptr;
3134     } else if (IsX86 && (Name.starts_with("avx512.mask.perm.df.") ||
3135                          Name.starts_with("avx512.mask.perm.di."))) {
3136       Value *Op0 = CI->getArgOperand(0);
3137       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3138       auto *VecTy = cast<FixedVectorType>(CI->getType());
3139       unsigned NumElts = VecTy->getNumElements();
3140 
3141       SmallVector<int, 8> Idxs(NumElts);
3142       for (unsigned i = 0; i != NumElts; ++i)
3143         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3144 
3145       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
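           // vpermq/vpermpd-style: within each group of four lanes, lane i
           // selects the element given by bits [2i+1:2i] of the immediate.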
3146 
3147       if (CI->arg_size() == 4)
3148         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3149                             CI->getArgOperand(2));
3150     } else if (IsX86 && (Name.starts_with("avx.vperm2f128.") ||
3151                          Name == "avx2.vperm2i128")) {
3152       // The immediate permute control byte looks like this:
3153       //    [1:0] - select 128 bits from sources for low half of destination
3154       //    [2]   - ignore
3155       //    [3]   - zero low half of destination
3156       //    [5:4] - select 128 bits from sources for high half of destination
3157       //    [6]   - ignore
3158       //    [7]   - zero high half of destination
3159 
3160       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3161 
3162       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3163       unsigned HalfSize = NumElts / 2;
3164       SmallVector<int, 8> ShuffleMask(NumElts);
3165 
3166       // Determine which operand(s) are actually in use for this instruction.
3167       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3168       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3169 
3170       // If needed, replace operands based on zero mask.
3171       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3172       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3173 
3174       // Permute low half of result.
3175       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3176       for (unsigned i = 0; i < HalfSize; ++i)
3177         ShuffleMask[i] = StartIndex + i;
3178 
3179       // Permute high half of result.
3180       StartIndex = (Imm & 0x10) ? HalfSize : 0;
3181       for (unsigned i = 0; i < HalfSize; ++i)
3182         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3183 
3184       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
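           // e.g. Imm == 0x20 selects the low 128 bits of each source:
           // result = concat(Op0 low half, Op1 low half).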
3185 
3186     } else if (IsX86 && (Name.starts_with("avx.vpermil.") ||
3187                          Name == "sse2.pshuf.d" ||
3188                          Name.starts_with("avx512.mask.vpermil.p") ||
3189                          Name.starts_with("avx512.mask.pshuf.d."))) {
3190       Value *Op0 = CI->getArgOperand(0);
3191       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3192       auto *VecTy = cast<FixedVectorType>(CI->getType());
3193       unsigned NumElts = VecTy->getNumElements();
3194       // Calculate the size of each index in the immediate.
3195       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3196       unsigned IdxMask = ((1 << IdxSize) - 1);
3197 
3198       SmallVector<int, 8> Idxs(NumElts);
3199       // Look up the bits for this element, wrapping around the immediate
3200       // every 8 bits. Elements are grouped into sets of 2 or 4 elements so
3201       // we need to offset by the first index of each group.
3202       for (unsigned i = 0; i != NumElts; ++i)
3203         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3204 
3205       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
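           // e.g. sse2.pshuf.d (<4 x i32>, IdxSize == 2) gives the classic
           // PSHUFD selection: Idxs[i] = (Imm >> (2 * i)) & 3.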
3206 
3207       if (CI->arg_size() == 4)
3208         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3209                             CI->getArgOperand(2));
3210     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
3211                          Name.starts_with("avx512.mask.pshufl.w."))) {
3212       Value *Op0 = CI->getArgOperand(0);
3213       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3214       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3215 
3216       SmallVector<int, 16> Idxs(NumElts);
3217       for (unsigned l = 0; l != NumElts; l += 8) {
3218         for (unsigned i = 0; i != 4; ++i)
3219           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3220         for (unsigned i = 4; i != 8; ++i)
3221           Idxs[i + l] = i + l;
3222       }
3223 
3224       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3225 
3226       if (CI->arg_size() == 4)
3227         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3228                             CI->getArgOperand(2));
3229     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
3230                          Name.starts_with("avx512.mask.pshufh.w."))) {
3231       Value *Op0 = CI->getArgOperand(0);
3232       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3233       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3234 
3235       SmallVector<int, 16> Idxs(NumElts);
3236       for (unsigned l = 0; l != NumElts; l += 8) {
3237         for (unsigned i = 0; i != 4; ++i)
3238           Idxs[i + l] = i + l;
3239         for (unsigned i = 0; i != 4; ++i)
3240           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3241       }
3242 
3243       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3244 
3245       if (CI->arg_size() == 4)
3246         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3247                             CI->getArgOperand(2));
3248     } else if (IsX86 && Name.starts_with("avx512.mask.shuf.p")) {
3249       Value *Op0 = CI->getArgOperand(0);
3250       Value *Op1 = CI->getArgOperand(1);
3251       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3252       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3253 
3254       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3255       unsigned HalfLaneElts = NumLaneElts / 2;
3256 
3257       SmallVector<int, 16> Idxs(NumElts);
3258       for (unsigned i = 0; i != NumElts; ++i) {
3259         // Base index is the starting element of the lane.
3260         Idxs[i] = i - (i % NumLaneElts);
3261         // If we are half way through the lane switch to the other source.
3262         if ((i % NumLaneElts) >= HalfLaneElts)
3263           Idxs[i] += NumElts;
3264         // Now select the specific element by adding HalfLaneElts bits from
3265         // the immediate, wrapping around the immediate every 8 bits.
3266         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3267       }
3268 
3269       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3270 
3271       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
3272                           CI->getArgOperand(3));
3273     } else if (IsX86 && (Name.starts_with("avx512.mask.movddup") ||
3274                          Name.starts_with("avx512.mask.movshdup") ||
3275                          Name.starts_with("avx512.mask.movsldup"))) {
3276       Value *Op0 = CI->getArgOperand(0);
3277       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3278       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3279 
3280       unsigned Offset = 0;
3281       if (Name.starts_with("avx512.mask.movshdup."))
3282         Offset = 1;
3283 
3284       SmallVector<int, 16> Idxs(NumElts);
3285       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3286         for (unsigned i = 0; i != NumLaneElts; i += 2) {
3287           Idxs[i + l + 0] = i + l + Offset;
3288           Idxs[i + l + 1] = i + l + Offset;
3289         }
3290 
3291       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
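           // movsldup duplicates the even lanes, movshdup (Offset == 1) the
           // odd lanes, and movddup the even 64-bit lanes, e.g. <0, 0, 2, 2>.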
3292 
3293       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3294                           CI->getArgOperand(1));
3295     } else if (IsX86 && (Name.starts_with("avx512.mask.punpckl") ||
3296                          Name.starts_with("avx512.mask.unpckl."))) {
3297       Value *Op0 = CI->getArgOperand(0);
3298       Value *Op1 = CI->getArgOperand(1);
3299       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3300       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3301 
3302       SmallVector<int, 64> Idxs(NumElts);
3303       for (int l = 0; l != NumElts; l += NumLaneElts)
3304         for (int i = 0; i != NumLaneElts; ++i)
3305           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3306 
3307       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
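           // Interleave the low halves of each 128-bit lane, e.g. the first
           // lane becomes <0, NumElts + 0, 1, NumElts + 1, ...>.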
3308 
3309       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3310                           CI->getArgOperand(2));
3311     } else if (IsX86 && (Name.starts_with("avx512.mask.punpckh") ||
3312                          Name.starts_with("avx512.mask.unpckh."))) {
3313       Value *Op0 = CI->getArgOperand(0);
3314       Value *Op1 = CI->getArgOperand(1);
3315       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3316       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3317 
3318       SmallVector<int, 64> Idxs(NumElts);
3319       for (int l = 0; l != NumElts; l += NumLaneElts)
3320         for (int i = 0; i != NumLaneElts; ++i)
3321           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3322 
3323       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3324 
3325       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3326                           CI->getArgOperand(2));
3327     } else if (IsX86 && (Name.starts_with("avx512.mask.and.") ||
3328                          Name.starts_with("avx512.mask.pand."))) {
3329       VectorType *FTy = cast<VectorType>(CI->getType());
3330       VectorType *ITy = VectorType::getInteger(FTy);
3331       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3332                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3333       Rep = Builder.CreateBitCast(Rep, FTy);
3334       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3335                           CI->getArgOperand(2));
3336     } else if (IsX86 && (Name.starts_with("avx512.mask.andn.") ||
3337                          Name.starts_with("avx512.mask.pandn."))) {
3338       VectorType *FTy = cast<VectorType>(CI->getType());
3339       VectorType *ITy = VectorType::getInteger(FTy);
3340       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3341       Rep = Builder.CreateAnd(Rep,
3342                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3343       Rep = Builder.CreateBitCast(Rep, FTy);
3344       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3345                           CI->getArgOperand(2));
3346     } else if (IsX86 && (Name.starts_with("avx512.mask.or.") ||
3347                          Name.starts_with("avx512.mask.por."))) {
3348       VectorType *FTy = cast<VectorType>(CI->getType());
3349       VectorType *ITy = VectorType::getInteger(FTy);
3350       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3351                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3352       Rep = Builder.CreateBitCast(Rep, FTy);
3353       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3354                           CI->getArgOperand(2));
3355     } else if (IsX86 && (Name.starts_with("avx512.mask.xor.") ||
3356                          Name.starts_with("avx512.mask.pxor."))) {
3357       VectorType *FTy = cast<VectorType>(CI->getType());
3358       VectorType *ITy = VectorType::getInteger(FTy);
3359       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3360                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3361       Rep = Builder.CreateBitCast(Rep, FTy);
3362       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3363                           CI->getArgOperand(2));
3364     } else if (IsX86 && Name.starts_with("avx512.mask.padd.")) {
3365       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3366       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3367                           CI->getArgOperand(2));
3368     } else if (IsX86 && Name.starts_with("avx512.mask.psub.")) {
3369       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3370       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3371                           CI->getArgOperand(2));
3372     } else if (IsX86 && Name.starts_with("avx512.mask.pmull.")) {
3373       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3374       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3375                           CI->getArgOperand(2));
3376     } else if (IsX86 && Name.starts_with("avx512.mask.add.p")) {
3377       if (Name.ends_with(".512")) {
3378         Intrinsic::ID IID;
3379         if (Name[17] == 's')
3380           IID = Intrinsic::x86_avx512_add_ps_512;
3381         else
3382           IID = Intrinsic::x86_avx512_add_pd_512;
3383 
3384         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3385                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3386                                    CI->getArgOperand(4) });
3387       } else {
3388         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3389       }
3390       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3391                           CI->getArgOperand(2));
3392     } else if (IsX86 && Name.starts_with("avx512.mask.div.p")) {
3393       if (Name.ends_with(".512")) {
3394         Intrinsic::ID IID;
3395         if (Name[17] == 's')
3396           IID = Intrinsic::x86_avx512_div_ps_512;
3397         else
3398           IID = Intrinsic::x86_avx512_div_pd_512;
3399 
3400         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3401                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3402                                    CI->getArgOperand(4) });
3403       } else {
3404         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3405       }
3406       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3407                           CI->getArgOperand(2));
3408     } else if (IsX86 && Name.starts_with("avx512.mask.mul.p")) {
3409       if (Name.ends_with(".512")) {
3410         Intrinsic::ID IID;
3411         if (Name[17] == 's')
3412           IID = Intrinsic::x86_avx512_mul_ps_512;
3413         else
3414           IID = Intrinsic::x86_avx512_mul_pd_512;
3415 
3416         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3417                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3418                                    CI->getArgOperand(4) });
3419       } else {
3420         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3421       }
3422       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3423                           CI->getArgOperand(2));
3424     } else if (IsX86 && Name.starts_with("avx512.mask.sub.p")) {
3425       if (Name.ends_with(".512")) {
3426         Intrinsic::ID IID;
3427         if (Name[17] == 's')
3428           IID = Intrinsic::x86_avx512_sub_ps_512;
3429         else
3430           IID = Intrinsic::x86_avx512_sub_pd_512;
3431 
3432         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3433                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3434                                    CI->getArgOperand(4) });
3435       } else {
3436         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3437       }
3438       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3439                           CI->getArgOperand(2));
3440     } else if (IsX86 && (Name.starts_with("avx512.mask.max.p") ||
3441                          Name.starts_with("avx512.mask.min.p")) &&
3442                Name.drop_front(18) == ".512") {
3443       bool IsDouble = Name[17] == 'd';
3444       bool IsMin = Name[13] == 'i';
3445       static const Intrinsic::ID MinMaxTbl[2][2] = {
3446         { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3447         { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3448       };
3449       Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3450 
3451       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3452                                { CI->getArgOperand(0), CI->getArgOperand(1),
3453                                  CI->getArgOperand(4) });
3454       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3455                           CI->getArgOperand(2));
3456     } else if (IsX86 && Name.starts_with("avx512.mask.lzcnt.")) {
3457       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3458                                                          Intrinsic::ctlz,
3459                                                          CI->getType()),
3460                                { CI->getArgOperand(0), Builder.getInt1(false) });
3461       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3462                           CI->getArgOperand(1));
3463     } else if (IsX86 && Name.starts_with("avx512.mask.psll")) {
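      // Decode the old name positionally: the character after "psll"
      // (index 16) is '.', 'i' or 'v', distinguishing the plain
      // ("avx512.mask.psll.d.128"), immediate-count ("avx512.mask.pslli.d",
      // "avx512.mask.psll.di.128") and variable-count ("avx512.mask.psllv4.si")
      // forms; Size is the element-type letter that follows. The psrl and
      // psra blocks below decode their names the same way.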
3464       bool IsImmediate = Name[16] == 'i' ||
3465                          (Name.size() > 18 && Name[18] == 'i');
3466       bool IsVariable = Name[16] == 'v';
3467       char Size = Name[16] == '.' ? Name[17] :
3468                   Name[17] == '.' ? Name[18] :
3469                   Name[18] == '.' ? Name[19] :
3470                                     Name[20];
3471 
3472       Intrinsic::ID IID;
3473       if (IsVariable && Name[17] != '.') {
3474         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3475           IID = Intrinsic::x86_avx2_psllv_q;
3476         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3477           IID = Intrinsic::x86_avx2_psllv_q_256;
3478         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3479           IID = Intrinsic::x86_avx2_psllv_d;
3480         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3481           IID = Intrinsic::x86_avx2_psllv_d_256;
3482         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3483           IID = Intrinsic::x86_avx512_psllv_w_128;
3484         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3485           IID = Intrinsic::x86_avx512_psllv_w_256;
3486         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3487           IID = Intrinsic::x86_avx512_psllv_w_512;
3488         else
3489           llvm_unreachable("Unexpected size");
3490       } else if (Name.ends_with(".128")) {
3491         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3492           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3493                             : Intrinsic::x86_sse2_psll_d;
3494         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3495           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3496                             : Intrinsic::x86_sse2_psll_q;
3497         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3498           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3499                             : Intrinsic::x86_sse2_psll_w;
3500         else
3501           llvm_unreachable("Unexpected size");
3502       } else if (Name.ends_with(".256")) {
3503         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3504           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3505                             : Intrinsic::x86_avx2_psll_d;
3506         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3507           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3508                             : Intrinsic::x86_avx2_psll_q;
3509         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3510           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3511                             : Intrinsic::x86_avx2_psll_w;
3512         else
3513           llvm_unreachable("Unexpected size");
3514       } else {
3515         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3516           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3517                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
3518                               Intrinsic::x86_avx512_psll_d_512;
3519         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3520           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3521                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
3522                               Intrinsic::x86_avx512_psll_q_512;
3523         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3524           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3525                             : Intrinsic::x86_avx512_psll_w_512;
3526         else
3527           llvm_unreachable("Unexpected size");
3528       }
3529 
3530       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3531     } else if (IsX86 && Name.starts_with("avx512.mask.psrl")) {
3532       bool IsImmediate = Name[16] == 'i' ||
3533                          (Name.size() > 18 && Name[18] == 'i');
3534       bool IsVariable = Name[16] == 'v';
3535       char Size = Name[16] == '.' ? Name[17] :
3536                   Name[17] == '.' ? Name[18] :
3537                   Name[18] == '.' ? Name[19] :
3538                                     Name[20];
3539 
3540       Intrinsic::ID IID;
3541       if (IsVariable && Name[17] != '.') {
3542         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3543           IID = Intrinsic::x86_avx2_psrlv_q;
3544         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3545           IID = Intrinsic::x86_avx2_psrlv_q_256;
3546         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3547           IID = Intrinsic::x86_avx2_psrlv_d;
3548         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3549           IID = Intrinsic::x86_avx2_psrlv_d_256;
3550         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3551           IID = Intrinsic::x86_avx512_psrlv_w_128;
3552         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3553           IID = Intrinsic::x86_avx512_psrlv_w_256;
3554         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3555           IID = Intrinsic::x86_avx512_psrlv_w_512;
3556         else
3557           llvm_unreachable("Unexpected size");
3558       } else if (Name.ends_with(".128")) {
3559         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3560           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3561                             : Intrinsic::x86_sse2_psrl_d;
3562         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3563           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3564                             : Intrinsic::x86_sse2_psrl_q;
3565         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3566           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3567                             : Intrinsic::x86_sse2_psrl_w;
3568         else
3569           llvm_unreachable("Unexpected size");
3570       } else if (Name.ends_with(".256")) {
3571         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3572           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3573                             : Intrinsic::x86_avx2_psrl_d;
3574         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3575           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3576                             : Intrinsic::x86_avx2_psrl_q;
3577         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3578           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3579                             : Intrinsic::x86_avx2_psrl_w;
3580         else
3581           llvm_unreachable("Unexpected size");
3582       } else {
3583         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3584           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3585                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
3586                               Intrinsic::x86_avx512_psrl_d_512;
3587         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3588           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3589                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
3590                               Intrinsic::x86_avx512_psrl_q_512;
3591         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3592           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3593                             : Intrinsic::x86_avx512_psrl_w_512;
3594         else
3595           llvm_unreachable("Unexpected size");
3596       }
3597 
3598       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3599     } else if (IsX86 && Name.starts_with("avx512.mask.psra")) {
3600       bool IsImmediate = Name[16] == 'i' ||
3601                          (Name.size() > 18 && Name[18] == 'i');
3602       bool IsVariable = Name[16] == 'v';
3603       char Size = Name[16] == '.' ? Name[17] :
3604                   Name[17] == '.' ? Name[18] :
3605                   Name[18] == '.' ? Name[19] :
3606                                     Name[20];
3607 
3608       Intrinsic::ID IID;
3609       if (IsVariable && Name[17] != '.') {
3610         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3611           IID = Intrinsic::x86_avx2_psrav_d;
3612         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3613           IID = Intrinsic::x86_avx2_psrav_d_256;
3614         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3615           IID = Intrinsic::x86_avx512_psrav_w_128;
3616         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3617           IID = Intrinsic::x86_avx512_psrav_w_256;
3618         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3619           IID = Intrinsic::x86_avx512_psrav_w_512;
3620         else
3621           llvm_unreachable("Unexpected size");
3622       } else if (Name.ends_with(".128")) {
3623         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3624           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3625                             : Intrinsic::x86_sse2_psra_d;
3626         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3627           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3628                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3629                               Intrinsic::x86_avx512_psra_q_128;
3630         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3631           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3632                             : Intrinsic::x86_sse2_psra_w;
3633         else
3634           llvm_unreachable("Unexpected size");
3635       } else if (Name.ends_with(".256")) {
3636         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3637           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3638                             : Intrinsic::x86_avx2_psra_d;
3639         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3640           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3641                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3642                               Intrinsic::x86_avx512_psra_q_256;
3643         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3644           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3645                             : Intrinsic::x86_avx2_psra_w;
3646         else
3647           llvm_unreachable("Unexpected size");
3648       } else {
3649         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3650           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3651                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3652                               Intrinsic::x86_avx512_psra_d_512;
3653         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3654           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3655                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3656                               Intrinsic::x86_avx512_psra_q_512;
3657         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3658           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3659                             : Intrinsic::x86_avx512_psra_w_512;
3660         else
3661           llvm_unreachable("Unexpected size");
3662       }
3663 
3664       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3665     } else if (IsX86 && Name.starts_with("avx512.mask.move.s")) {
3666       Rep = upgradeMaskedMove(Builder, *CI);
3667     } else if (IsX86 && Name.starts_with("avx512.cvtmask2")) {
3668       Rep = UpgradeMaskToInt(Builder, *CI);
3669     } else if (IsX86 && Name.ends_with(".movntdqa")) {
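      // MOVNTDQA is a non-temporal load, so lower it to an ordinary aligned
      // load tagged with !nontemporal metadata, e.g.
      //   %v = load <2 x i64>, ptr %p, align 16, !nontemporal !0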
3670       MDNode *Node = MDNode::get(
3671           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3672 
3673       Value *Ptr = CI->getArgOperand(0);
3674 
3675       // Convert the type of the pointer to a pointer to the loaded type.
3676       Value *BC = Builder.CreateBitCast(
3677           Ptr, PointerType::getUnqual(CI->getType()), "cast");
3678       LoadInst *LI = Builder.CreateAlignedLoad(
3679           CI->getType(), BC,
3680           Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3681       LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3682       Rep = LI;
3683     } else if (IsX86 && (Name.starts_with("fma.vfmadd.") ||
3684                          Name.starts_with("fma.vfmsub.") ||
3685                          Name.starts_with("fma.vfnmadd.") ||
3686                          Name.starts_with("fma.vfnmsub."))) {
3687       bool NegMul = Name[6] == 'n';
3688       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3689       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3690 
3691       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3692                        CI->getArgOperand(2) };
3693 
3694       if (IsScalar) {
3695         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3696         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3697         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3698       }
3699 
3700       if (NegMul && !IsScalar)
3701         Ops[0] = Builder.CreateFNeg(Ops[0]);
3702       if (NegMul && IsScalar)
3703         Ops[1] = Builder.CreateFNeg(Ops[1]);
3704       if (NegAcc)
3705         Ops[2] = Builder.CreateFNeg(Ops[2]);
3706 
3707       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3708                                                          Intrinsic::fma,
3709                                                          Ops[0]->getType()),
3710                                Ops);
3711 
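      // For the scalar forms only lane 0 was computed; the upper lanes pass
      // through unchanged from operand 0.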
3712       if (IsScalar)
3713         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3714                                           (uint64_t)0);
3715     } else if (IsX86 && Name.starts_with("fma4.vfmadd.s")) {
3716       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3717                        CI->getArgOperand(2) };
3718 
3719       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3720       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3721       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3722 
3723       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3724                                                          Intrinsic::fma,
3725                                                          Ops[0]->getType()),
3726                                Ops);
3727 
3728       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3729                                         Rep, (uint64_t)0);
3730     } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.s") ||
3731                          Name.starts_with("avx512.maskz.vfmadd.s") ||
3732                          Name.starts_with("avx512.mask3.vfmadd.s") ||
3733                          Name.starts_with("avx512.mask3.vfmsub.s") ||
3734                          Name.starts_with("avx512.mask3.vfnmsub.s"))) {
3735       bool IsMask3 = Name[11] == '3';
3736       bool IsMaskZ = Name[11] == 'z';
3737       // Drop the "avx512.mask." to make it easier.
3738       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3739       bool NegMul = Name[2] == 'n';
3740       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3741 
3742       Value *A = CI->getArgOperand(0);
3743       Value *B = CI->getArgOperand(1);
3744       Value *C = CI->getArgOperand(2);
3745 
3746       if (NegMul && (IsMask3 || IsMaskZ))
3747         A = Builder.CreateFNeg(A);
3748       if (NegMul && !(IsMask3 || IsMaskZ))
3749         B = Builder.CreateFNeg(B);
3750       if (NegAcc)
3751         C = Builder.CreateFNeg(C);
3752 
3753       A = Builder.CreateExtractElement(A, (uint64_t)0);
3754       B = Builder.CreateExtractElement(B, (uint64_t)0);
3755       C = Builder.CreateExtractElement(C, (uint64_t)0);
3756 
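      // A rounding-mode operand of 4 (CUR_DIRECTION) requests no explicit
      // rounding; any other value needs the rounding-aware AVX512 intrinsic.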
3757       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3758           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3759         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3760 
3761         Intrinsic::ID IID;
3762         if (Name.back() == 'd')
3763           IID = Intrinsic::x86_avx512_vfmadd_f64;
3764         else
3765           IID = Intrinsic::x86_avx512_vfmadd_f32;
3766         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3767         Rep = Builder.CreateCall(FMA, Ops);
3768       } else {
3769         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3770                                                   Intrinsic::fma,
3771                                                   A->getType());
3772         Rep = Builder.CreateCall(FMA, { A, B, C });
3773       }
3774 
3775       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3776                         IsMask3 ? C : A;
3777 
3778       // For Mask3 with NegAcc, we need to create a new extractelement that
3779       // avoids the negation above.
3780       if (NegAcc && IsMask3)
3781         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3782                                                 (uint64_t)0);
3783 
3784       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3785                                 Rep, PassThru);
3786       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3787                                         Rep, (uint64_t)0);
3788     } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.p") ||
3789                          Name.starts_with("avx512.mask.vfnmadd.p") ||
3790                          Name.starts_with("avx512.mask.vfnmsub.p") ||
3791                          Name.starts_with("avx512.mask3.vfmadd.p") ||
3792                          Name.starts_with("avx512.mask3.vfmsub.p") ||
3793                          Name.starts_with("avx512.mask3.vfnmsub.p") ||
3794                          Name.starts_with("avx512.maskz.vfmadd.p"))) {
3795       bool IsMask3 = Name[11] == '3';
3796       bool IsMaskZ = Name[11] == 'z';
3797       // Drop the "avx512.mask." to make it easier.
3798       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3799       bool NegMul = Name[2] == 'n';
3800       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3801 
3802       Value *A = CI->getArgOperand(0);
3803       Value *B = CI->getArgOperand(1);
3804       Value *C = CI->getArgOperand(2);
3805 
3806       if (NegMul && (IsMask3 || IsMaskZ))
3807         A = Builder.CreateFNeg(A);
3808       if (NegMul && !(IsMask3 || IsMaskZ))
3809         B = Builder.CreateFNeg(B);
3810       if (NegAcc)
3811         C = Builder.CreateFNeg(C);
3812 
3813       if (CI->arg_size() == 5 &&
3814           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3815            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3816         Intrinsic::ID IID;
3817         // Check the character just before the ".512" suffix.
3818         if (Name[Name.size()-5] == 's')
3819           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3820         else
3821           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3822 
3823         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3824                                  { A, B, C, CI->getArgOperand(4) });
3825       } else {
3826         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3827                                                   Intrinsic::fma,
3828                                                   A->getType());
3829         Rep = Builder.CreateCall(FMA, { A, B, C });
3830       }
3831 
3832       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3833                         IsMask3 ? CI->getArgOperand(2) :
3834                                   CI->getArgOperand(0);
3835 
3836       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3837     } else if (IsX86 && Name.starts_with("fma.vfmsubadd.p")) {
3838       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3839       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3840       Intrinsic::ID IID;
3841       if (VecWidth == 128 && EltWidth == 32)
3842         IID = Intrinsic::x86_fma_vfmaddsub_ps;
3843       else if (VecWidth == 256 && EltWidth == 32)
3844         IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3845       else if (VecWidth == 128 && EltWidth == 64)
3846         IID = Intrinsic::x86_fma_vfmaddsub_pd;
3847       else if (VecWidth == 256 && EltWidth == 64)
3848         IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3849       else
3850         llvm_unreachable("Unexpected intrinsic");
3851 
3852       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3853                        CI->getArgOperand(2) };
3854       Ops[2] = Builder.CreateFNeg(Ops[2]);
3855       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3856                                Ops);
3857     } else if (IsX86 && (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3858                          Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3859                          Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3860                          Name.starts_with("avx512.mask3.vfmsubadd.p"))) {
3861       bool IsMask3 = Name[11] == '3';
3862       bool IsMaskZ = Name[11] == 'z';
3863       // Drop the "avx512.mask." to make it easier.
3864       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3865       bool IsSubAdd = Name[3] == 's';
3866       if (CI->arg_size() == 5) {
3867         Intrinsic::ID IID;
3868         // Check the character just before the ".512" suffix.
3869         if (Name[Name.size()-5] == 's')
3870           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3871         else
3872           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3873 
3874         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3875                          CI->getArgOperand(2), CI->getArgOperand(4) };
3876         if (IsSubAdd)
3877           Ops[2] = Builder.CreateFNeg(Ops[2]);
3878 
3879         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3880                                  Ops);
3881       } else {
3882         int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3883 
3884         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3885                          CI->getArgOperand(2) };
3886 
3887         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3888                                                   Ops[0]->getType());
3889         Value *Odd = Builder.CreateCall(FMA, Ops);
3890         Ops[2] = Builder.CreateFNeg(Ops[2]);
3891         Value *Even = Builder.CreateCall(FMA, Ops);
3892 
3893         if (IsSubAdd)
3894           std::swap(Even, Odd);
3895 
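        // Interleave the two results: even lanes take element i of Even
        // (shuffle index i selects the first operand), odd lanes take
        // element i of Odd (index i + NumElts selects the second).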
3896         SmallVector<int, 32> Idxs(NumElts);
3897         for (int i = 0; i != NumElts; ++i)
3898           Idxs[i] = i + (i % 2) * NumElts;
3899 
3900         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3901       }
3902 
3903       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3904                         IsMask3 ? CI->getArgOperand(2) :
3905                                   CI->getArgOperand(0);
3906 
3907       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3908     } else if (IsX86 && (Name.starts_with("avx512.mask.pternlog.") ||
3909                          Name.starts_with("avx512.maskz.pternlog."))) {
3910       bool ZeroMask = Name[11] == 'z';
3911       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3912       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3913       Intrinsic::ID IID;
3914       if (VecWidth == 128 && EltWidth == 32)
3915         IID = Intrinsic::x86_avx512_pternlog_d_128;
3916       else if (VecWidth == 256 && EltWidth == 32)
3917         IID = Intrinsic::x86_avx512_pternlog_d_256;
3918       else if (VecWidth == 512 && EltWidth == 32)
3919         IID = Intrinsic::x86_avx512_pternlog_d_512;
3920       else if (VecWidth == 128 && EltWidth == 64)
3921         IID = Intrinsic::x86_avx512_pternlog_q_128;
3922       else if (VecWidth == 256 && EltWidth == 64)
3923         IID = Intrinsic::x86_avx512_pternlog_q_256;
3924       else if (VecWidth == 512 && EltWidth == 64)
3925         IID = Intrinsic::x86_avx512_pternlog_q_512;
3926       else
3927         llvm_unreachable("Unexpected intrinsic");
3928 
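      // Forward the three sources plus the 8-bit truth-table immediate; the
      // old intrinsic's writemask (arg 4) is applied via the select below.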
3929       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3930                         CI->getArgOperand(2), CI->getArgOperand(3) };
3931       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3932                                Args);
3933       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3934                                  : CI->getArgOperand(0);
3935       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3936     } else if (IsX86 && (Name.starts_with("avx512.mask.vpmadd52") ||
3937                          Name.starts_with("avx512.maskz.vpmadd52"))) {
3938       bool ZeroMask = Name[11] == 'z';
3939       bool High = Name[20] == 'h' || Name[21] == 'h';
3940       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3941       Intrinsic::ID IID;
3942       if (VecWidth == 128 && !High)
3943         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3944       else if (VecWidth == 256 && !High)
3945         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3946       else if (VecWidth == 512 && !High)
3947         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3948       else if (VecWidth == 128 && High)
3949         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3950       else if (VecWidth == 256 && High)
3951         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3952       else if (VecWidth == 512 && High)
3953         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3954       else
3955         llvm_unreachable("Unexpected intrinsic");
3956 
3957       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3958                         CI->getArgOperand(2) };
3959       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3960                                Args);
3961       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3962                                  : CI->getArgOperand(0);
3963       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3964     } else if (IsX86 && (Name.starts_with("avx512.mask.vpermi2var.") ||
3965                          Name.starts_with("avx512.mask.vpermt2var.") ||
3966                          Name.starts_with("avx512.maskz.vpermt2var."))) {
3967       bool ZeroMask = Name[11] == 'z';
3968       bool IndexForm = Name[17] == 'i';
3969       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3970     } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpbusd.") ||
3971                          Name.starts_with("avx512.maskz.vpdpbusd.") ||
3972                          Name.starts_with("avx512.mask.vpdpbusds.") ||
3973                          Name.starts_with("avx512.maskz.vpdpbusds."))) {
3974       bool ZeroMask = Name[11] == 'z';
3975       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3976       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3977       Intrinsic::ID IID;
3978       if (VecWidth == 128 && !IsSaturating)
3979         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3980       else if (VecWidth == 256 && !IsSaturating)
3981         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3982       else if (VecWidth == 512 && !IsSaturating)
3983         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3984       else if (VecWidth == 128 && IsSaturating)
3985         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3986       else if (VecWidth == 256 && IsSaturating)
3987         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3988       else if (VecWidth == 512 && IsSaturating)
3989         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3990       else
3991         llvm_unreachable("Unexpected intrinsic");
3992 
3993       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3994                         CI->getArgOperand(2) };
3995       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3996                                Args);
3997       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3998                                  : CI->getArgOperand(0);
3999       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4000     } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpwssd.") ||
4001                          Name.starts_with("avx512.maskz.vpdpwssd.") ||
4002                          Name.starts_with("avx512.mask.vpdpwssds.") ||
4003                          Name.starts_with("avx512.maskz.vpdpwssds."))) {
4004       bool ZeroMask = Name[11] == 'z';
4005       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4006       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4007       Intrinsic::ID IID;
4008       if (VecWidth == 128 && !IsSaturating)
4009         IID = Intrinsic::x86_avx512_vpdpwssd_128;
4010       else if (VecWidth == 256 && !IsSaturating)
4011         IID = Intrinsic::x86_avx512_vpdpwssd_256;
4012       else if (VecWidth == 512 && !IsSaturating)
4013         IID = Intrinsic::x86_avx512_vpdpwssd_512;
4014       else if (VecWidth == 128 && IsSaturating)
4015         IID = Intrinsic::x86_avx512_vpdpwssds_128;
4016       else if (VecWidth == 256 && IsSaturating)
4017         IID = Intrinsic::x86_avx512_vpdpwssds_256;
4018       else if (VecWidth == 512 && IsSaturating)
4019         IID = Intrinsic::x86_avx512_vpdpwssds_512;
4020       else
4021         llvm_unreachable("Unexpected intrinsic");
4022 
4023       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4024                         CI->getArgOperand(2) };
4025       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4026                                Args);
4027       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4028                                  : CI->getArgOperand(0);
4029       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4030     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4031                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
4032                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
4033       Intrinsic::ID IID;
4034       if (Name[0] == 'a' && Name.back() == '2')
4035         IID = Intrinsic::x86_addcarry_32;
4036       else if (Name[0] == 'a' && Name.back() == '4')
4037         IID = Intrinsic::x86_addcarry_64;
4038       else if (Name[0] == 's' && Name.back() == '2')
4039         IID = Intrinsic::x86_subborrow_32;
4040       else if (Name[0] == 's' && Name.back() == '4')
4041         IID = Intrinsic::x86_subborrow_64;
4042       else
4043         llvm_unreachable("Unexpected intrinsic");
4044 
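      // The old form, e.g. i8 @llvm.x86.addcarry.u32(i8, i32, i32, ptr),
      // returned only the carry flag and stored the arithmetic result
      // through its pointer operand; the new one returns { i8, i32 }.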
4045       // Make a call with 3 operands.
4046       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4047                         CI->getArgOperand(2)};
4048       Value *NewCall = Builder.CreateCall(
4049                                 Intrinsic::getDeclaration(CI->getModule(), IID),
4050                                 Args);
4051 
4052       // Extract the second result and store it.
4053       Value *Data = Builder.CreateExtractValue(NewCall, 1);
4054       // Cast the pointer to the right type.
4055       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
4056                                  llvm::PointerType::getUnqual(Data->getType()));
4057       Builder.CreateAlignedStore(Data, Ptr, Align(1));
4058       // Replace the original call result with the first result of the new call.
4059       Value *CF = Builder.CreateExtractValue(NewCall, 0);
4060 
4061       CI->replaceAllUsesWith(CF);
4062       Rep = nullptr;
4063     } else if (IsX86 && Name.starts_with("avx512.mask.") &&
4064                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4065       // Rep will be updated by the call in the condition.
4066     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4067       Value *Arg = CI->getArgOperand(0);
4068       Value *Neg = Builder.CreateNeg(Arg, "neg");
4069       Value *Cmp = Builder.CreateICmpSGE(
4070           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4071       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4072     } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4073                           Name.starts_with("atomic.load.add.f64.p"))) {
4074       Value *Ptr = CI->getArgOperand(0);
4075       Value *Val = CI->getArgOperand(1);
4076       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4077                                     AtomicOrdering::SequentiallyConsistent);
4078     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
4079                           Name == "max.ui" || Name == "max.ull")) {
4080       Value *Arg0 = CI->getArgOperand(0);
4081       Value *Arg1 = CI->getArgOperand(1);
4082       Value *Cmp = Name.ends_with(".ui") || Name.ends_with(".ull")
4083                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4084                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4085       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4086     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
4087                           Name == "min.ui" || Name == "min.ull")) {
4088       Value *Arg0 = CI->getArgOperand(0);
4089       Value *Arg1 = CI->getArgOperand(1);
4090       Value *Cmp = Name.ends_with(".ui") || Name.ends_with(".ull")
4091                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4092                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4093       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4094     } else if (IsNVVM && Name == "clz.ll") {
4095       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4096       Value *Arg = CI->getArgOperand(0);
4097       Value *Ctlz = Builder.CreateCall(
4098           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4099                                     {Arg->getType()}),
4100           {Arg, Builder.getFalse()}, "ctlz");
4101       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4102     } else if (IsNVVM && Name == "popc.ll") {
4103       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4104       // i64.
4105       Value *Arg = CI->getArgOperand(0);
4106       Value *Popc = Builder.CreateCall(
4107           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4108                                     {Arg->getType()}),
4109           Arg, "ctpop");
4110       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4111     } else if (IsNVVM) {
4112       if (Name == "h2f") {
4113         Rep =
4114             Builder.CreateCall(Intrinsic::getDeclaration(
4115                                    F->getParent(), Intrinsic::convert_from_fp16,
4116                                    {Builder.getFloatTy()}),
4117                                CI->getArgOperand(0), "h2f");
4118       } else {
4119         Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
4120         if (IID != Intrinsic::not_intrinsic &&
4121             !F->getReturnType()->getScalarType()->isBFloatTy()) {
4122           rename(F);
4123           NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4124           SmallVector<Value *, 2> Args;
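          // The old NVVM names used integer types where the new intrinsics
          // take bf16 vectors, so bitcast such arguments here (and the
          // result, below) between the two representations.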
4125           for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4126             Value *Arg = CI->getArgOperand(I);
4127             Type *OldType = Arg->getType();
4128             Type *NewType = NewFn->getArg(I)->getType();
4129             Args.push_back((OldType->isIntegerTy() &&
4130                             NewType->getScalarType()->isBFloatTy())
4131                                ? Builder.CreateBitCast(Arg, NewType)
4132                                : Arg);
4133           }
4134           Rep = Builder.CreateCall(NewFn, Args);
4135           if (F->getReturnType()->isIntegerTy())
4136             Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4137         }
4138       }
4139     } else if (IsARM) {
4140       Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
4141     } else if (IsAMDGCN) {
4142       Rep = UpgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4143     } else {
4144       llvm_unreachable("Unknown function for CallBase upgrade.");
4145     }
4146 
4147     if (Rep)
4148       CI->replaceAllUsesWith(Rep);
4149     CI->eraseFromParent();
4150     return;
4151   }
4152 
4153   const auto &DefaultCase = [&]() -> void {
4154     if (CI->getFunctionType() == NewFn->getFunctionType()) {
4155       // Handle generic mangling change.
4156       assert(
4157           (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4158           "Unknown function for CallBase upgrade and isn't just a name change");
4159       CI->setCalledFunction(NewFn);
4160       return;
4161     }
4162 
4163     // This must be an upgrade from a named to a literal struct.
4164     if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4165       assert(OldST != NewFn->getReturnType() &&
4166              "Return type must have changed");
4167       assert(OldST->getNumElements() ==
4168                  cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4169              "Must have same number of elements");
4170 
4171       SmallVector<Value *> Args(CI->args());
4172       Value *NewCI = Builder.CreateCall(NewFn, Args);
4173       Value *Res = PoisonValue::get(OldST);
4174       for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4175         Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4176         Res = Builder.CreateInsertValue(Res, Elem, Idx);
4177       }
4178       CI->replaceAllUsesWith(Res);
4179       CI->eraseFromParent();
4180       return;
4181     }
4182 
4183     // We're probably about to produce something invalid. Let the verifier catch
4184     // it instead of dying here.
4185     CI->setCalledOperand(
4186         ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4187     return;
4188   };
4189   CallInst *NewCall = nullptr;
4190   switch (NewFn->getIntrinsicID()) {
4191   default: {
4192     DefaultCase();
4193     return;
4194   }
4195   case Intrinsic::arm_neon_vst1:
4196   case Intrinsic::arm_neon_vst2:
4197   case Intrinsic::arm_neon_vst3:
4198   case Intrinsic::arm_neon_vst4:
4199   case Intrinsic::arm_neon_vst2lane:
4200   case Intrinsic::arm_neon_vst3lane:
4201   case Intrinsic::arm_neon_vst4lane: {
4202     SmallVector<Value *, 4> Args(CI->args());
4203     NewCall = Builder.CreateCall(NewFn, Args);
4204     break;
4205   }
4206   case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4207   case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4208   case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4209     LLVMContext &Ctx = F->getParent()->getContext();
4210     SmallVector<Value *, 4> Args(CI->args());
4211     Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4212                                cast<ConstantInt>(Args[3])->getZExtValue());
4213     NewCall = Builder.CreateCall(NewFn, Args);
4214     break;
4215   }
4216   case Intrinsic::aarch64_sve_ld3_sret:
4217   case Intrinsic::aarch64_sve_ld4_sret:
4218   case Intrinsic::aarch64_sve_ld2_sret: {
4219     StringRef Name = F->getName();
4220     Name = Name.substr(5);
4221     unsigned N = StringSwitch<unsigned>(Name)
4222                      .StartsWith("aarch64.sve.ld2", 2)
4223                      .StartsWith("aarch64.sve.ld3", 3)
4224                      .StartsWith("aarch64.sve.ld4", 4)
4225                      .Default(0);
4226     ScalableVectorType *RetTy =
4227         cast<ScalableVectorType>(F->getReturnType());
4228     unsigned MinElts = RetTy->getMinNumElements() / N;
4229     SmallVector<Value *, 2> Args(CI->args());
4230     Value *NewLdCall = Builder.CreateCall(NewFn, Args);
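    // The old ldN intrinsics returned a single wide scalable vector; the
    // sret forms return N parts, so splice each part back in at I * MinElts.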
4231     Value *Ret = llvm::PoisonValue::get(RetTy);
4232     for (unsigned I = 0; I < N; I++) {
4233       Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4234       Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4235       Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4236     }
4237     NewCall = dyn_cast<CallInst>(Ret);
4238     break;
4239   }
4240 
4241   case Intrinsic::coro_end: {
4242     SmallVector<Value *, 3> Args(CI->args());
4243     Args.push_back(ConstantTokenNone::get(CI->getContext()));
4244     NewCall = Builder.CreateCall(NewFn, Args);
4245     break;
4246   }
4247 
4248   case Intrinsic::vector_extract: {
4249     StringRef Name = F->getName();
4250     Name = Name.substr(5); // Strip llvm
4251     if (!Name.starts_with("aarch64.sve.tuple.get")) {
4252       DefaultCase();
4253       return;
4254     }
4255     ScalableVectorType *RetTy =
4256         cast<ScalableVectorType>(F->getReturnType());
4257     unsigned MinElts = RetTy->getMinNumElements();
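    // tuple.get indexed whole sub-vectors, while vector.extract takes an
    // element offset, so scale the tuple index by the element count.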
4258     unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4259     Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4260     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4261     break;
4262   }
4263 
4264   case Intrinsic::vector_insert: {
4265     StringRef Name = F->getName();
4266     Name = Name.substr(5);
4267     if (!Name.starts_with("aarch64.sve.tuple")) {
4268       DefaultCase();
4269       return;
4270     }
4271     if (Name.starts_with("aarch64.sve.tuple.set")) {
4272       unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4273       ScalableVectorType *Ty =
4274           cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4275       Value *NewIdx =
4276           ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4277       NewCall = Builder.CreateCall(
4278           NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4279       break;
4280     }
4281     if (Name.starts_with("aarch64.sve.tuple.create")) {
4282       unsigned N = StringSwitch<unsigned>(Name)
4283                        .StartsWith("aarch64.sve.tuple.create2", 2)
4284                        .StartsWith("aarch64.sve.tuple.create3", 3)
4285                        .StartsWith("aarch64.sve.tuple.create4", 4)
4286                        .Default(0);
4287       assert(N > 1 && "Create is expected to be between 2 and 4");
4288       ScalableVectorType *RetTy =
4289           cast<ScalableVectorType>(F->getReturnType());
4290       Value *Ret = llvm::PoisonValue::get(RetTy);
4291       unsigned MinElts = RetTy->getMinNumElements() / N;
4292       for (unsigned I = 0; I < N; I++) {
4293         Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4294         Value *V = CI->getArgOperand(I);
4295         Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4296       }
4297       NewCall = dyn_cast<CallInst>(Ret);
4298     }
4299     break;
4300   }
4301 
4302   case Intrinsic::arm_neon_bfdot:
4303   case Intrinsic::arm_neon_bfmmla:
4304   case Intrinsic::arm_neon_bfmlalb:
4305   case Intrinsic::arm_neon_bfmlalt:
4306   case Intrinsic::aarch64_neon_bfdot:
4307   case Intrinsic::aarch64_neon_bfmmla:
4308   case Intrinsic::aarch64_neon_bfmlalb:
4309   case Intrinsic::aarch64_neon_bfmlalt: {
4310     SmallVector<Value *, 3> Args;
4311     assert(CI->arg_size() == 3 &&
4312            "Mismatch between function args and call args");
4313     size_t OperandWidth =
4314         CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4315     assert((OperandWidth == 64 || OperandWidth == 128) &&
4316            "Unexpected operand width");
4317     Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4318     auto Iter = CI->args().begin();
4319     Args.push_back(*Iter++);
4320     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4321     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4322     NewCall = Builder.CreateCall(NewFn, Args);
4323     break;
4324   }
4325 
4326   case Intrinsic::bitreverse:
4327     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4328     break;
4329 
4330   case Intrinsic::ctlz:
4331   case Intrinsic::cttz:
4332     assert(CI->arg_size() == 1 &&
4333            "Mismatch between function args and call args");
4334     NewCall =
4335         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4336     break;
4337 
4338   case Intrinsic::objectsize: {
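    // Older forms had only 2 or 3 arguments; default the missing
    // null-is-unknown-size and dynamic flags to false.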
4339     Value *NullIsUnknownSize =
4340         CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4341     Value *Dynamic =
4342         CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4343     NewCall = Builder.CreateCall(
4344         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4345     break;
4346   }
4347 
4348   case Intrinsic::ctpop:
4349     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4350     break;
4351 
4352   case Intrinsic::convert_from_fp16:
4353     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4354     break;
4355 
4356   case Intrinsic::dbg_value: {
4357     StringRef Name = F->getName();
4358     Name = Name.substr(5); // Strip llvm.
4359     // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4360     if (Name.starts_with("dbg.addr")) {
4361       DIExpression *Expr = cast<DIExpression>(
4362           cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4363       Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4364       NewCall =
4365           Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4366                                      MetadataAsValue::get(C, Expr)});
4367       break;
4368     }
4369 
4370     // Upgrade from the old version that had an extra offset argument.
4371     assert(CI->arg_size() == 4);
4372     // Drop nonzero offsets instead of attempting to upgrade them.
4373     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4374       if (Offset->isZeroValue()) {
4375         NewCall = Builder.CreateCall(
4376             NewFn,
4377             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4378         break;
4379       }
4380     CI->eraseFromParent();
4381     return;
4382   }
4383 
4384   case Intrinsic::ptr_annotation:
4385     // Upgrade from versions that lacked the annotation attribute argument.
4386     if (CI->arg_size() != 4) {
4387       DefaultCase();
4388       return;
4389     }
4390 
4391     // Create a new call with an added null annotation attribute argument.
4392     NewCall =
4393         Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4394                                    CI->getArgOperand(2), CI->getArgOperand(3),
4395                                    Constant::getNullValue(Builder.getPtrTy())});
4396     NewCall->takeName(CI);
4397     CI->replaceAllUsesWith(NewCall);
4398     CI->eraseFromParent();
4399     return;
4400 
4401   case Intrinsic::var_annotation:
4402     // Upgrade from versions that lacked the annotation attribute argument.
4403     if (CI->arg_size() != 4) {
4404       DefaultCase();
4405       return;
4406     }
4407     // Create a new call with an added null annotation attribute argument.
4408     NewCall =
4409         Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4410                                    CI->getArgOperand(2), CI->getArgOperand(3),
4411                                    Constant::getNullValue(Builder.getPtrTy())});
4412     NewCall->takeName(CI);
4413     CI->replaceAllUsesWith(NewCall);
4414     CI->eraseFromParent();
4415     return;
4416 
4417   case Intrinsic::riscv_aes32dsi:
4418   case Intrinsic::riscv_aes32dsmi:
4419   case Intrinsic::riscv_aes32esi:
4420   case Intrinsic::riscv_aes32esmi:
4421   case Intrinsic::riscv_sm4ks:
4422   case Intrinsic::riscv_sm4ed: {
4423     // The last argument to these intrinsics used to be i8 and changed to i32.
4424     // The type overload for sm4ks and sm4ed was removed.
4425     Value *Arg2 = CI->getArgOperand(2);
4426     if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4427       return;
4428 
4429     Value *Arg0 = CI->getArgOperand(0);
4430     Value *Arg1 = CI->getArgOperand(1);
4431     if (CI->getType()->isIntegerTy(64)) {
4432       Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4433       Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4434     }
4435 
4436     Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4437                             cast<ConstantInt>(Arg2)->getZExtValue());
4438 
4439     NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4440     Value *Res = NewCall;
4441     if (Res->getType() != CI->getType())
4442       Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4443     NewCall->takeName(CI);
4444     CI->replaceAllUsesWith(Res);
4445     CI->eraseFromParent();
4446     return;
4447   }
4448   case Intrinsic::riscv_sha256sig0:
4449   case Intrinsic::riscv_sha256sig1:
4450   case Intrinsic::riscv_sha256sum0:
4451   case Intrinsic::riscv_sha256sum1:
4452   case Intrinsic::riscv_sm3p0:
4453   case Intrinsic::riscv_sm3p1: {
4454     // These intrinsics used to be overloaded on the XLen integer type
4455     // (i32/i64); the overload was removed and they now always use i32.
4456     if (!CI->getType()->isIntegerTy(64))
4457       return;
4458 
4459     Value *Arg =
4460         Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4461 
4462     NewCall = Builder.CreateCall(NewFn, Arg);
4463     Value *Res =
4464         Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4465     NewCall->takeName(CI);
4466     CI->replaceAllUsesWith(Res);
4467     CI->eraseFromParent();
4468     return;
4469   }
4470 
4471   case Intrinsic::x86_xop_vfrcz_ss:
4472   case Intrinsic::x86_xop_vfrcz_sd:
4473     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4474     break;
4475 
4476   case Intrinsic::x86_xop_vpermil2pd:
4477   case Intrinsic::x86_xop_vpermil2ps:
4478   case Intrinsic::x86_xop_vpermil2pd_256:
4479   case Intrinsic::x86_xop_vpermil2ps_256: {
4480     SmallVector<Value *, 4> Args(CI->args());
4481     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4482     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4483     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4484     NewCall = Builder.CreateCall(NewFn, Args);
4485     break;
4486   }
4487 
4488   case Intrinsic::x86_sse41_ptestc:
4489   case Intrinsic::x86_sse41_ptestz:
4490   case Intrinsic::x86_sse41_ptestnzc: {
4491     // The arguments for these intrinsics used to be v4f32, and changed
4492     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4493     // So, the only thing required is a bitcast for both arguments.
4494     // First, check the arguments have the old type.
4495     Value *Arg0 = CI->getArgOperand(0);
4496     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4497       return;
4498 
4499     // Old intrinsic, add bitcasts
4500     Value *Arg1 = CI->getArgOperand(1);
4501 
4502     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4503 
4504     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4505     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4506 
4507     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4508     break;
4509   }
4510 
4511   case Intrinsic::x86_rdtscp: {
4512     // This used to take one argument. If we have no arguments, it is already
4513     // upgraded.
4514     if (CI->arg_size() == 0)
4515       return;
4516 
4517     NewCall = Builder.CreateCall(NewFn);
4518     // Extract the second result and store it.
4519     Value *Data = Builder.CreateExtractValue(NewCall, 1);
4520     // Cast the pointer to the right type.
4521     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4522                                  llvm::PointerType::getUnqual(Data->getType()));
4523     Builder.CreateAlignedStore(Data, Ptr, Align(1));
4524     // Replace the original call result with the first result of the new call.
4525     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4526 
4527     NewCall->takeName(CI);
4528     CI->replaceAllUsesWith(TSC);
4529     CI->eraseFromParent();
4530     return;
4531   }
4532 
4533   case Intrinsic::x86_sse41_insertps:
4534   case Intrinsic::x86_sse41_dppd:
4535   case Intrinsic::x86_sse41_dpps:
4536   case Intrinsic::x86_sse41_mpsadbw:
4537   case Intrinsic::x86_avx_dp_ps_256:
4538   case Intrinsic::x86_avx2_mpsadbw: {
4539     // Need to truncate the last argument from i32 to i8 -- this argument models
4540     // an inherently 8-bit immediate operand to these x86 instructions.
4541     SmallVector<Value *, 4> Args(CI->args());
4542 
4543     // Replace the last argument with a trunc.
4544     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4545     NewCall = Builder.CreateCall(NewFn, Args);
4546     break;
4547   }
4548 
4549   case Intrinsic::x86_avx512_mask_cmp_pd_128:
4550   case Intrinsic::x86_avx512_mask_cmp_pd_256:
4551   case Intrinsic::x86_avx512_mask_cmp_pd_512:
4552   case Intrinsic::x86_avx512_mask_cmp_ps_128:
4553   case Intrinsic::x86_avx512_mask_cmp_ps_256:
4554   case Intrinsic::x86_avx512_mask_cmp_ps_512: {
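         // The mask operand changed from an integer bitmask to a vector of i1:
         // convert the old-style mask going in, then widen the i1 comparison
         // result back to the integer form the old callers expect.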
4555     SmallVector<Value *, 4> Args(CI->args());
4556     unsigned NumElts =
4557         cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4558     Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4559 
4560     NewCall = Builder.CreateCall(NewFn, Args);
4561     Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4562 
4563     NewCall->takeName(CI);
4564     CI->replaceAllUsesWith(Res);
4565     CI->eraseFromParent();
4566     return;
4567   }
4568 
4569   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4570   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4571   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4572   case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4573   case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4574   case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
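         // These intrinsics now operate on bfloat vectors instead of i16
         // vectors: bitcast the passthru operand in (for the masked form) and
         // bitcast the result back to the old i16 representation.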
4575     SmallVector<Value *, 4> Args(CI->args());
4576     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4577     if (NewFn->getIntrinsicID() ==
4578         Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4579       Args[1] = Builder.CreateBitCast(
4580           Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4581 
4582     NewCall = Builder.CreateCall(NewFn, Args);
4583     Value *Res = Builder.CreateBitCast(
4584         NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4585 
4586     NewCall->takeName(CI);
4587     CI->replaceAllUsesWith(Res);
4588     CI->eraseFromParent();
4589     return;
4590   }
4591   case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4592   case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4593   case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
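         // The two multiplicand operands are now bfloat vectors with twice as
         // many elements as the f32 accumulator; bitcast them from their old
         // i32 representation.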
4594     SmallVector<Value *, 4> Args(CI->args());
4595     unsigned NumElts =
4596         cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4597     Args[1] = Builder.CreateBitCast(
4598         Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4599     Args[2] = Builder.CreateBitCast(
4600         Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4601 
4602     NewCall = Builder.CreateCall(NewFn, Args);
4603     break;
4604   }
4605 
4606   case Intrinsic::thread_pointer: {
4607     NewCall = Builder.CreateCall(NewFn, {});
4608     break;
4609   }
4610 
4611   case Intrinsic::memcpy:
4612   case Intrinsic::memmove:
4613   case Intrinsic::memset: {
4614     // We have to make sure that the call signature is what we're expecting.
4615     // We only want to change the old signatures by removing the alignment arg:
4616     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4617     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4618     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4619     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4620     // Note: i8*'s in the above can be any pointer type
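         // For illustration (a sketch, using opaque pointers):
         //   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 8, i1 false)
         // becomes
         //   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s, i64 %n, i1 false)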
4621     if (CI->arg_size() != 5) {
4622       DefaultCase();
4623       return;
4624     }
4625     // Remove alignment argument (3), and add alignment attributes to the
4626     // dest/src pointers.
4627     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4628                       CI->getArgOperand(2), CI->getArgOperand(4)};
4629     NewCall = Builder.CreateCall(NewFn, Args);
4630     AttributeList OldAttrs = CI->getAttributes();
4631     AttributeList NewAttrs = AttributeList::get(
4632         C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4633         {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4634          OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4635     NewCall->setAttributes(NewAttrs);
4636     auto *MemCI = cast<MemIntrinsic>(NewCall);
4637     // All mem intrinsics support dest alignment.
4638     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4639     MemCI->setDestAlignment(Align->getMaybeAlignValue());
4640     // Memcpy/Memmove also support source alignment.
4641     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4642       MTI->setSourceAlignment(Align->getMaybeAlignValue());
4643     break;
4644   }
4645   }
4646   assert(NewCall && "Should have either set this variable or returned through "
4647                     "the default case");
4648   NewCall->takeName(CI);
4649   CI->replaceAllUsesWith(NewCall);
4650   CI->eraseFromParent();
4651 }
4652 
4653 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4654   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4655 
4656   // Check if this function should be upgraded and get the replacement function
4657   // if there is one.
4658   Function *NewFn;
4659   if (UpgradeIntrinsicFunction(F, NewFn)) {
4660     // Replace all users of the old function with the new function or new
4661     // instructions. An early-inc range is used because calls get deleted.
4662     for (User *U : make_early_inc_range(F->users()))
4663       if (CallBase *CB = dyn_cast<CallBase>(U))
4664         UpgradeIntrinsicCall(CB, NewFn);
4665 
4666     // Remove old function, no longer used, from the module.
4667     F->eraseFromParent();
4668   }
4669 }
4670 
4671 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4672   const unsigned NumOperands = MD.getNumOperands();
4673   if (NumOperands == 0)
4674     return &MD; // Invalid, punt to a verifier error.
4675 
4676   // Check if the tag uses struct-path aware TBAA format.
4677   if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4678     return &MD;
4679 
4680   auto &Context = MD.getContext();
4681   if (NumOperands == 3) {
4682     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4683     MDNode *ScalarType = MDNode::get(Context, Elts);
4684     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4685     Metadata *Elts2[] = {ScalarType, ScalarType,
4686                          ConstantAsMetadata::get(
4687                              Constant::getNullValue(Type::getInt64Ty(Context))),
4688                          MD.getOperand(2)};
4689     return MDNode::get(Context, Elts2);
4690   }
4691   // Create a MDNode <MD, MD, offset 0>
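       // For illustration (a sketch): an old scalar access tag !0 = !{!"int", !1}
       // becomes !{!0, !0, i64 0}, reusing the scalar type node as both the base
       // and the access type at offset 0.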
4692   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4693                                     Type::getInt64Ty(Context)))};
4694   return MDNode::get(Context, Elts);
4695 }
4696 
4697 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4698                                       Instruction *&Temp) {
4699   if (Opc != Instruction::BitCast)
4700     return nullptr;
4701 
4702   Temp = nullptr;
4703   Type *SrcTy = V->getType();
4704   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4705       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4706     LLVMContext &Context = V->getContext();
4707 
4708     // We have no information about the target data layout, so we assume
4709     // that the maximum pointer size is 64 bits.
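         // For illustration (a sketch): the (no longer accepted) direct cast
         //   %q = bitcast ptr %p to ptr addrspace(1)
         // is expressed instead as
         //   %tmp = ptrtoint ptr %p to i64
         //   %q = inttoptr i64 %tmp to ptr addrspace(1)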
4710     Type *MidTy = Type::getInt64Ty(Context);
4711     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4712 
4713     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4714   }
4715 
4716   return nullptr;
4717 }
4718 
4719 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4720   if (Opc != Instruction::BitCast)
4721     return nullptr;
4722 
4723   Type *SrcTy = C->getType();
4724   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4725       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4726     LLVMContext &Context = C->getContext();
4727 
4728     // We have no information about the target data layout, so we assume
4729     // that the maximum pointer size is 64 bits.
4730     Type *MidTy = Type::getInt64Ty(Context);
4731 
4732     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4733                                      DestTy);
4734   }
4735 
4736   return nullptr;
4737 }
4738 
4739 /// Check the debug info version number; if it is outdated, drop the debug
4740 /// info. Return true if the module is modified.
4741 bool llvm::UpgradeDebugInfo(Module &M) {
4742   if (DisableAutoUpgradeDebugInfo)
4743     return false;
4744 
4745   unsigned Version = getDebugMetadataVersionFromModule(M);
4746   if (Version == DEBUG_METADATA_VERSION) {
4747     bool BrokenDebugInfo = false;
4748     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4749       report_fatal_error("Broken module found, compilation aborted!");
4750     if (!BrokenDebugInfo)
4751       // Everything is ok.
4752       return false;
4753     else {
4754       // Diagnose malformed debug info.
4755       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4756       M.getContext().diagnose(Diag);
4757     }
4758   }
4759   bool Modified = StripDebugInfo(M);
4760   if (Modified && Version != DEBUG_METADATA_VERSION) {
4761     // Diagnose a version mismatch.
4762     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4763     M.getContext().diagnose(DiagVersion);
4764   }
4765   return Modified;
4766 }
4767 
4768 /// This checks for the ObjC retain/release marker, which should be upgraded.
4769 /// It returns true if the module is modified.
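     /// For illustration (a sketch, with a made-up marker string): the named MD
     ///   !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
     ///   !0 = !{!"mov\09fp, fp#marker"}
     /// becomes the module flag
     ///   !{i32 1, !"clang.arc.retainAutoreleasedReturnValueMarker", !"mov\09fp, fp;marker"}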
4770 static bool UpgradeRetainReleaseMarker(Module &M) {
4771   bool Changed = false;
4772   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4773   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4774   if (ModRetainReleaseMarker) {
4775     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4776     if (Op) {
4777       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4778       if (ID) {
4779         SmallVector<StringRef, 4> ValueComp;
4780         ID->getString().split(ValueComp, "#");
4781         if (ValueComp.size() == 2) {
4782           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4783           ID = MDString::get(M.getContext(), NewValue);
4784         }
4785         M.addModuleFlag(Module::Error, MarkerKey, ID);
4786         M.eraseNamedMetadata(ModRetainReleaseMarker);
4787         Changed = true;
4788       }
4789     }
4790   }
4791   return Changed;
4792 }
4793 
4794 void llvm::UpgradeARCRuntime(Module &M) {
4795   // This lambda converts calls to ARC runtime functions into the
4796   // corresponding intrinsic calls.
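       // For illustration (a sketch): UpgradeToIntrinsic("objc_retain",
       // Intrinsic::objc_retain) rewrites
       //   %1 = call ptr @objc_retain(ptr %0)
       // into
       //   %1 = call ptr @llvm.objc.retain(ptr %0)
       // bitcasting arguments and results where the old signature differs.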
4797   auto UpgradeToIntrinsic = [&](const char *OldFunc,
4798                                 llvm::Intrinsic::ID IntrinsicFunc) {
4799     Function *Fn = M.getFunction(OldFunc);
4800 
4801     if (!Fn)
4802       return;
4803 
4804     Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4805 
4806     for (User *U : make_early_inc_range(Fn->users())) {
4807       CallInst *CI = dyn_cast<CallInst>(U);
4808       if (!CI || CI->getCalledFunction() != Fn)
4809         continue;
4810 
4811       IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4812       FunctionType *NewFuncTy = NewFn->getFunctionType();
4813       SmallVector<Value *, 2> Args;
4814 
4815       // Don't upgrade the intrinsic if it's not valid to bitcast the return
4816       // value to the return type of the old function.
4817       if (NewFuncTy->getReturnType() != CI->getType() &&
4818           !CastInst::castIsValid(Instruction::BitCast, CI,
4819                                  NewFuncTy->getReturnType()))
4820         continue;
4821 
4822       bool InvalidCast = false;
4823 
4824       for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4825         Value *Arg = CI->getArgOperand(I);
4826 
4827         // Bitcast argument to the parameter type of the new function if it's
4828         // not a variadic argument.
4829         if (I < NewFuncTy->getNumParams()) {
4830           // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4831           // to the parameter type of the new function.
4832           if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4833                                      NewFuncTy->getParamType(I))) {
4834             InvalidCast = true;
4835             break;
4836           }
4837           Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4838         }
4839         Args.push_back(Arg);
4840       }
4841 
4842       if (InvalidCast)
4843         continue;
4844 
4845       // Create a call instruction that calls the new function.
4846       CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4847       NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4848       NewCall->takeName(CI);
4849 
4850       // Bitcast the return value back to the type of the old call.
4851       Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4852 
4853       if (!CI->use_empty())
4854         CI->replaceAllUsesWith(NewRetVal);
4855       CI->eraseFromParent();
4856     }
4857 
4858     if (Fn->use_empty())
4859       Fn->eraseFromParent();
4860   };
4861 
4862   // Unconditionally convert a call to "clang.arc.use" to a call to
4863   // "llvm.objc.clang.arc.use".
4864   UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4865 
4866   // Upgrade the retain/release marker. If the marker needs no upgrade,
4867   // either the module is already new enough to contain the new intrinsics
4868   // or it is not ARC, so there is no need to upgrade the runtime calls.
4869   if (!UpgradeRetainReleaseMarker(M))
4870     return;
4871 
4872   std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4873       {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4874       {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4875       {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4876       {"objc_autoreleaseReturnValue",
4877        llvm::Intrinsic::objc_autoreleaseReturnValue},
4878       {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4879       {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4880       {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4881       {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4882       {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4883       {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4884       {"objc_release", llvm::Intrinsic::objc_release},
4885       {"objc_retain", llvm::Intrinsic::objc_retain},
4886       {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4887       {"objc_retainAutoreleaseReturnValue",
4888        llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4889       {"objc_retainAutoreleasedReturnValue",
4890        llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4891       {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4892       {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4893       {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4894       {"objc_unsafeClaimAutoreleasedReturnValue",
4895        llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4896       {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4897       {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4898       {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4899       {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4900       {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4901       {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4902       {"objc_arc_annotation_topdown_bbstart",
4903        llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4904       {"objc_arc_annotation_topdown_bbend",
4905        llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4906       {"objc_arc_annotation_bottomup_bbstart",
4907        llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4908       {"objc_arc_annotation_bottomup_bbend",
4909        llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4910 
4911   for (auto &I : RuntimeFuncs)
4912     UpgradeToIntrinsic(I.first, I.second);
4913 }
4914 
4915 bool llvm::UpgradeModuleFlags(Module &M) {
4916   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4917   if (!ModFlags)
4918     return false;
4919 
4920   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4921   bool HasSwiftVersionFlag = false;
4922   uint8_t SwiftMajorVersion, SwiftMinorVersion;
4923   uint32_t SwiftABIVersion;
4924   auto Int8Ty = Type::getInt8Ty(M.getContext());
4925   auto Int32Ty = Type::getInt32Ty(M.getContext());
4926 
4927   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4928     MDNode *Op = ModFlags->getOperand(I);
4929     if (Op->getNumOperands() != 3)
4930       continue;
4931     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4932     if (!ID)
4933       continue;
4934     auto SetBehavior = [&](Module::ModFlagBehavior B) {
4935       Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
4936                               Type::getInt32Ty(M.getContext()), B)),
4937                           MDString::get(M.getContext(), ID->getString()),
4938                           Op->getOperand(2)};
4939       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4940       Changed = true;
4941     };
4942 
4943     if (ID->getString() == "Objective-C Image Info Version")
4944       HasObjCFlag = true;
4945     if (ID->getString() == "Objective-C Class Properties")
4946       HasClassProperties = true;
4947     // Upgrade PIC from Error/Max to Min.
4948     if (ID->getString() == "PIC Level") {
4949       if (auto *Behavior =
4950               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4951         uint64_t V = Behavior->getLimitedValue();
4952         if (V == Module::Error || V == Module::Max)
4953           SetBehavior(Module::Min);
4954       }
4955     }
4956     // Upgrade "PIE Level" from Error to Max.
4957     if (ID->getString() == "PIE Level")
4958       if (auto *Behavior =
4959               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
4960         if (Behavior->getLimitedValue() == Module::Error)
4961           SetBehavior(Module::Max);
4962 
4963     // Upgrade branch protection and return address signing module flags. The
4964     // module flag behavior for these fields was Error and is now Min.
4965     if (ID->getString() == "branch-target-enforcement" ||
4966         ID->getString().starts_with("sign-return-address")) {
4967       if (auto *Behavior =
4968               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4969         if (Behavior->getLimitedValue() == Module::Error) {
4970           Type *Int32Ty = Type::getInt32Ty(M.getContext());
4971           Metadata *Ops[3] = {
4972               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
4973               Op->getOperand(1), Op->getOperand(2)};
4974           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4975           Changed = true;
4976         }
4977       }
4978     }
4979 
4980     // Upgrade the Objective-C Image Info Section. Remove the whitespace in
4981     // the section name so that llvm-lto will not complain about mismatching
4982     // module flags that are functionally the same.
4983     if (ID->getString() == "Objective-C Image Info Section") {
4984       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4985         SmallVector<StringRef, 4> ValueComp;
4986         Value->getString().split(ValueComp, " ");
4987         if (ValueComp.size() != 1) {
4988           std::string NewValue;
4989           for (auto &S : ValueComp)
4990             NewValue += S.str();
4991           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4992                               MDString::get(M.getContext(), NewValue)};
4993           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4994           Changed = true;
4995         }
4996       }
4997     }
4998 
4999     // The IR upgrader turns the i32-typed "Objective-C Garbage Collection" flag
5000     // into an i8 value. If the higher bits are set, it adds Swift module flags.
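         // For illustration (a sketch): a packed value of 0x01020300 yields
         //   "Swift Major Version" = 1   (bits 31..24)
         //   "Swift Minor Version" = 2   (bits 23..16)
         //   "Swift ABI Version"   = 3   (bits 15..8)
         // while the flag itself is narrowed to i8 0 (bits 7..0).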
5001     if (ID->getString() == "Objective-C Garbage Collection") {
5002       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5003       if (Md) {
5004         assert(Md->getValue() && "Expected non-empty metadata");
5005         auto Type = Md->getValue()->getType();
5006         if (Type == Int8Ty)
5007           continue;
5008         unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5009         if ((Val & 0xff) != Val) {
5010           HasSwiftVersionFlag = true;
5011           SwiftABIVersion = (Val & 0xff00) >> 8;
5012           SwiftMajorVersion = (Val & 0xff000000) >> 24;
5013           SwiftMinorVersion = (Val & 0xff0000) >> 16;
5014         }
5015         Metadata *Ops[3] = {
5016           ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
5017           Op->getOperand(1),
5018           ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
5019         ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5020         Changed = true;
5021       }
5022     }
5023   }
5024 
5025   // "Objective-C Class Properties" is recently added for Objective-C. We
5026   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5027   // flag of value 0, so we can correclty downgrade this flag when trying to
5028   // link an ObjC bitcode without this module flag with an ObjC bitcode with
5029   // this module flag.
5030   if (HasObjCFlag && !HasClassProperties) {
5031     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5032                     (uint32_t)0);
5033     Changed = true;
5034   }
5035 
5036   if (HasSwiftVersionFlag) {
5037     M.addModuleFlag(Module::Error, "Swift ABI Version",
5038                     SwiftABIVersion);
5039     M.addModuleFlag(Module::Error, "Swift Major Version",
5040                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
5041     M.addModuleFlag(Module::Error, "Swift Minor Version",
5042                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
5043     Changed = true;
5044   }
5045 
5046   return Changed;
5047 }
5048 
5049 void llvm::UpgradeSectionAttributes(Module &M) {
5050   auto TrimSpaces = [](StringRef Section) -> std::string {
5051     SmallVector<StringRef, 5> Components;
5052     Section.split(Components, ',');
5053 
5054     SmallString<32> Buffer;
5055     raw_svector_ostream OS(Buffer);
5056 
5057     for (auto Component : Components)
5058       OS << ',' << Component.trim();
5059 
5060     return std::string(OS.str().substr(1));
5061   };
5062 
5063   for (auto &GV : M.globals()) {
5064     if (!GV.hasSection())
5065       continue;
5066 
5067     StringRef Section = GV.getSection();
5068 
5069     if (!Section.starts_with("__DATA, __objc_catlist"))
5070       continue;
5071 
5072     // __DATA, __objc_catlist, regular, no_dead_strip
5073     // __DATA,__objc_catlist,regular,no_dead_strip
5074     GV.setSection(TrimSpaces(Section));
5075   }
5076 }
5077 
5078 namespace {
5079 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5080 // callsites within a function that did not also have the strictfp attribute.
5081 // Since 10.0, if strict FP semantics are needed within a function, the
5082 // function must have the strictfp attribute and all calls within the function
5083 // must also have the strictfp attribute. This latter restriction is
5084 // necessary to prevent unwanted libcall simplification when a function is
5085 // being cloned (such as for inlining).
5086 //
5087 // The "dangling" strictfp attribute usage was only used to prevent constant
5088 // folding and other libcall simplification. The nobuiltin attribute on the
5089 // callsite has the same effect.
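     //
     // For illustration (a sketch): inside a caller that lacks the strictfp
     // attribute,
     //   %r = call double @sin(double %x) strictfp
     // becomes
     //   %r = call double @sin(double %x) nobuiltin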
5090 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5091   StrictFPUpgradeVisitor() = default;
5092 
5093   void visitCallBase(CallBase &Call) {
5094     if (!Call.isStrictFP())
5095       return;
5096     if (isa<ConstrainedFPIntrinsic>(&Call))
5097       return;
5098     // If we get here, the caller doesn't have the strictfp attribute
5099     // but this callsite does. Replace the strictfp attribute with nobuiltin.
5100     Call.removeFnAttr(Attribute::StrictFP);
5101     Call.addFnAttr(Attribute::NoBuiltin);
5102   }
5103 };
5104 } // namespace
5105 
5106 void llvm::UpgradeFunctionAttributes(Function &F) {
5107   // If a function definition doesn't have the strictfp attribute,
5108   // convert any callsite strictfp attributes to nobuiltin.
5109   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5110     StrictFPUpgradeVisitor SFPV;
5111     SFPV.visit(F);
5112   }
5113 
5114   // Remove all incompatible attributes from the function.
5115   F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5116   for (auto &Arg : F.args())
5117     Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5118 }
5119 
5120 static bool isOldLoopArgument(Metadata *MD) {
5121   auto *T = dyn_cast_or_null<MDTuple>(MD);
5122   if (!T)
5123     return false;
5124   if (T->getNumOperands() < 1)
5125     return false;
5126   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5127   if (!S)
5128     return false;
5129   return S->getString().starts_with("llvm.vectorizer.");
5130 }
5131 
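     // For illustration (a sketch): "llvm.vectorizer.width" maps to
     // "llvm.loop.vectorize.width", while the special case "llvm.vectorizer.unroll"
     // maps to "llvm.loop.interleave.count".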
5132 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5133   StringRef OldPrefix = "llvm.vectorizer.";
5134   assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5135 
5136   if (OldTag == "llvm.vectorizer.unroll")
5137     return MDString::get(C, "llvm.loop.interleave.count");
5138 
5139   return MDString::get(
5140       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5141              .str());
5142 }
5143 
5144 static Metadata *upgradeLoopArgument(Metadata *MD) {
5145   auto *T = dyn_cast_or_null<MDTuple>(MD);
5146   if (!T)
5147     return MD;
5148   if (T->getNumOperands() < 1)
5149     return MD;
5150   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5151   if (!OldTag)
5152     return MD;
5153   if (!OldTag->getString().starts_with("llvm.vectorizer."))
5154     return MD;
5155 
5156   // This has an old tag.  Upgrade it.
5157   SmallVector<Metadata *, 8> Ops;
5158   Ops.reserve(T->getNumOperands());
5159   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5160   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5161     Ops.push_back(T->getOperand(I));
5162 
5163   return MDTuple::get(T->getContext(), Ops);
5164 }
5165 
5166 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5167   auto *T = dyn_cast<MDTuple>(&N);
5168   if (!T)
5169     return &N;
5170 
5171   if (none_of(T->operands(), isOldLoopArgument))
5172     return &N;
5173 
5174   SmallVector<Metadata *, 8> Ops;
5175   Ops.reserve(T->getNumOperands());
5176   for (Metadata *MD : T->operands())
5177     Ops.push_back(upgradeLoopArgument(MD));
5178 
5179   return MDTuple::get(T->getContext(), Ops);
5180 }
5181 
5182 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5183   Triple T(TT);
5184   // The only data layout upgrade needed for pre-GCN targets is setting the
5185   // address space of globals to 1.
5186   if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
5187       !DL.starts_with("G")) {
5188     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5189   }
5190 
5191   if (T.isRISCV64()) {
5192     // Make i32 a native type for 64-bit RISC-V.
5193     auto I = DL.find("-n64-");
5194     if (I != StringRef::npos)
5195       return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5196     return DL.str();
5197   }
5198 
5199   std::string Res = DL.str();
5200   // AMDGCN data layout upgrades.
5201   if (T.isAMDGCN()) {
5202     // Set the default address space for globals (G1).
5203     if (!DL.contains("-G") && !DL.starts_with("G"))
5204       Res.append(Res.empty() ? "G1" : "-G1");
5205 
5206     // Add missing non-integral declarations.
5207     // This goes before adding new address spaces to prevent incoherent string
5208     // values.
5209     if (!DL.contains("-ni") && !DL.starts_with("ni"))
5210       Res.append("-ni:7:8:9");
5211     // Update ni:7 to ni:7:8:9.
5212     if (DL.ends_with("ni:7"))
5213       Res.append(":8:9");
5214     if (DL.ends_with("ni:7:8"))
5215       Res.append(":9");
5216 
5217     // Add sizing for address spaces 7, 8, and 9 (buffer pointer flavors).
5218     // An empty data layout has already been upgraded to G1 by now.
5219     if (!DL.contains("-p7") && !DL.starts_with("p7"))
5220       Res.append("-p7:160:256:256:32");
5221     if (!DL.contains("-p8") && !DL.starts_with("p8"))
5222       Res.append("-p8:128:128");
5223     if (!DL.contains("-p9") && !DL.starts_with("p9"))
5224       Res.append("-p9:192:256:256:32");
5225 
5226     return Res;
5227   }
5228 
5229   if (!T.isX86())
5230     return Res;
5231 
5232   // If the datalayout matches the expected format, add the pointer-size
5233   // address spaces (p270/p271/p272) to the datalayout.
5234   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5235   if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
5236     SmallVector<StringRef, 4> Groups;
5237     Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5238     if (R.match(Res, &Groups))
5239       Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5240   }
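       // For illustration (a sketch): "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
       // becomes
       // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128".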
5241 
5242   // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5243   // for i128 operations prior to this being reflected in the data layout, and
5244   // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5245   // boundaries, so although this is a breaking change, the upgrade is expected
5246   // to fix more IR than it breaks.
5247   // Intel MCU is an exception and uses 4-byte-alignment.
5248   if (!T.isOSIAMCU()) {
5249     std::string I128 = "-i128:128";
5250     if (StringRef Ref = Res; !Ref.contains(I128)) {
5251       SmallVector<StringRef, 4> Groups;
5252       Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5253       if (R.match(Res, &Groups))
5254         Res = (Groups[1] + I128 + Groups[3]).str();
5255     }
5256   }
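       // Continuing the sketch above, the i128 upgrade then yields
       // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128".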
5257 
5258   // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5259   // Raising the alignment is safe because Clang did not produce f80 values in
5260   // the MSVC environment before this upgrade was added.
5261   if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5262     StringRef Ref = Res;
5263     auto I = Ref.find("-f80:32-");
5264     if (I != StringRef::npos)
5265       Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5266   }
5267 
5268   return Res;
5269 }
5270 
5271 void llvm::UpgradeAttributes(AttrBuilder &B) {
5272   StringRef FramePointer;
5273   Attribute A = B.getAttribute("no-frame-pointer-elim");
5274   if (A.isValid()) {
5275     // The value can be "true" or "false".
5276     FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5277     B.removeAttribute("no-frame-pointer-elim");
5278   }
5279   if (B.contains("no-frame-pointer-elim-non-leaf")) {
5280     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5281     if (FramePointer != "all")
5282       FramePointer = "non-leaf";
5283     B.removeAttribute("no-frame-pointer-elim-non-leaf");
5284   }
5285   if (!FramePointer.empty())
5286     B.addAttribute("frame-pointer", FramePointer);
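       // For illustration (a sketch): "no-frame-pointer-elim"="true" maps to
       // "frame-pointer"="all"; "no-frame-pointer-elim-non-leaf" (without an
       // overriding "no-frame-pointer-elim"="true") maps to "frame-pointer"="non-leaf".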
5287 
5288   A = B.getAttribute("null-pointer-is-valid");
5289   if (A.isValid()) {
5290     // The value can be "true" or "false".
5291     bool NullPointerIsValid = A.getValueAsString() == "true";
5292     B.removeAttribute("null-pointer-is-valid");
5293     if (NullPointerIsValid)
5294       B.addAttribute(Attribute::NullPointerIsValid);
5295   }
5296 }
5297 
5298 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5299   // clang.arc.attachedcall bundles are now required to have an operand.
5300   // If they don't, it's okay to drop them entirely: when there is an operand,
5301   // the "attachedcall" is meaningful and required, but without an operand,
5302   // it's just a marker NOP.  Dropping it merely prevents an optimization.
5303   erase_if(Bundles, [&](OperandBundleDef &OBD) {
5304     return OBD.getTag() == "clang.arc.attachedcall" &&
5305            OBD.inputs().empty();
5306   });
5307 }
5308