xref: /freebsd-src/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/InstVisitor.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include "llvm/IR/IntrinsicsAArch64.h"
28 #include "llvm/IR/IntrinsicsARM.h"
29 #include "llvm/IR/IntrinsicsX86.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Verifier.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/Regex.h"
35 #include <cstring>
36 using namespace llvm;
37 
38 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
39 
40 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
41 // changed their type from v4f32 to v2i64.
42 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
43                                   Function *&NewFn) {
44   // Check whether this is an old version of the function, which received
45   // v4f32 arguments.
46   Type *Arg0Type = F->getFunctionType()->getParamType(0);
47   if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
48     return false;
49 
50   // Yes, it's old, replace it with new version.
51   rename(F);
52   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53   return true;
54 }
55 
56 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
57 // arguments have changed their type from i32 to i8.
58 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
59                                              Function *&NewFn) {
60   // Check that the last argument is an i32.
61   Type *LastArgType = F->getFunctionType()->getParamType(
62      F->getFunctionType()->getNumParams() - 1);
63   if (!LastArgType->isIntegerTy(32))
64     return false;
65 
66   // Move this function aside and map down.
67   rename(F);
68   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
69   return true;
70 }
71 
72 // Upgrade the declaration of fp compare intrinsics that change return type
73 // from scalar to vXi1 mask.
74 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
75                                       Function *&NewFn) {
76   // Check if the return type is a vector.
77   if (F->getReturnType()->isVectorTy())
78     return false;
79 
80   rename(F);
81   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
82   return true;
83 }
84 
// Returns true when \p Name (an x86 intrinsic name with the "x86." prefix
// already stripped by the caller) identifies an intrinsic that is upgraded
// entirely at the call site — i.e. the caller should set NewFn to nullptr
// and let the call-rewriting code handle it, with no replacement declaration.
// NOTE(review): \p F is unused here; matching is purely by name.
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name=="ssse3.pabs.b.128" || // Added in 6.0
      Name=="ssse3.pabs.w.128" || // Added in 6.0
      Name=="ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || //added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
      Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.cmp.p") || // Added in 12.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("vcvtph2ps.") || // Added in 11.0
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
      Name.startswith("avx512.ptestm") || //Added in 6.0
      Name.startswith("avx512.ptestnm") || //Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}
419 
// Detect whether \p F is a deprecated x86 intrinsic and, if so, arrange for
// its upgrade. Returns true when an upgrade is needed. On return, NewFn is
// either the replacement declaration, or nullptr when the upgrade happens
// entirely at the call sites (see ShouldUpgradeX86Intrinsic).
// \p Name is the intrinsic name with the leading "llvm." already stripped.
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  // Intrinsics that are rewritten purely at their call sites: no replacement
  // declaration is needed, so signal that with a null NewFn.
  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  // Dispatch on the suffix after "sse41.ptest" ("c", "z", or "nzc").
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);
  // AVX-512 masked FP compares changed their return type from a scalar mask
  // to a vXi1 vector mask.
  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
                                     NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      // Pick the integer-index replacement based on the element width
      // (pd = 64-bit, ps = 32-bit) and the total vector width (128/256).
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  // llvm.x86.seh.recoverfp was renamed to the target-independent
  // llvm.eh.recoverfp.
  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}
533 
// Detect whether F is a deprecated intrinsic declaration and, if so, arrange
// for its upgrade.  Returns true when an upgrade is needed.  On a true return
// NewFn either points at the replacement declaration, or is left untouched
// (the caller pre-initializes it to nullptr) for intrinsics whose calls are
// expanded entirely in UpgradeIntrinsicCall.
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  // Dispatch on the first character following the "llvm." prefix.
  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.frintn")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      StringRef Suffix =
          F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + "." + Suffix, F->getParent());
      return true;
    }
    static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      // The number of vectors stored (and hence which table entry to use) is
      // recovered from the declaration's parameter count.
      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (!Name.contains("lane"))
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("arm.neon.vqadds.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqaddu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubs.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
      if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        // Floating-point pairwise add moved to the dedicated faddp intrinsic.
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, Ty);
        return true;
      }
    }

    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
    // respectively
    if ((Name.startswith("arm.neon.bfdot.") ||
         Name.startswith("aarch64.neon.bfdot.")) &&
        Name.endswith("i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("arm.neon.bfdot.v2f32.v8i8",
                     "arm.neon.bfdot.v4f32.v16i8",
                     Intrinsic::arm_neon_bfdot)
              .Cases("aarch64.neon.bfdot.v2f32.v8i8",
                     "aarch64.neon.bfdot.v4f32.v16i8",
                     Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      LLVMContext &Ctx = F->getParent()->getContext();
      // bf16 elements are 16 bits wide, hence OperandWidth / 16 lanes.
      std::array<Type *, 2> Tys {{
        F->getReturnType(),
        FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
      }};
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
    // and accept v8bf16 instead of v16i8
    if ((Name.startswith("arm.neon.bfm") ||
         Name.startswith("aarch64.neon.bfm")) &&
        Name.endswith(".v4f32.v16i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Case("arm.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmmla)
              .Case("arm.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalb)
              .Case("arm.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalt)
              .Case("aarch64.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmmla)
              .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalb)
              .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalt)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      // Non-polymorphic now: request the declaration with no overload types.
      std::array<Type *, 0> Tys;
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    if (Name == "arm.mve.vctp64" &&
        cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
      // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
      // function and deal with it below in UpgradeIntrinsicCall.
      rename(F);
      return true;
    }
    // These too are changed to accept a v2i1 instead of the old v4i1.
    // NewFn stays null; the call-site rewrite happens in UpgradeIntrinsicCall.
    if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
        Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
        Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
        Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
      return true;

    break;
  }

  case 'c': {
    // Old ctlz/cttz declarations took a single argument; rename the old
    // declaration and fetch the current one.
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    // Old four-argument dbg.value is replaced by the current declaration.
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'e': {
    // llvm.experimental.vector.reduce.* was renamed to llvm.vector.reduce.*.
    SmallVector<StringRef, 2> Groups;
    static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
    if (R.match(Name, &Groups)) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Groups[1])
               .Case("add", Intrinsic::vector_reduce_add)
               .Case("mul", Intrinsic::vector_reduce_mul)
               .Case("and", Intrinsic::vector_reduce_and)
               .Case("or", Intrinsic::vector_reduce_or)
               .Case("xor", Intrinsic::vector_reduce_xor)
               .Case("smax", Intrinsic::vector_reduce_smax)
               .Case("smin", Intrinsic::vector_reduce_smin)
               .Case("umax", Intrinsic::vector_reduce_umax)
               .Case("umin", Intrinsic::vector_reduce_umin)
               .Case("fmax", Intrinsic::vector_reduce_fmax)
               .Case("fmin", Intrinsic::vector_reduce_fmin)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
        return true;
      }
    }
    // The ".v2" fadd/fmul reductions likewise moved to llvm.vector.reduce.*;
    // these overload on the vector operand (parameter 1), not parameter 0.
    static const Regex R2(
        "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
    Groups.clear();
    if (R2.match(Name, &Groups)) {
      Intrinsic::ID ID = Intrinsic::not_intrinsic;
      if (Groups[1] == "fadd")
        ID = Intrinsic::vector_reduce_fadd;
      if (Groups[1] == "fmul")
        ID = Intrinsic::vector_reduce_fmul;
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
    }
    break;
  }
  case 'i':
  case 'l': {
    // lifetime/invariant intrinsics are overloaded on the pointer operand;
    // upgrade declarations whose mangled name doesn't match the current one.
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      // The pointer is parameter 1 for lifetime.end but 2 for invariant.end.
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;

    }

    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embedding the alignment as an attribute of
    // the pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .StartsWith("atomic.load.add.f32.p", true)
                        .StartsWith("atomic.load.add.f64.p", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name == "prefetch") {
      // Handle address space overloading.
      Type *Tys[] = {F->arg_begin()->getType()};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
        return true;
      }
    } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::ptr_annotation,
                                        F->arg_begin()->getType());
      return true;
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      // No replacement declaration; handled via the null-NewFn path.
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::var_annotation);
      return true;
    }
    break;
  }

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  //  This may not belong here. This function is effectively being overloaded
  //  to both detect an intrinsic which needs upgrading, and to provide the
  //  upgraded form of the intrinsic. We should perhaps have two separate
  //  functions for this.
  return false;
}
1026 
1027 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1028   NewFn = nullptr;
1029   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
1030   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1031 
1032   // Upgrade intrinsic attributes.  This does not change the function.
1033   if (NewFn)
1034     F = NewFn;
1035   if (Intrinsic::ID id = F->getIntrinsicID())
1036     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1037   return Upgraded;
1038 }
1039 
// Upgrade llvm.global_ctors / llvm.global_dtors from the legacy two-field
// entry form { priority, function } to the current three-field form that
// carries an associated-data pointer, filling the new field with null.
// Returns the replacement global, or nullptr if GV is not one of these
// arrays or is already in the new (non-two-field) form.
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  // Only two-field entries need upgrading.
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  // New element type: the old two fields plus a trailing i8* data pointer.
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getInt8PtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    // Copy each entry's two fields and append a null data pointer.
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(
        EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
        Constant::getNullValue(IRB.getInt8PtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
1070 
1071 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1072 // to byte shuffles.
1073 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
1074                                          Value *Op, unsigned Shift) {
1075   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1076   unsigned NumElts = ResultTy->getNumElements() * 8;
1077 
1078   // Bitcast from a 64-bit element type to a byte element type.
1079   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1080   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1081 
1082   // We'll be shuffling in zeroes.
1083   Value *Res = Constant::getNullValue(VecTy);
1084 
1085   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1086   // we'll just return the zero vector.
1087   if (Shift < 16) {
1088     int Idxs[64];
1089     // 256/512-bit version is split into 2/4 16-byte lanes.
1090     for (unsigned l = 0; l != NumElts; l += 16)
1091       for (unsigned i = 0; i != 16; ++i) {
1092         unsigned Idx = NumElts + i - Shift;
1093         if (Idx < NumElts)
1094           Idx -= NumElts - 16; // end of lane, switch operand.
1095         Idxs[l + i] = Idx + l;
1096       }
1097 
1098     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1099   }
1100 
1101   // Bitcast back to a 64-bit element type.
1102   return Builder.CreateBitCast(Res, ResultTy, "cast");
1103 }
1104 
1105 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1106 // to byte shuffles.
1107 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1108                                          unsigned Shift) {
1109   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1110   unsigned NumElts = ResultTy->getNumElements() * 8;
1111 
1112   // Bitcast from a 64-bit element type to a byte element type.
1113   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1114   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1115 
1116   // We'll be shuffling in zeroes.
1117   Value *Res = Constant::getNullValue(VecTy);
1118 
1119   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1120   // we'll just return the zero vector.
1121   if (Shift < 16) {
1122     int Idxs[64];
1123     // 256/512-bit version is split into 2/4 16-byte lanes.
1124     for (unsigned l = 0; l != NumElts; l += 16)
1125       for (unsigned i = 0; i != 16; ++i) {
1126         unsigned Idx = i + Shift;
1127         if (Idx >= 16)
1128           Idx += NumElts - 16; // end of lane, switch operand.
1129         Idxs[l + i] = Idx + l;
1130       }
1131 
1132     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1133   }
1134 
1135   // Bitcast back to a 64-bit element type.
1136   return Builder.CreateBitCast(Res, ResultTy, "cast");
1137 }
1138 
1139 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1140                             unsigned NumElts) {
1141   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1142   llvm::VectorType *MaskTy = FixedVectorType::get(
1143       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1144   Mask = Builder.CreateBitCast(Mask, MaskTy);
1145 
1146   // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1147   // i8 and we need to extract down to the right number of elements.
1148   if (NumElts <= 4) {
1149     int Indices[4];
1150     for (unsigned i = 0; i != NumElts; ++i)
1151       Indices[i] = i;
1152     Mask = Builder.CreateShuffleVector(
1153         Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1154   }
1155 
1156   return Mask;
1157 }
1158 
1159 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
1160                             Value *Op0, Value *Op1) {
1161   // If the mask is all ones just emit the first operation.
1162   if (const auto *C = dyn_cast<Constant>(Mask))
1163     if (C->isAllOnesValue())
1164       return Op0;
1165 
1166   Mask = getX86MaskVec(Builder, Mask,
1167                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1168   return Builder.CreateSelect(Mask, Op0, Op1);
1169 }
1170 
1171 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1172                                   Value *Op0, Value *Op1) {
1173   // If the mask is all ones just emit the first operation.
1174   if (const auto *C = dyn_cast<Constant>(Mask))
1175     if (C->isAllOnesValue())
1176       return Op0;
1177 
1178   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1179                                       Mask->getType()->getIntegerBitWidth());
1180   Mask = Builder.CreateBitCast(Mask, MaskTy);
1181   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1182   return Builder.CreateSelect(Mask, Op0, Op1);
1183 }
1184 
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount must be a constant immediate.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  // Note the operand order: the shuffle concatenates Op1 (low) with Op0
  // (high), matching the instruction's right-rotate-of-the-pair semantics.
  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  // Finally apply the write mask, keeping Passthru lanes where it is clear.
  return EmitX86Select(Builder, Mask, Align, Passthru);
}
1233 
// Upgrade the old VPERMT2/VPERMI2 two-source permute intrinsics to a call to
// the corresponding x86_avx512_vpermi2var_* intrinsic, chosen by vector
// width, element width and element kind (FP vs integer).  The mask operand
// (operand 3) is then re-applied via select, with either a zero vector
// (ZeroMask) or operand 1 as the passthrough.
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
1294 
1295 static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1296                                          Intrinsic::ID IID) {
1297   Type *Ty = CI.getType();
1298   Value *Op0 = CI.getOperand(0);
1299   Value *Op1 = CI.getOperand(1);
1300   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1301   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1302 
1303   if (CI.arg_size() == 4) { // For masked intrinsics.
1304     Value *VecSrc = CI.getOperand(2);
1305     Value *Mask = CI.getOperand(3);
1306     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1307   }
1308   return Res;
1309 }
1310 
1311 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1312                                bool IsRotateRight) {
1313   Type *Ty = CI.getType();
1314   Value *Src = CI.getArgOperand(0);
1315   Value *Amt = CI.getArgOperand(1);
1316 
1317   // Amount may be scalar immediate, in which case create a splat vector.
1318   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1319   // we only care about the lowest log2 bits anyway.
1320   if (Amt->getType() != Ty) {
1321     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1322     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1323     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1324   }
1325 
1326   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1327   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1328   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1329 
1330   if (CI.arg_size() == 4) { // For masked intrinsics.
1331     Value *VecSrc = CI.getOperand(2);
1332     Value *Mask = CI.getOperand(3);
1333     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1334   }
1335   return Res;
1336 }
1337 
1338 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1339                               bool IsSigned) {
1340   Type *Ty = CI.getType();
1341   Value *LHS = CI.getArgOperand(0);
1342   Value *RHS = CI.getArgOperand(1);
1343 
1344   CmpInst::Predicate Pred;
1345   switch (Imm) {
1346   case 0x0:
1347     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1348     break;
1349   case 0x1:
1350     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1351     break;
1352   case 0x2:
1353     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1354     break;
1355   case 0x3:
1356     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1357     break;
1358   case 0x4:
1359     Pred = ICmpInst::ICMP_EQ;
1360     break;
1361   case 0x5:
1362     Pred = ICmpInst::ICMP_NE;
1363     break;
1364   case 0x6:
1365     return Constant::getNullValue(Ty); // FALSE
1366   case 0x7:
1367     return Constant::getAllOnesValue(Ty); // TRUE
1368   default:
1369     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1370   }
1371 
1372   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1373   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1374   return Ext;
1375 }
1376 
1377 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1378                                     bool IsShiftRight, bool ZeroMask) {
1379   Type *Ty = CI.getType();
1380   Value *Op0 = CI.getArgOperand(0);
1381   Value *Op1 = CI.getArgOperand(1);
1382   Value *Amt = CI.getArgOperand(2);
1383 
1384   if (IsShiftRight)
1385     std::swap(Op0, Op1);
1386 
1387   // Amount may be scalar immediate, in which case create a splat vector.
1388   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1389   // we only care about the lowest log2 bits anyway.
1390   if (Amt->getType() != Ty) {
1391     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1392     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1393     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1394   }
1395 
1396   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1397   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1398   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1399 
1400   unsigned NumArgs = CI.arg_size();
1401   if (NumArgs >= 4) { // For masked intrinsics.
1402     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1403                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1404                                    CI.getArgOperand(0);
1405     Value *Mask = CI.getOperand(NumArgs - 1);
1406     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1407   }
1408   return Res;
1409 }
1410 
1411 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1412                                  Value *Ptr, Value *Data, Value *Mask,
1413                                  bool Aligned) {
1414   // Cast the pointer to the right type.
1415   Ptr = Builder.CreateBitCast(Ptr,
1416                               llvm::PointerType::getUnqual(Data->getType()));
1417   const Align Alignment =
1418       Aligned
1419           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1420           : Align(1);
1421 
1422   // If the mask is all ones just emit a regular store.
1423   if (const auto *C = dyn_cast<Constant>(Mask))
1424     if (C->isAllOnesValue())
1425       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1426 
1427   // Convert the mask from an integer type to a vector of i1.
1428   unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1429   Mask = getX86MaskVec(Builder, Mask, NumElts);
1430   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1431 }
1432 
// Upgrade an old masked-load intrinsic: emit a plain load when the mask is
// a known all-ones constant, otherwise a target-independent masked load
// merging with \p Passthru. \p Aligned selects natural (full-vector) vs.
// byte alignment.
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  Type *ValTy = Passthru->getType();
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  const Align Alignment =
      Aligned
          ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
                  8)
          : Align(1);

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
1455 
1456 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1457   Type *Ty = CI.getType();
1458   Value *Op0 = CI.getArgOperand(0);
1459   Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1460   Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1461   if (CI.arg_size() == 3)
1462     Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1463   return Res;
1464 }
1465 
1466 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1467   Type *Ty = CI.getType();
1468 
1469   // Arguments have a vXi32 type so cast to vXi64.
1470   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1471   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1472 
1473   if (IsSigned) {
1474     // Shift left then arithmetic shift right.
1475     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1476     LHS = Builder.CreateShl(LHS, ShiftAmt);
1477     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1478     RHS = Builder.CreateShl(RHS, ShiftAmt);
1479     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1480   } else {
1481     // Clear the upper bits.
1482     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1483     LHS = Builder.CreateAnd(LHS, Mask);
1484     RHS = Builder.CreateAnd(RHS, Mask);
1485   }
1486 
1487   Value *Res = Builder.CreateMul(LHS, RHS);
1488 
1489   if (CI.arg_size() == 4)
1490     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1491 
1492   return Res;
1493 }
1494 
1495 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1496 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1497                                      Value *Mask) {
1498   unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1499   if (Mask) {
1500     const auto *C = dyn_cast<Constant>(Mask);
1501     if (!C || !C->isAllOnesValue())
1502       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1503   }
1504 
1505   if (NumElts < 8) {
1506     int Indices[8];
1507     for (unsigned i = 0; i != NumElts; ++i)
1508       Indices[i] = i;
1509     for (unsigned i = NumElts; i != 8; ++i)
1510       Indices[i] = NumElts + i % NumElts;
1511     Vec = Builder.CreateShuffleVector(Vec,
1512                                       Constant::getNullValue(Vec->getType()),
1513                                       Indices);
1514   }
1515   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1516 }
1517 
1518 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1519                                    unsigned CC, bool Signed) {
1520   Value *Op0 = CI.getArgOperand(0);
1521   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1522 
1523   Value *Cmp;
1524   if (CC == 3) {
1525     Cmp = Constant::getNullValue(
1526         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1527   } else if (CC == 7) {
1528     Cmp = Constant::getAllOnesValue(
1529         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1530   } else {
1531     ICmpInst::Predicate Pred;
1532     switch (CC) {
1533     default: llvm_unreachable("Unknown condition code");
1534     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1535     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1536     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1537     case 4: Pred = ICmpInst::ICMP_NE;  break;
1538     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1539     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1540     }
1541     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1542   }
1543 
1544   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1545 
1546   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1547 }
1548 
1549 // Replace a masked intrinsic with an older unmasked intrinsic.
1550 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1551                                     Intrinsic::ID IID) {
1552   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1553   Value *Rep = Builder.CreateCall(Intrin,
1554                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1555   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1556 }
1557 
1558 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1559   Value* A = CI.getArgOperand(0);
1560   Value* B = CI.getArgOperand(1);
1561   Value* Src = CI.getArgOperand(2);
1562   Value* Mask = CI.getArgOperand(3);
1563 
1564   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1565   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1566   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1567   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1568   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1569   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1570 }
1571 
1572 
1573 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1574   Value* Op = CI.getArgOperand(0);
1575   Type* ReturnOp = CI.getType();
1576   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1577   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1578   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1579 }
1580 
// Replace a masked "llvm.x86.avx512.mask.*" intrinsic with a call to the
// equivalent unmasked intrinsic followed by a select on the mask operand.
// \p Name is the intrinsic name with the leading "llvm.x86." removed.
// Returns true and sets \p Rep to the replacement value if the name was
// recognized; returns false (leaving \p Rep untouched) otherwise.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  // Dispatch on the remaining name plus the result's vector and element
  // widths to pick the unmasked intrinsic ID.
  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    // permvar additionally distinguishes float vs. integer element types.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    // Name[9] is the element-type letter: 'd' (i32) or 'q' (i64).
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pavg.")) {
    // Name[5] is the element-type letter: 'b' (i8) or 'w' (i16).
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Call the unmasked intrinsic with all operands except the two trailing
  // ones (passthru and mask), then re-apply the mask with a select between
  // the unmasked result and the passthru vector.
  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  unsigned NumArgs = CI.arg_size();
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
1819 
1820 /// Upgrade comment in call to inline asm that represents an objc retain release
1821 /// marker.
1822 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1823   size_t Pos;
1824   if (AsmStr->find("mov\tfp") == 0 &&
1825       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1826       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1827     AsmStr->replace(Pos, 1, ";");
1828   }
1829 }
1830 
// Upgrade a call to an old-style ARM MVE/CDE intrinsic. The old forms used
// v4i1 predicates for 64-bit-element operations; the new forms use v2i1,
// so predicate values are converted through the mve.pred.v2i / mve.pred.i2v
// cast intrinsics. Returns the replacement value for the call.
static Value *UpgradeARMIntrinsicCall(StringRef Name, CallInst *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateCall(
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
        CI->getArgOperand(0), CI->getName());
    Value *C1 = Builder.CreateCall(
        Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 2, false)}),
        VCTP);
    return Builder.CreateCall(
        Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::arm_mve_pred_i2v,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}),
        C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // Build the overloaded type list for the new (v2i1-predicated) form of
    // the same intrinsic ID; which operand types are overloaded depends on
    // the intrinsic.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Forward the call operands; any old-style i1-vector (predicate)
    // operand is converted v4i1 -> integer -> v2i1 via the predicate-cast
    // intrinsics.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateCall(
            Intrinsic::getDeclaration(
                F->getParent(), Intrinsic::arm_mve_pred_v2i,
                {VectorType::get(Builder.getInt1Ty(), 4, false)}),
            Op);
        Op = Builder.CreateCall(
            Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
            C1);
      }
      Ops.push_back(Op);
    }

    Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
    return Builder.CreateCall(Fn, Ops, CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallInst upgrade.");
}
1920 
1921 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1922 /// provided to seamlessly integrate with existing context.
1923 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1924   Function *F = CI->getCalledFunction();
1925   LLVMContext &C = CI->getContext();
1926   IRBuilder<> Builder(C);
1927   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1928 
1929   assert(F && "Intrinsic call is not direct?");
1930 
1931   if (!NewFn) {
1932     // Get the Function's name.
1933     StringRef Name = F->getName();
1934 
1935     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1936     Name = Name.substr(5);
1937 
1938     bool IsX86 = Name.startswith("x86.");
1939     if (IsX86)
1940       Name = Name.substr(4);
1941     bool IsNVVM = Name.startswith("nvvm.");
1942     if (IsNVVM)
1943       Name = Name.substr(5);
1944     bool IsARM = Name.startswith("arm.");
1945     if (IsARM)
1946       Name = Name.substr(4);
1947 
1948     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1949       Module *M = F->getParent();
1950       SmallVector<Metadata *, 1> Elts;
1951       Elts.push_back(
1952           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1953       MDNode *Node = MDNode::get(C, Elts);
1954 
1955       Value *Arg0 = CI->getArgOperand(0);
1956       Value *Arg1 = CI->getArgOperand(1);
1957 
1958       // Nontemporal (unaligned) store of the 0'th element of the float/double
1959       // vector.
1960       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1961       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1962       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1963       Value *Extract =
1964           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1965 
1966       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1967       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1968 
1969       // Remove intrinsic.
1970       CI->eraseFromParent();
1971       return;
1972     }
1973 
1974     if (IsX86 && (Name.startswith("avx.movnt.") ||
1975                   Name.startswith("avx512.storent."))) {
1976       Module *M = F->getParent();
1977       SmallVector<Metadata *, 1> Elts;
1978       Elts.push_back(
1979           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1980       MDNode *Node = MDNode::get(C, Elts);
1981 
1982       Value *Arg0 = CI->getArgOperand(0);
1983       Value *Arg1 = CI->getArgOperand(1);
1984 
1985       // Convert the type of the pointer to a pointer to the stored type.
1986       Value *BC = Builder.CreateBitCast(Arg0,
1987                                         PointerType::getUnqual(Arg1->getType()),
1988                                         "cast");
1989       StoreInst *SI = Builder.CreateAlignedStore(
1990           Arg1, BC,
1991           Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1992       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1993 
1994       // Remove intrinsic.
1995       CI->eraseFromParent();
1996       return;
1997     }
1998 
1999     if (IsX86 && Name == "sse2.storel.dq") {
2000       Value *Arg0 = CI->getArgOperand(0);
2001       Value *Arg1 = CI->getArgOperand(1);
2002 
2003       auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2004       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2005       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2006       Value *BC = Builder.CreateBitCast(Arg0,
2007                                         PointerType::getUnqual(Elt->getType()),
2008                                         "cast");
2009       Builder.CreateAlignedStore(Elt, BC, Align(1));
2010 
2011       // Remove intrinsic.
2012       CI->eraseFromParent();
2013       return;
2014     }
2015 
2016     if (IsX86 && (Name.startswith("sse.storeu.") ||
2017                   Name.startswith("sse2.storeu.") ||
2018                   Name.startswith("avx.storeu."))) {
2019       Value *Arg0 = CI->getArgOperand(0);
2020       Value *Arg1 = CI->getArgOperand(1);
2021 
2022       Arg0 = Builder.CreateBitCast(Arg0,
2023                                    PointerType::getUnqual(Arg1->getType()),
2024                                    "cast");
2025       Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2026 
2027       // Remove intrinsic.
2028       CI->eraseFromParent();
2029       return;
2030     }
2031 
2032     if (IsX86 && Name == "avx512.mask.store.ss") {
2033       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2034       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2035                          Mask, false);
2036 
2037       // Remove intrinsic.
2038       CI->eraseFromParent();
2039       return;
2040     }
2041 
2042     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
2043       // "avx512.mask.storeu." or "avx512.mask.store."
2044       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2045       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2046                          CI->getArgOperand(2), Aligned);
2047 
2048       // Remove intrinsic.
2049       CI->eraseFromParent();
2050       return;
2051     }
2052 
2053     Value *Rep;
2054     // Upgrade packed integer vector compare intrinsics to compare instructions.
2055     if (IsX86 && (Name.startswith("sse2.pcmp") ||
2056                   Name.startswith("avx2.pcmp"))) {
2057       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2058       bool CmpEq = Name[9] == 'e';
2059       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2060                                CI->getArgOperand(0), CI->getArgOperand(1));
2061       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2062     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
2063       Type *ExtTy = Type::getInt32Ty(C);
2064       if (CI->getOperand(0)->getType()->isIntegerTy(8))
2065         ExtTy = Type::getInt64Ty(C);
2066       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2067                          ExtTy->getPrimitiveSizeInBits();
2068       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2069       Rep = Builder.CreateVectorSplat(NumElts, Rep);
2070     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2071                          Name == "sse2.sqrt.sd")) {
2072       Value *Vec = CI->getArgOperand(0);
2073       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2074       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2075                                                  Intrinsic::sqrt, Elt0->getType());
2076       Elt0 = Builder.CreateCall(Intr, Elt0);
2077       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2078     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
2079                          Name.startswith("sse2.sqrt.p") ||
2080                          Name.startswith("sse.sqrt.p"))) {
2081       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2082                                                          Intrinsic::sqrt,
2083                                                          CI->getType()),
2084                                {CI->getArgOperand(0)});
2085     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
2086       if (CI->arg_size() == 4 &&
2087           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2088            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2089         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2090                                             : Intrinsic::x86_avx512_sqrt_pd_512;
2091 
2092         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2093         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2094                                                            IID), Args);
2095       } else {
2096         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2097                                                            Intrinsic::sqrt,
2098                                                            CI->getType()),
2099                                  {CI->getArgOperand(0)});
2100       }
2101       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2102                           CI->getArgOperand(1));
2103     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
2104                          Name.startswith("avx512.ptestnm"))) {
2105       Value *Op0 = CI->getArgOperand(0);
2106       Value *Op1 = CI->getArgOperand(1);
2107       Value *Mask = CI->getArgOperand(2);
2108       Rep = Builder.CreateAnd(Op0, Op1);
2109       llvm::Type *Ty = Op0->getType();
2110       Value *Zero = llvm::Constant::getNullValue(Ty);
2111       ICmpInst::Predicate Pred =
2112         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2113       Rep = Builder.CreateICmp(Pred, Rep, Zero);
2114       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2115     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
2116       unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2117                              ->getNumElements();
2118       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2119       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2120                           CI->getArgOperand(1));
2121     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2122       unsigned NumElts = CI->getType()->getScalarSizeInBits();
2123       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2124       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2125       int Indices[64];
2126       for (unsigned i = 0; i != NumElts; ++i)
2127         Indices[i] = i;
2128 
2129       // First extract half of each vector. This gives better codegen than
2130       // doing it in a single shuffle.
2131       LHS = Builder.CreateShuffleVector(LHS, LHS,
2132                                         makeArrayRef(Indices, NumElts / 2));
2133       RHS = Builder.CreateShuffleVector(RHS, RHS,
2134                                         makeArrayRef(Indices, NumElts / 2));
2135       // Concat the vectors.
2136       // NOTE: Operands have to be swapped to match intrinsic definition.
2137       Rep = Builder.CreateShuffleVector(RHS, LHS,
2138                                         makeArrayRef(Indices, NumElts));
2139       Rep = Builder.CreateBitCast(Rep, CI->getType());
2140     } else if (IsX86 && Name == "avx512.kand.w") {
2141       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2142       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2143       Rep = Builder.CreateAnd(LHS, RHS);
2144       Rep = Builder.CreateBitCast(Rep, CI->getType());
2145     } else if (IsX86 && Name == "avx512.kandn.w") {
2146       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2147       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2148       LHS = Builder.CreateNot(LHS);
2149       Rep = Builder.CreateAnd(LHS, RHS);
2150       Rep = Builder.CreateBitCast(Rep, CI->getType());
2151     } else if (IsX86 && Name == "avx512.kor.w") {
2152       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2153       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2154       Rep = Builder.CreateOr(LHS, RHS);
2155       Rep = Builder.CreateBitCast(Rep, CI->getType());
2156     } else if (IsX86 && Name == "avx512.kxor.w") {
2157       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2158       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2159       Rep = Builder.CreateXor(LHS, RHS);
2160       Rep = Builder.CreateBitCast(Rep, CI->getType());
2161     } else if (IsX86 && Name == "avx512.kxnor.w") {
2162       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2163       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2164       LHS = Builder.CreateNot(LHS);
2165       Rep = Builder.CreateXor(LHS, RHS);
2166       Rep = Builder.CreateBitCast(Rep, CI->getType());
2167     } else if (IsX86 && Name == "avx512.knot.w") {
2168       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2169       Rep = Builder.CreateNot(Rep);
2170       Rep = Builder.CreateBitCast(Rep, CI->getType());
2171     } else if (IsX86 &&
2172                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2173       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2174       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2175       Rep = Builder.CreateOr(LHS, RHS);
2176       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2177       Value *C;
2178       if (Name[14] == 'c')
2179         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2180       else
2181         C = ConstantInt::getNullValue(Builder.getInt16Ty());
2182       Rep = Builder.CreateICmpEQ(Rep, C);
2183       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2184     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2185                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2186                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2187                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2188       Type *I32Ty = Type::getInt32Ty(C);
2189       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2190                                                  ConstantInt::get(I32Ty, 0));
2191       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2192                                                  ConstantInt::get(I32Ty, 0));
2193       Value *EltOp;
2194       if (Name.contains(".add."))
2195         EltOp = Builder.CreateFAdd(Elt0, Elt1);
2196       else if (Name.contains(".sub."))
2197         EltOp = Builder.CreateFSub(Elt0, Elt1);
2198       else if (Name.contains(".mul."))
2199         EltOp = Builder.CreateFMul(Elt0, Elt1);
2200       else
2201         EltOp = Builder.CreateFDiv(Elt0, Elt1);
2202       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2203                                         ConstantInt::get(I32Ty, 0));
2204     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2205       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2206       bool CmpEq = Name[16] == 'e';
2207       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2208     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2209       Type *OpTy = CI->getArgOperand(0)->getType();
2210       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2211       Intrinsic::ID IID;
2212       switch (VecWidth) {
2213       default: llvm_unreachable("Unexpected intrinsic");
2214       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2215       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2216       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2217       }
2218 
2219       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2220                                { CI->getOperand(0), CI->getArgOperand(1) });
2221       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2222     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2223       Type *OpTy = CI->getArgOperand(0)->getType();
2224       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2225       unsigned EltWidth = OpTy->getScalarSizeInBits();
2226       Intrinsic::ID IID;
2227       if (VecWidth == 128 && EltWidth == 32)
2228         IID = Intrinsic::x86_avx512_fpclass_ps_128;
2229       else if (VecWidth == 256 && EltWidth == 32)
2230         IID = Intrinsic::x86_avx512_fpclass_ps_256;
2231       else if (VecWidth == 512 && EltWidth == 32)
2232         IID = Intrinsic::x86_avx512_fpclass_ps_512;
2233       else if (VecWidth == 128 && EltWidth == 64)
2234         IID = Intrinsic::x86_avx512_fpclass_pd_128;
2235       else if (VecWidth == 256 && EltWidth == 64)
2236         IID = Intrinsic::x86_avx512_fpclass_pd_256;
2237       else if (VecWidth == 512 && EltWidth == 64)
2238         IID = Intrinsic::x86_avx512_fpclass_pd_512;
2239       else
2240         llvm_unreachable("Unexpected intrinsic");
2241 
2242       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2243                                { CI->getOperand(0), CI->getArgOperand(1) });
2244       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2245     } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2246       SmallVector<Value *, 4> Args(CI->args());
2247       Type *OpTy = Args[0]->getType();
2248       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2249       unsigned EltWidth = OpTy->getScalarSizeInBits();
2250       Intrinsic::ID IID;
2251       if (VecWidth == 128 && EltWidth == 32)
2252         IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2253       else if (VecWidth == 256 && EltWidth == 32)
2254         IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2255       else if (VecWidth == 512 && EltWidth == 32)
2256         IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2257       else if (VecWidth == 128 && EltWidth == 64)
2258         IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2259       else if (VecWidth == 256 && EltWidth == 64)
2260         IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2261       else if (VecWidth == 512 && EltWidth == 64)
2262         IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2263       else
2264         llvm_unreachable("Unexpected intrinsic");
2265 
2266       Value *Mask = Constant::getAllOnesValue(CI->getType());
2267       if (VecWidth == 512)
2268         std::swap(Mask, Args.back());
2269       Args.push_back(Mask);
2270 
2271       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2272                                Args);
2273     } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2274       // Integer compare intrinsics.
2275       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2276       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2277     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2278       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2279       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2280     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2281                          Name.startswith("avx512.cvtw2mask.") ||
2282                          Name.startswith("avx512.cvtd2mask.") ||
2283                          Name.startswith("avx512.cvtq2mask."))) {
2284       Value *Op = CI->getArgOperand(0);
2285       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2286       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2287       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2288     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2289                         Name == "ssse3.pabs.w.128" ||
2290                         Name == "ssse3.pabs.d.128" ||
2291                         Name.startswith("avx2.pabs") ||
2292                         Name.startswith("avx512.mask.pabs"))) {
2293       Rep = upgradeAbs(Builder, *CI);
2294     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2295                          Name == "sse2.pmaxs.w" ||
2296                          Name == "sse41.pmaxsd" ||
2297                          Name.startswith("avx2.pmaxs") ||
2298                          Name.startswith("avx512.mask.pmaxs"))) {
2299       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2300     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2301                          Name == "sse41.pmaxuw" ||
2302                          Name == "sse41.pmaxud" ||
2303                          Name.startswith("avx2.pmaxu") ||
2304                          Name.startswith("avx512.mask.pmaxu"))) {
2305       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2306     } else if (IsX86 && (Name == "sse41.pminsb" ||
2307                          Name == "sse2.pmins.w" ||
2308                          Name == "sse41.pminsd" ||
2309                          Name.startswith("avx2.pmins") ||
2310                          Name.startswith("avx512.mask.pmins"))) {
2311       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2312     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2313                          Name == "sse41.pminuw" ||
2314                          Name == "sse41.pminud" ||
2315                          Name.startswith("avx2.pminu") ||
2316                          Name.startswith("avx512.mask.pminu"))) {
2317       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2318     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2319                          Name == "avx2.pmulu.dq" ||
2320                          Name == "avx512.pmulu.dq.512" ||
2321                          Name.startswith("avx512.mask.pmulu.dq."))) {
2322       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2323     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2324                          Name == "avx2.pmul.dq" ||
2325                          Name == "avx512.pmul.dq.512" ||
2326                          Name.startswith("avx512.mask.pmul.dq."))) {
2327       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2328     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2329                          Name == "sse2.cvtsi2sd" ||
2330                          Name == "sse.cvtsi642ss" ||
2331                          Name == "sse2.cvtsi642sd")) {
2332       Rep = Builder.CreateSIToFP(
2333           CI->getArgOperand(1),
2334           cast<VectorType>(CI->getType())->getElementType());
2335       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2336     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2337       Rep = Builder.CreateUIToFP(
2338           CI->getArgOperand(1),
2339           cast<VectorType>(CI->getType())->getElementType());
2340       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2341     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2342       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2343       Rep = Builder.CreateFPExt(
2344           Rep, cast<VectorType>(CI->getType())->getElementType());
2345       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2346     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2347                          Name == "sse2.cvtdq2ps" ||
2348                          Name == "avx.cvtdq2.pd.256" ||
2349                          Name == "avx.cvtdq2.ps.256" ||
2350                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2351                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2352                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2353                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2354                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2355                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2356                          Name == "avx512.mask.cvtqq2ps.256" ||
2357                          Name == "avx512.mask.cvtqq2ps.512" ||
2358                          Name == "avx512.mask.cvtuqq2ps.256" ||
2359                          Name == "avx512.mask.cvtuqq2ps.512" ||
2360                          Name == "sse2.cvtps2pd" ||
2361                          Name == "avx.cvt.ps2.pd.256" ||
2362                          Name == "avx512.mask.cvtps2pd.128" ||
2363                          Name == "avx512.mask.cvtps2pd.256")) {
2364       auto *DstTy = cast<FixedVectorType>(CI->getType());
2365       Rep = CI->getArgOperand(0);
2366       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2367 
2368       unsigned NumDstElts = DstTy->getNumElements();
2369       if (NumDstElts < SrcTy->getNumElements()) {
2370         assert(NumDstElts == 2 && "Unexpected vector size");
2371         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2372       }
2373 
2374       bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2375       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2376       if (IsPS2PD)
2377         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2378       else if (CI->arg_size() == 4 &&
2379                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2380                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2381         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2382                                        : Intrinsic::x86_avx512_sitofp_round;
2383         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2384                                                 { DstTy, SrcTy });
2385         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2386       } else {
2387         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2388                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2389       }
2390 
2391       if (CI->arg_size() >= 3)
2392         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2393                             CI->getArgOperand(1));
2394     } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2395                          Name.startswith("vcvtph2ps."))) {
2396       auto *DstTy = cast<FixedVectorType>(CI->getType());
2397       Rep = CI->getArgOperand(0);
2398       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2399       unsigned NumDstElts = DstTy->getNumElements();
2400       if (NumDstElts != SrcTy->getNumElements()) {
2401         assert(NumDstElts == 4 && "Unexpected vector size");
2402         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2403       }
2404       Rep = Builder.CreateBitCast(
2405           Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2406       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2407       if (CI->arg_size() >= 3)
2408         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2409                             CI->getArgOperand(1));
2410     } else if (IsX86 && Name.startswith("avx512.mask.load")) {
2411       // "avx512.mask.loadu." or "avx512.mask.load."
2412       bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2413       Rep =
2414           UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2415                             CI->getArgOperand(2), Aligned);
2416     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2417       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2418       Type *PtrTy = ResultTy->getElementType();
2419 
2420       // Cast the pointer to element type.
2421       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2422                                          llvm::PointerType::getUnqual(PtrTy));
2423 
2424       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2425                                      ResultTy->getNumElements());
2426 
2427       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2428                                                 Intrinsic::masked_expandload,
2429                                                 ResultTy);
2430       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2431     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2432       auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2433       Type *PtrTy = ResultTy->getElementType();
2434 
2435       // Cast the pointer to element type.
2436       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2437                                          llvm::PointerType::getUnqual(PtrTy));
2438 
2439       Value *MaskVec =
2440           getX86MaskVec(Builder, CI->getArgOperand(2),
2441                         cast<FixedVectorType>(ResultTy)->getNumElements());
2442 
2443       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2444                                                 Intrinsic::masked_compressstore,
2445                                                 ResultTy);
2446       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2447     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2448                          Name.startswith("avx512.mask.expand."))) {
2449       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2450 
2451       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2452                                      ResultTy->getNumElements());
2453 
2454       bool IsCompress = Name[12] == 'c';
2455       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2456                                      : Intrinsic::x86_avx512_mask_expand;
2457       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2458       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2459                                        MaskVec });
2460     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2461       bool IsSigned;
2462       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2463           Name.endswith("uq"))
2464         IsSigned = false;
2465       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2466                Name.endswith("q"))
2467         IsSigned = true;
2468       else
2469         llvm_unreachable("Unknown suffix");
2470 
2471       unsigned Imm;
2472       if (CI->arg_size() == 3) {
2473         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2474       } else {
2475         Name = Name.substr(9); // strip off "xop.vpcom"
2476         if (Name.startswith("lt"))
2477           Imm = 0;
2478         else if (Name.startswith("le"))
2479           Imm = 1;
2480         else if (Name.startswith("gt"))
2481           Imm = 2;
2482         else if (Name.startswith("ge"))
2483           Imm = 3;
2484         else if (Name.startswith("eq"))
2485           Imm = 4;
2486         else if (Name.startswith("ne"))
2487           Imm = 5;
2488         else if (Name.startswith("false"))
2489           Imm = 6;
2490         else if (Name.startswith("true"))
2491           Imm = 7;
2492         else
2493           llvm_unreachable("Unknown condition");
2494       }
2495 
2496       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2497     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2498       Value *Sel = CI->getArgOperand(2);
2499       Value *NotSel = Builder.CreateNot(Sel);
2500       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2501       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2502       Rep = Builder.CreateOr(Sel0, Sel1);
2503     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2504                          Name.startswith("avx512.prol") ||
2505                          Name.startswith("avx512.mask.prol"))) {
2506       Rep = upgradeX86Rotate(Builder, *CI, false);
2507     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2508                          Name.startswith("avx512.mask.pror"))) {
2509       Rep = upgradeX86Rotate(Builder, *CI, true);
2510     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2511                          Name.startswith("avx512.mask.vpshld") ||
2512                          Name.startswith("avx512.maskz.vpshld"))) {
2513       bool ZeroMask = Name[11] == 'z';
2514       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2515     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2516                          Name.startswith("avx512.mask.vpshrd") ||
2517                          Name.startswith("avx512.maskz.vpshrd"))) {
2518       bool ZeroMask = Name[11] == 'z';
2519       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2520     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2521       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2522                                                Intrinsic::x86_sse42_crc32_32_8);
2523       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2524       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2525       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2526     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2527                          Name.startswith("avx512.vbroadcast.s"))) {
2528       // Replace broadcasts with a series of insertelements.
2529       auto *VecTy = cast<FixedVectorType>(CI->getType());
2530       Type *EltTy = VecTy->getElementType();
2531       unsigned EltNum = VecTy->getNumElements();
2532       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2533                                           EltTy->getPointerTo());
2534       Value *Load = Builder.CreateLoad(EltTy, Cast);
2535       Type *I32Ty = Type::getInt32Ty(C);
2536       Rep = PoisonValue::get(VecTy);
2537       for (unsigned I = 0; I < EltNum; ++I)
2538         Rep = Builder.CreateInsertElement(Rep, Load,
2539                                           ConstantInt::get(I32Ty, I));
2540     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2541                          Name.startswith("sse41.pmovzx") ||
2542                          Name.startswith("avx2.pmovsx") ||
2543                          Name.startswith("avx2.pmovzx") ||
2544                          Name.startswith("avx512.mask.pmovsx") ||
2545                          Name.startswith("avx512.mask.pmovzx"))) {
2546       auto *DstTy = cast<FixedVectorType>(CI->getType());
2547       unsigned NumDstElts = DstTy->getNumElements();
2548 
2549       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2550       SmallVector<int, 8> ShuffleMask(NumDstElts);
2551       for (unsigned i = 0; i != NumDstElts; ++i)
2552         ShuffleMask[i] = i;
2553 
2554       Value *SV =
2555           Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2556 
2557       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2558       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2559                    : Builder.CreateZExt(SV, DstTy);
2560       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2561       if (CI->arg_size() == 3)
2562         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2563                             CI->getArgOperand(1));
    } else if (Name == "avx512.mask.pmov.qd.256" ||
               Name == "avx512.mask.pmov.qd.512" ||
               Name == "avx512.mask.pmov.wb.256" ||
               Name == "avx512.mask.pmov.wb.512") {
      // Truncating pmov: truncate the source vector to the passthru operand's
      // type, then blend with the passthru under the mask operand.
      // NOTE(review): unlike the neighboring clauses this one carries no
      // "IsX86 &&" guard; only x86 upgrades produce these stripped names in
      // practice, but adding the guard would be consistent with the chain.
      Type *Ty = CI->getArgOperand(1)->getType();
      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      // The old intrinsic accepted an unaligned pointer, so load with align 1.
      Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
      else
        Rep = Builder.CreateShuffleVector(
            Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      // shuf_i*/shuf_f* 128-bit lane shuffles: each control-bit group in the
      // immediate selects one 128-bit lane; the high half of the result reads
      // lanes from the second source operand.
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<int, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                         Name.startswith("avx512.mask.broadcasti"))) {
      // Subvector broadcast: repeat the narrow source's elements across the
      // wider destination, then apply the write-mask.
      unsigned NumSrcElts =
          cast<FixedVectorType>(CI->getArgOperand(0)->getType())
              ->getNumElements();
      unsigned NumDstElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(0),
                                        ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
      SmallVector<int, 8> M;
      // An all-zero mask replicates element 0 into every destination lane.
      ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
      Rep = Builder.CreateShuffleVector(Op, M);

      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->arg_size() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("sse2.padds.") ||
                         Name.startswith("avx2.padds.") ||
                         Name.startswith("avx512.padds.") ||
                         Name.startswith("avx512.mask.padds."))) {
      // Signed saturating add maps onto the generic sadd.sat intrinsic.
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
    } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
                         Name.startswith("avx2.psubs.") ||
                         Name.startswith("avx512.psubs.") ||
                         Name.startswith("avx512.mask.psubs."))) {
      // Signed saturating subtract maps onto the generic ssub.sat intrinsic.
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
    } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                         Name.startswith("avx2.paddus.") ||
                         Name.startswith("avx512.mask.paddus."))) {
      // Unsigned saturating add maps onto the generic uadd.sat intrinsic.
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
    } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
                         Name.startswith("avx2.psubus.") ||
                         Name.startswith("avx512.mask.psubus."))) {
      // Unsigned saturating subtract maps onto the generic usub.sat intrinsic.
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      // palignr and valign share one helper; the trailing bool distinguishes
      // the two forms (false here, true in the valign clause below).
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      false);
    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
      // valign variant of the shared helper (trailing bool == true).
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      true);
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse41.pblendw" ||
                         Name.startswith("sse41.blendp") ||
                         Name.startswith("avx.blend.p") ||
                         Name == "avx2.pblendw" ||
                         Name.startswith("avx2.pblendd."))) {
      // Immediate blends become a two-source shuffle: bit i of the immediate
      // (wrapping every 8 bits) picks element i from Op1 instead of Op0.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                         Name == "avx2.vinserti128" ||
                         Name.startswith("avx512.mask.insert"))) {
      // vinsert: place the narrow Op1 into the slot of Op0 selected by the
      // immediate, expressed as two shuffles.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned DstNumElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned SrcNumElts =
          cast<FixedVectorType>(Op1->getType())->getNumElements();
      unsigned Scale = DstNumElts / SrcNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Extend the second operand into a vector the size of the destination.
      // Indexes >= SrcNumElts read the implicit poison operand of the
      // single-source shuffle; those lanes are all overwritten below.
      SmallVector<int, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i] = i;
      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
        Idxs[i] = SrcNumElts;
      Rep = Builder.CreateShuffleVector(Op1, Idxs);

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      // First fill with identity mask.
      for (unsigned i = 0; i != DstNumElts; ++i)
        Idxs[i] = i;
      // Then replace the elements where we need to insert.
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->arg_size() == 5)
        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                            CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                         Name == "avx2.vextracti128" ||
                         Name.startswith("avx512.mask.vextract"))) {
      // vextract: pull the subvector selected by the immediate out of Op0
      // with a single shuffle.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned DstNumElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned SrcNumElts =
          cast<FixedVectorType>(Op0->getType())->getNumElements();
      unsigned Scale = SrcNumElts / DstNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Get indexes for the subvector of the input vector.
      SmallVector<int, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != DstNumElts; ++i) {
        Idxs[i] = i + (Imm * DstNumElts);
      }
      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (!IsX86 && Name == "stackprotectorcheck") {
      // Obsolete intrinsic; produce no replacement value.
      Rep = nullptr;
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      // vpermpd/vpermq-style permute: each 2-bit immediate field selects an
      // element within the 4-element group containing destination lane i.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<int, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      //    [1:0] - select 128 bits from sources for low half of destination
      //    [2]   - ignore
      //    [3]   - zero low half of destination
      //    [5:4] - select 128 bits from sources for high half of destination
      //    [6]   - ignore
      //    [7]   - zero high half of destination

      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<int, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);

    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      // Single-source in-lane permute controlled by an 8-bit immediate.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<int, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      // pshuflw: the immediate shuffles the low four words of each 128-bit
      // lane; the high four words pass through unchanged.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      // pshufhw: the immediate shuffles the high four words of each 128-bit
      // lane; the low four words pass through unchanged.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      // Masked shufps/shufpd: within each 128-bit lane the low half of the
      // result comes from Op0 and the high half from Op1, with the specific
      // element picked by immediate bits.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element. By adding HalfLaneElts bits from
        // the immediate. Wrapping around the immediate every 8-bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      // movddup/movsldup duplicate the even elements of each pair (Offset 0);
      // movshdup duplicates the odd elements (Offset 1).
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      // Interleave the low-half elements of the two sources within each
      // 128-bit lane, then apply the write-mask.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      // Interleave the high-half elements of the two sources within each
      // 128-bit lane, then apply the write-mask.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                         Name.startswith("avx512.mask.pand."))) {
      // Masked bitwise AND: bitcast (possibly FP) operands to an integer
      // vector, AND, bitcast back, then apply the write-mask.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                         Name.startswith("avx512.mask.pandn."))) {
      // Masked AND-NOT: (~Op0) & Op1, computed in the integer domain.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                         Name.startswith("avx512.mask.por."))) {
      // Masked bitwise OR in the integer domain.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                         Name.startswith("avx512.mask.pxor."))) {
      // Masked bitwise XOR in the integer domain.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      // Masked integer add becomes plain IR add + select.
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      // Masked integer subtract becomes plain IR sub + select.
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      // Masked low-half integer multiply becomes plain IR mul + select.
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      // Masked FP add. The .512 forms forward arg 4 (presumably the rounding
      // mode — it becomes the target intrinsic's trailing operand, TODO
      // confirm) and so must stay target intrinsics; narrower forms lower to
      // a plain fadd. Name[17] is the 's'/'d' of ".ps"/".pd".
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      // Masked FP divide; same .512-vs-narrow split as the add case above.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      // Masked FP multiply; same .512-vs-narrow split as the add case above.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      // Masked FP subtract; same .512-vs-narrow split as the add case above.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      // Only the 512-bit max/min forms are handled here; they map onto the
      // matching target intrinsics, forwarding arg 4 like the cases above.
      bool IsDouble = Name[17] == 'd'; // 's'/'d' of ".ps"/".pd".
      bool IsMin = Name[13] == 'i';    // "min" vs "max" differ at index 13.
      static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      // lzcnt maps onto generic ctlz; the i1 "false" flag requests defined
      // behavior for a zero input, matching lzcnt semantics.
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      // Masked shift-left family. Element size, immediate-vs-vector shift
      // amount, and the variable-shift (psllv) form are all decoded from
      // characters of the mangled name; see the per-case name examples below.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      // Element-size letter: the first character after the next '.'.
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        // Remaining cases are the 512-bit forms.
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3182     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3183       bool IsImmediate = Name[16] == 'i' ||
3184                          (Name.size() > 18 && Name[18] == 'i');
3185       bool IsVariable = Name[16] == 'v';
3186       char Size = Name[16] == '.' ? Name[17] :
3187                   Name[17] == '.' ? Name[18] :
3188                   Name[18] == '.' ? Name[19] :
3189                                     Name[20];
3190 
3191       Intrinsic::ID IID;
3192       if (IsVariable && Name[17] != '.') {
3193         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3194           IID = Intrinsic::x86_avx2_psrlv_q;
3195         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3196           IID = Intrinsic::x86_avx2_psrlv_q_256;
3197         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3198           IID = Intrinsic::x86_avx2_psrlv_d;
3199         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3200           IID = Intrinsic::x86_avx2_psrlv_d_256;
3201         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3202           IID = Intrinsic::x86_avx512_psrlv_w_128;
3203         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3204           IID = Intrinsic::x86_avx512_psrlv_w_256;
3205         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3206           IID = Intrinsic::x86_avx512_psrlv_w_512;
3207         else
3208           llvm_unreachable("Unexpected size");
3209       } else if (Name.endswith(".128")) {
3210         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3211           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3212                             : Intrinsic::x86_sse2_psrl_d;
3213         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3214           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3215                             : Intrinsic::x86_sse2_psrl_q;
3216         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3217           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3218                             : Intrinsic::x86_sse2_psrl_w;
3219         else
3220           llvm_unreachable("Unexpected size");
3221       } else if (Name.endswith(".256")) {
3222         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3223           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3224                             : Intrinsic::x86_avx2_psrl_d;
3225         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3226           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3227                             : Intrinsic::x86_avx2_psrl_q;
3228         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3229           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3230                             : Intrinsic::x86_avx2_psrl_w;
3231         else
3232           llvm_unreachable("Unexpected size");
3233       } else {
3234         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3235           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3236                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
3237                               Intrinsic::x86_avx512_psrl_d_512;
3238         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3239           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3240                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
3241                               Intrinsic::x86_avx512_psrl_q_512;
3242         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3243           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3244                             : Intrinsic::x86_avx512_psrl_w_512;
3245         else
3246           llvm_unreachable("Unexpected size");
3247       }
3248 
3249       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3250     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3251       bool IsImmediate = Name[16] == 'i' ||
3252                          (Name.size() > 18 && Name[18] == 'i');
3253       bool IsVariable = Name[16] == 'v';
3254       char Size = Name[16] == '.' ? Name[17] :
3255                   Name[17] == '.' ? Name[18] :
3256                   Name[18] == '.' ? Name[19] :
3257                                     Name[20];
3258 
3259       Intrinsic::ID IID;
3260       if (IsVariable && Name[17] != '.') {
3261         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3262           IID = Intrinsic::x86_avx2_psrav_d;
3263         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3264           IID = Intrinsic::x86_avx2_psrav_d_256;
3265         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3266           IID = Intrinsic::x86_avx512_psrav_w_128;
3267         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3268           IID = Intrinsic::x86_avx512_psrav_w_256;
3269         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3270           IID = Intrinsic::x86_avx512_psrav_w_512;
3271         else
3272           llvm_unreachable("Unexpected size");
3273       } else if (Name.endswith(".128")) {
3274         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3275           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3276                             : Intrinsic::x86_sse2_psra_d;
3277         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3278           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3279                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3280                               Intrinsic::x86_avx512_psra_q_128;
3281         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3282           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3283                             : Intrinsic::x86_sse2_psra_w;
3284         else
3285           llvm_unreachable("Unexpected size");
3286       } else if (Name.endswith(".256")) {
3287         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3288           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3289                             : Intrinsic::x86_avx2_psra_d;
3290         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3291           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3292                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3293                               Intrinsic::x86_avx512_psra_q_256;
3294         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3295           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3296                             : Intrinsic::x86_avx2_psra_w;
3297         else
3298           llvm_unreachable("Unexpected size");
3299       } else {
3300         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3301           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3302                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3303                               Intrinsic::x86_avx512_psra_d_512;
3304         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3305           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3306                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3307                               Intrinsic::x86_avx512_psra_q_512;
3308         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3309           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3310                             : Intrinsic::x86_avx512_psra_w_512;
3311         else
3312           llvm_unreachable("Unexpected size");
3313       }
3314 
3315       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3316     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3317       Rep = upgradeMaskedMove(Builder, *CI);
3318     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3319       Rep = UpgradeMaskToInt(Builder, *CI);
3320     } else if (IsX86 && Name.endswith(".movntdqa")) {
3321       Module *M = F->getParent();
3322       MDNode *Node = MDNode::get(
3323           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3324 
3325       Value *Ptr = CI->getArgOperand(0);
3326 
3327       // Convert the type of the pointer to a pointer to the stored type.
3328       Value *BC = Builder.CreateBitCast(
3329           Ptr, PointerType::getUnqual(CI->getType()), "cast");
3330       LoadInst *LI = Builder.CreateAlignedLoad(
3331           CI->getType(), BC,
3332           Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
3333       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3334       Rep = LI;
3335     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3336                          Name.startswith("fma.vfmsub.") ||
3337                          Name.startswith("fma.vfnmadd.") ||
3338                          Name.startswith("fma.vfnmsub."))) {
3339       bool NegMul = Name[6] == 'n';
3340       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3341       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3342 
3343       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3344                        CI->getArgOperand(2) };
3345 
3346       if (IsScalar) {
3347         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3348         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3349         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3350       }
3351 
3352       if (NegMul && !IsScalar)
3353         Ops[0] = Builder.CreateFNeg(Ops[0]);
3354       if (NegMul && IsScalar)
3355         Ops[1] = Builder.CreateFNeg(Ops[1]);
3356       if (NegAcc)
3357         Ops[2] = Builder.CreateFNeg(Ops[2]);
3358 
3359       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3360                                                          Intrinsic::fma,
3361                                                          Ops[0]->getType()),
3362                                Ops);
3363 
3364       if (IsScalar)
3365         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3366                                           (uint64_t)0);
3367     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3368       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3369                        CI->getArgOperand(2) };
3370 
3371       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3372       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3373       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3374 
3375       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3376                                                          Intrinsic::fma,
3377                                                          Ops[0]->getType()),
3378                                Ops);
3379 
3380       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3381                                         Rep, (uint64_t)0);
3382     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3383                          Name.startswith("avx512.maskz.vfmadd.s") ||
3384                          Name.startswith("avx512.mask3.vfmadd.s") ||
3385                          Name.startswith("avx512.mask3.vfmsub.s") ||
3386                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
3387       bool IsMask3 = Name[11] == '3';
3388       bool IsMaskZ = Name[11] == 'z';
3389       // Drop the "avx512.mask." to make it easier.
3390       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3391       bool NegMul = Name[2] == 'n';
3392       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3393 
3394       Value *A = CI->getArgOperand(0);
3395       Value *B = CI->getArgOperand(1);
3396       Value *C = CI->getArgOperand(2);
3397 
3398       if (NegMul && (IsMask3 || IsMaskZ))
3399         A = Builder.CreateFNeg(A);
3400       if (NegMul && !(IsMask3 || IsMaskZ))
3401         B = Builder.CreateFNeg(B);
3402       if (NegAcc)
3403         C = Builder.CreateFNeg(C);
3404 
3405       A = Builder.CreateExtractElement(A, (uint64_t)0);
3406       B = Builder.CreateExtractElement(B, (uint64_t)0);
3407       C = Builder.CreateExtractElement(C, (uint64_t)0);
3408 
3409       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3410           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3411         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3412 
3413         Intrinsic::ID IID;
3414         if (Name.back() == 'd')
3415           IID = Intrinsic::x86_avx512_vfmadd_f64;
3416         else
3417           IID = Intrinsic::x86_avx512_vfmadd_f32;
3418         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3419         Rep = Builder.CreateCall(FMA, Ops);
3420       } else {
3421         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3422                                                   Intrinsic::fma,
3423                                                   A->getType());
3424         Rep = Builder.CreateCall(FMA, { A, B, C });
3425       }
3426 
3427       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3428                         IsMask3 ? C : A;
3429 
3430       // For Mask3 with NegAcc, we need to create a new extractelement that
3431       // avoids the negation above.
3432       if (NegAcc && IsMask3)
3433         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3434                                                 (uint64_t)0);
3435 
3436       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3437                                 Rep, PassThru);
3438       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3439                                         Rep, (uint64_t)0);
3440     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3441                          Name.startswith("avx512.mask.vfnmadd.p") ||
3442                          Name.startswith("avx512.mask.vfnmsub.p") ||
3443                          Name.startswith("avx512.mask3.vfmadd.p") ||
3444                          Name.startswith("avx512.mask3.vfmsub.p") ||
3445                          Name.startswith("avx512.mask3.vfnmsub.p") ||
3446                          Name.startswith("avx512.maskz.vfmadd.p"))) {
3447       bool IsMask3 = Name[11] == '3';
3448       bool IsMaskZ = Name[11] == 'z';
3449       // Drop the "avx512.mask." to make it easier.
3450       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3451       bool NegMul = Name[2] == 'n';
3452       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3453 
3454       Value *A = CI->getArgOperand(0);
3455       Value *B = CI->getArgOperand(1);
3456       Value *C = CI->getArgOperand(2);
3457 
3458       if (NegMul && (IsMask3 || IsMaskZ))
3459         A = Builder.CreateFNeg(A);
3460       if (NegMul && !(IsMask3 || IsMaskZ))
3461         B = Builder.CreateFNeg(B);
3462       if (NegAcc)
3463         C = Builder.CreateFNeg(C);
3464 
3465       if (CI->arg_size() == 5 &&
3466           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3467            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3468         Intrinsic::ID IID;
3469         // Check the character before ".512" in string.
3470         if (Name[Name.size()-5] == 's')
3471           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3472         else
3473           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3474 
3475         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3476                                  { A, B, C, CI->getArgOperand(4) });
3477       } else {
3478         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3479                                                   Intrinsic::fma,
3480                                                   A->getType());
3481         Rep = Builder.CreateCall(FMA, { A, B, C });
3482       }
3483 
3484       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3485                         IsMask3 ? CI->getArgOperand(2) :
3486                                   CI->getArgOperand(0);
3487 
3488       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3489     } else if (IsX86 &&  Name.startswith("fma.vfmsubadd.p")) {
3490       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3491       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3492       Intrinsic::ID IID;
3493       if (VecWidth == 128 && EltWidth == 32)
3494         IID = Intrinsic::x86_fma_vfmaddsub_ps;
3495       else if (VecWidth == 256 && EltWidth == 32)
3496         IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3497       else if (VecWidth == 128 && EltWidth == 64)
3498         IID = Intrinsic::x86_fma_vfmaddsub_pd;
3499       else if (VecWidth == 256 && EltWidth == 64)
3500         IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3501       else
3502         llvm_unreachable("Unexpected intrinsic");
3503 
3504       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3505                        CI->getArgOperand(2) };
3506       Ops[2] = Builder.CreateFNeg(Ops[2]);
3507       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3508                                Ops);
3509     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3510                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3511                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3512                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3513       bool IsMask3 = Name[11] == '3';
3514       bool IsMaskZ = Name[11] == 'z';
3515       // Drop the "avx512.mask." to make it easier.
3516       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3517       bool IsSubAdd = Name[3] == 's';
3518       if (CI->arg_size() == 5) {
3519         Intrinsic::ID IID;
3520         // Check the character before ".512" in string.
3521         if (Name[Name.size()-5] == 's')
3522           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3523         else
3524           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3525 
3526         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3527                          CI->getArgOperand(2), CI->getArgOperand(4) };
3528         if (IsSubAdd)
3529           Ops[2] = Builder.CreateFNeg(Ops[2]);
3530 
3531         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3532                                  Ops);
3533       } else {
3534         int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3535 
3536         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3537                          CI->getArgOperand(2) };
3538 
3539         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3540                                                   Ops[0]->getType());
3541         Value *Odd = Builder.CreateCall(FMA, Ops);
3542         Ops[2] = Builder.CreateFNeg(Ops[2]);
3543         Value *Even = Builder.CreateCall(FMA, Ops);
3544 
3545         if (IsSubAdd)
3546           std::swap(Even, Odd);
3547 
3548         SmallVector<int, 32> Idxs(NumElts);
3549         for (int i = 0; i != NumElts; ++i)
3550           Idxs[i] = i + (i % 2) * NumElts;
3551 
3552         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3553       }
3554 
3555       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3556                         IsMask3 ? CI->getArgOperand(2) :
3557                                   CI->getArgOperand(0);
3558 
3559       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3560     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3561                          Name.startswith("avx512.maskz.pternlog."))) {
3562       bool ZeroMask = Name[11] == 'z';
3563       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3564       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3565       Intrinsic::ID IID;
3566       if (VecWidth == 128 && EltWidth == 32)
3567         IID = Intrinsic::x86_avx512_pternlog_d_128;
3568       else if (VecWidth == 256 && EltWidth == 32)
3569         IID = Intrinsic::x86_avx512_pternlog_d_256;
3570       else if (VecWidth == 512 && EltWidth == 32)
3571         IID = Intrinsic::x86_avx512_pternlog_d_512;
3572       else if (VecWidth == 128 && EltWidth == 64)
3573         IID = Intrinsic::x86_avx512_pternlog_q_128;
3574       else if (VecWidth == 256 && EltWidth == 64)
3575         IID = Intrinsic::x86_avx512_pternlog_q_256;
3576       else if (VecWidth == 512 && EltWidth == 64)
3577         IID = Intrinsic::x86_avx512_pternlog_q_512;
3578       else
3579         llvm_unreachable("Unexpected intrinsic");
3580 
3581       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3582                         CI->getArgOperand(2), CI->getArgOperand(3) };
3583       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3584                                Args);
3585       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3586                                  : CI->getArgOperand(0);
3587       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3588     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3589                          Name.startswith("avx512.maskz.vpmadd52"))) {
3590       bool ZeroMask = Name[11] == 'z';
3591       bool High = Name[20] == 'h' || Name[21] == 'h';
3592       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3593       Intrinsic::ID IID;
3594       if (VecWidth == 128 && !High)
3595         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3596       else if (VecWidth == 256 && !High)
3597         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3598       else if (VecWidth == 512 && !High)
3599         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3600       else if (VecWidth == 128 && High)
3601         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3602       else if (VecWidth == 256 && High)
3603         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3604       else if (VecWidth == 512 && High)
3605         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3606       else
3607         llvm_unreachable("Unexpected intrinsic");
3608 
3609       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3610                         CI->getArgOperand(2) };
3611       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3612                                Args);
3613       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3614                                  : CI->getArgOperand(0);
3615       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3616     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3617                          Name.startswith("avx512.mask.vpermt2var.") ||
3618                          Name.startswith("avx512.maskz.vpermt2var."))) {
3619       bool ZeroMask = Name[11] == 'z';
3620       bool IndexForm = Name[17] == 'i';
3621       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3622     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3623                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3624                          Name.startswith("avx512.mask.vpdpbusds.") ||
3625                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3626       bool ZeroMask = Name[11] == 'z';
3627       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3628       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3629       Intrinsic::ID IID;
3630       if (VecWidth == 128 && !IsSaturating)
3631         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3632       else if (VecWidth == 256 && !IsSaturating)
3633         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3634       else if (VecWidth == 512 && !IsSaturating)
3635         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3636       else if (VecWidth == 128 && IsSaturating)
3637         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3638       else if (VecWidth == 256 && IsSaturating)
3639         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3640       else if (VecWidth == 512 && IsSaturating)
3641         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3642       else
3643         llvm_unreachable("Unexpected intrinsic");
3644 
3645       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3646                         CI->getArgOperand(2)  };
3647       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3648                                Args);
3649       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3650                                  : CI->getArgOperand(0);
3651       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3652     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3653                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3654                          Name.startswith("avx512.mask.vpdpwssds.") ||
3655                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3656       bool ZeroMask = Name[11] == 'z';
3657       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3658       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3659       Intrinsic::ID IID;
3660       if (VecWidth == 128 && !IsSaturating)
3661         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3662       else if (VecWidth == 256 && !IsSaturating)
3663         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3664       else if (VecWidth == 512 && !IsSaturating)
3665         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3666       else if (VecWidth == 128 && IsSaturating)
3667         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3668       else if (VecWidth == 256 && IsSaturating)
3669         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3670       else if (VecWidth == 512 && IsSaturating)
3671         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3672       else
3673         llvm_unreachable("Unexpected intrinsic");
3674 
3675       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3676                         CI->getArgOperand(2)  };
3677       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3678                                Args);
3679       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3680                                  : CI->getArgOperand(0);
3681       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3682     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3683                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3684                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3685       Intrinsic::ID IID;
3686       if (Name[0] == 'a' && Name.back() == '2')
3687         IID = Intrinsic::x86_addcarry_32;
3688       else if (Name[0] == 'a' && Name.back() == '4')
3689         IID = Intrinsic::x86_addcarry_64;
3690       else if (Name[0] == 's' && Name.back() == '2')
3691         IID = Intrinsic::x86_subborrow_32;
3692       else if (Name[0] == 's' && Name.back() == '4')
3693         IID = Intrinsic::x86_subborrow_64;
3694       else
3695         llvm_unreachable("Unexpected intrinsic");
3696 
3697       // Make a call with 3 operands.
3698       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3699                         CI->getArgOperand(2)};
3700       Value *NewCall = Builder.CreateCall(
3701                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3702                                 Args);
3703 
3704       // Extract the second result and store it.
3705       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3706       // Cast the pointer to the right type.
3707       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3708                                  llvm::PointerType::getUnqual(Data->getType()));
3709       Builder.CreateAlignedStore(Data, Ptr, Align(1));
3710       // Replace the original call result with the first result of the new call.
3711       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3712 
3713       CI->replaceAllUsesWith(CF);
3714       Rep = nullptr;
3715     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3716                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3717       // Rep will be updated by the call in the condition.
3718     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3719       Value *Arg = CI->getArgOperand(0);
3720       Value *Neg = Builder.CreateNeg(Arg, "neg");
3721       Value *Cmp = Builder.CreateICmpSGE(
3722           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3723       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3724     } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3725                           Name.startswith("atomic.load.add.f64.p"))) {
3726       Value *Ptr = CI->getArgOperand(0);
3727       Value *Val = CI->getArgOperand(1);
3728       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3729                                     AtomicOrdering::SequentiallyConsistent);
3730     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3731                           Name == "max.ui" || Name == "max.ull")) {
3732       Value *Arg0 = CI->getArgOperand(0);
3733       Value *Arg1 = CI->getArgOperand(1);
3734       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3735                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3736                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3737       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3738     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3739                           Name == "min.ui" || Name == "min.ull")) {
3740       Value *Arg0 = CI->getArgOperand(0);
3741       Value *Arg1 = CI->getArgOperand(1);
3742       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3743                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3744                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3745       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3746     } else if (IsNVVM && Name == "clz.ll") {
3747       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3748       Value *Arg = CI->getArgOperand(0);
3749       Value *Ctlz = Builder.CreateCall(
3750           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3751                                     {Arg->getType()}),
3752           {Arg, Builder.getFalse()}, "ctlz");
3753       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3754     } else if (IsNVVM && Name == "popc.ll") {
3755       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3756       // i64.
3757       Value *Arg = CI->getArgOperand(0);
3758       Value *Popc = Builder.CreateCall(
3759           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3760                                     {Arg->getType()}),
3761           Arg, "ctpop");
3762       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3763     } else if (IsNVVM && Name == "h2f") {
3764       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3765                                    F->getParent(), Intrinsic::convert_from_fp16,
3766                                    {Builder.getFloatTy()}),
3767                                CI->getArgOperand(0), "h2f");
3768     } else if (IsARM) {
3769       Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
3770     } else {
3771       llvm_unreachable("Unknown function for CallInst upgrade.");
3772     }
3773 
3774     if (Rep)
3775       CI->replaceAllUsesWith(Rep);
3776     CI->eraseFromParent();
3777     return;
3778   }
3779 
3780   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3781     // Handle generic mangling change, but nothing else
3782     assert(
3783         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3784         "Unknown function for CallInst upgrade and isn't just a name change");
3785     CI->setCalledFunction(NewFn);
3786   };
3787   CallInst *NewCall = nullptr;
3788   switch (NewFn->getIntrinsicID()) {
3789   default: {
3790     DefaultCase();
3791     return;
3792   }
3793   case Intrinsic::arm_neon_vld1:
3794   case Intrinsic::arm_neon_vld2:
3795   case Intrinsic::arm_neon_vld3:
3796   case Intrinsic::arm_neon_vld4:
3797   case Intrinsic::arm_neon_vld2lane:
3798   case Intrinsic::arm_neon_vld3lane:
3799   case Intrinsic::arm_neon_vld4lane:
3800   case Intrinsic::arm_neon_vst1:
3801   case Intrinsic::arm_neon_vst2:
3802   case Intrinsic::arm_neon_vst3:
3803   case Intrinsic::arm_neon_vst4:
3804   case Intrinsic::arm_neon_vst2lane:
3805   case Intrinsic::arm_neon_vst3lane:
3806   case Intrinsic::arm_neon_vst4lane: {
3807     SmallVector<Value *, 4> Args(CI->args());
3808     NewCall = Builder.CreateCall(NewFn, Args);
3809     break;
3810   }
3811 
3812   case Intrinsic::arm_neon_bfdot:
3813   case Intrinsic::arm_neon_bfmmla:
3814   case Intrinsic::arm_neon_bfmlalb:
3815   case Intrinsic::arm_neon_bfmlalt:
3816   case Intrinsic::aarch64_neon_bfdot:
3817   case Intrinsic::aarch64_neon_bfmmla:
3818   case Intrinsic::aarch64_neon_bfmlalb:
3819   case Intrinsic::aarch64_neon_bfmlalt: {
3820     SmallVector<Value *, 3> Args;
3821     assert(CI->arg_size() == 3 &&
3822            "Mismatch between function args and call args");
3823     size_t OperandWidth =
3824         CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
3825     assert((OperandWidth == 64 || OperandWidth == 128) &&
3826            "Unexpected operand width");
3827     Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3828     auto Iter = CI->args().begin();
3829     Args.push_back(*Iter++);
3830     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3831     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3832     NewCall = Builder.CreateCall(NewFn, Args);
3833     break;
3834   }
3835 
3836   case Intrinsic::bitreverse:
3837     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3838     break;
3839 
3840   case Intrinsic::ctlz:
3841   case Intrinsic::cttz:
3842     assert(CI->arg_size() == 1 &&
3843            "Mismatch between function args and call args");
3844     NewCall =
3845         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3846     break;
3847 
3848   case Intrinsic::objectsize: {
3849     Value *NullIsUnknownSize =
3850         CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
3851     Value *Dynamic =
3852         CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3853     NewCall = Builder.CreateCall(
3854         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3855     break;
3856   }
3857 
3858   case Intrinsic::ctpop:
3859     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3860     break;
3861 
3862   case Intrinsic::convert_from_fp16:
3863     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3864     break;
3865 
3866   case Intrinsic::dbg_value:
3867     // Upgrade from the old version that had an extra offset argument.
3868     assert(CI->arg_size() == 4);
3869     // Drop nonzero offsets instead of attempting to upgrade them.
3870     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3871       if (Offset->isZeroValue()) {
3872         NewCall = Builder.CreateCall(
3873             NewFn,
3874             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3875         break;
3876       }
3877     CI->eraseFromParent();
3878     return;
3879 
3880   case Intrinsic::ptr_annotation:
3881     // Upgrade from versions that lacked the annotation attribute argument.
3882     assert(CI->arg_size() == 4 &&
3883            "Before LLVM 12.0 this intrinsic took four arguments");
3884     // Create a new call with an added null annotation attribute argument.
3885     NewCall = Builder.CreateCall(
3886         NewFn,
3887         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3888          CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3889     NewCall->takeName(CI);
3890     CI->replaceAllUsesWith(NewCall);
3891     CI->eraseFromParent();
3892     return;
3893 
3894   case Intrinsic::var_annotation:
3895     // Upgrade from versions that lacked the annotation attribute argument.
3896     assert(CI->arg_size() == 4 &&
3897            "Before LLVM 12.0 this intrinsic took four arguments");
3898     // Create a new call with an added null annotation attribute argument.
3899     NewCall = Builder.CreateCall(
3900         NewFn,
3901         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3902          CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3903     CI->eraseFromParent();
3904     return;
3905 
3906   case Intrinsic::x86_xop_vfrcz_ss:
3907   case Intrinsic::x86_xop_vfrcz_sd:
3908     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3909     break;
3910 
3911   case Intrinsic::x86_xop_vpermil2pd:
3912   case Intrinsic::x86_xop_vpermil2ps:
3913   case Intrinsic::x86_xop_vpermil2pd_256:
3914   case Intrinsic::x86_xop_vpermil2ps_256: {
3915     SmallVector<Value *, 4> Args(CI->args());
3916     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3917     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3918     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3919     NewCall = Builder.CreateCall(NewFn, Args);
3920     break;
3921   }
3922 
3923   case Intrinsic::x86_sse41_ptestc:
3924   case Intrinsic::x86_sse41_ptestz:
3925   case Intrinsic::x86_sse41_ptestnzc: {
3926     // The arguments for these intrinsics used to be v4f32, and changed
3927     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3928     // So, the only thing required is a bitcast for both arguments.
3929     // First, check the arguments have the old type.
3930     Value *Arg0 = CI->getArgOperand(0);
3931     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3932       return;
3933 
3934     // Old intrinsic, add bitcasts
3935     Value *Arg1 = CI->getArgOperand(1);
3936 
3937     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3938 
3939     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3940     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3941 
3942     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3943     break;
3944   }
3945 
3946   case Intrinsic::x86_rdtscp: {
3947     // This used to take 1 arguments. If we have no arguments, it is already
3948     // upgraded.
3949     if (CI->getNumOperands() == 0)
3950       return;
3951 
3952     NewCall = Builder.CreateCall(NewFn);
3953     // Extract the second result and store it.
3954     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3955     // Cast the pointer to the right type.
3956     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3957                                  llvm::PointerType::getUnqual(Data->getType()));
3958     Builder.CreateAlignedStore(Data, Ptr, Align(1));
3959     // Replace the original call result with the first result of the new call.
3960     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3961 
3962     NewCall->takeName(CI);
3963     CI->replaceAllUsesWith(TSC);
3964     CI->eraseFromParent();
3965     return;
3966   }
3967 
3968   case Intrinsic::x86_sse41_insertps:
3969   case Intrinsic::x86_sse41_dppd:
3970   case Intrinsic::x86_sse41_dpps:
3971   case Intrinsic::x86_sse41_mpsadbw:
3972   case Intrinsic::x86_avx_dp_ps_256:
3973   case Intrinsic::x86_avx2_mpsadbw: {
3974     // Need to truncate the last argument from i32 to i8 -- this argument models
3975     // an inherently 8-bit immediate operand to these x86 instructions.
3976     SmallVector<Value *, 4> Args(CI->args());
3977 
3978     // Replace the last argument with a trunc.
3979     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3980     NewCall = Builder.CreateCall(NewFn, Args);
3981     break;
3982   }
3983 
3984   case Intrinsic::x86_avx512_mask_cmp_pd_128:
3985   case Intrinsic::x86_avx512_mask_cmp_pd_256:
3986   case Intrinsic::x86_avx512_mask_cmp_pd_512:
3987   case Intrinsic::x86_avx512_mask_cmp_ps_128:
3988   case Intrinsic::x86_avx512_mask_cmp_ps_256:
3989   case Intrinsic::x86_avx512_mask_cmp_ps_512: {
3990     SmallVector<Value *, 4> Args(CI->args());
3991     unsigned NumElts =
3992         cast<FixedVectorType>(Args[0]->getType())->getNumElements();
3993     Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
3994 
3995     NewCall = Builder.CreateCall(NewFn, Args);
3996     Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
3997 
3998     NewCall->takeName(CI);
3999     CI->replaceAllUsesWith(Res);
4000     CI->eraseFromParent();
4001     return;
4002   }
4003 
4004   case Intrinsic::thread_pointer: {
4005     NewCall = Builder.CreateCall(NewFn, {});
4006     break;
4007   }
4008 
4009   case Intrinsic::invariant_start:
4010   case Intrinsic::invariant_end: {
4011     SmallVector<Value *, 4> Args(CI->args());
4012     NewCall = Builder.CreateCall(NewFn, Args);
4013     break;
4014   }
4015   case Intrinsic::masked_load:
4016   case Intrinsic::masked_store:
4017   case Intrinsic::masked_gather:
4018   case Intrinsic::masked_scatter: {
4019     SmallVector<Value *, 4> Args(CI->args());
4020     NewCall = Builder.CreateCall(NewFn, Args);
4021     NewCall->copyMetadata(*CI);
4022     break;
4023   }
4024 
4025   case Intrinsic::memcpy:
4026   case Intrinsic::memmove:
4027   case Intrinsic::memset: {
4028     // We have to make sure that the call signature is what we're expecting.
4029     // We only want to change the old signatures by removing the alignment arg:
4030     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4031     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4032     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4033     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4034     // Note: i8*'s in the above can be any pointer type
4035     if (CI->arg_size() != 5) {
4036       DefaultCase();
4037       return;
4038     }
4039     // Remove alignment argument (3), and add alignment attributes to the
4040     // dest/src pointers.
4041     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4042                       CI->getArgOperand(2), CI->getArgOperand(4)};
4043     NewCall = Builder.CreateCall(NewFn, Args);
4044     auto *MemCI = cast<MemIntrinsic>(NewCall);
4045     // All mem intrinsics support dest alignment.
4046     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4047     MemCI->setDestAlignment(Align->getMaybeAlignValue());
4048     // Memcpy/Memmove also support source alignment.
4049     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4050       MTI->setSourceAlignment(Align->getMaybeAlignValue());
4051     break;
4052   }
4053   }
4054   assert(NewCall && "Should have either set this variable or returned through "
4055                     "the default case");
4056   NewCall->takeName(CI);
4057   CI->replaceAllUsesWith(NewCall);
4058   CI->eraseFromParent();
4059 }
4060 
4061 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4062   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4063 
4064   // Check if this function should be upgraded and get the replacement function
4065   // if there is one.
4066   Function *NewFn;
4067   if (UpgradeIntrinsicFunction(F, NewFn)) {
4068     // Replace all users of the old function with the new function or new
4069     // instructions. This is not a range loop because the call is deleted.
4070     for (User *U : make_early_inc_range(F->users()))
4071       if (CallInst *CI = dyn_cast<CallInst>(U))
4072         UpgradeIntrinsicCall(CI, NewFn);
4073 
4074     // Remove old function, no longer used, from the module.
4075     F->eraseFromParent();
4076   }
4077 }
4078 
4079 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4080   // Check if the tag uses struct-path aware TBAA format.
4081   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
4082     return &MD;
4083 
4084   auto &Context = MD.getContext();
4085   if (MD.getNumOperands() == 3) {
4086     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4087     MDNode *ScalarType = MDNode::get(Context, Elts);
4088     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4089     Metadata *Elts2[] = {ScalarType, ScalarType,
4090                          ConstantAsMetadata::get(
4091                              Constant::getNullValue(Type::getInt64Ty(Context))),
4092                          MD.getOperand(2)};
4093     return MDNode::get(Context, Elts2);
4094   }
4095   // Create a MDNode <MD, MD, offset 0>
4096   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4097                                     Type::getInt64Ty(Context)))};
4098   return MDNode::get(Context, Elts);
4099 }
4100 
4101 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4102                                       Instruction *&Temp) {
4103   if (Opc != Instruction::BitCast)
4104     return nullptr;
4105 
4106   Temp = nullptr;
4107   Type *SrcTy = V->getType();
4108   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4109       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4110     LLVMContext &Context = V->getContext();
4111 
4112     // We have no information about target data layout, so we assume that
4113     // the maximum pointer size is 64bit.
4114     Type *MidTy = Type::getInt64Ty(Context);
4115     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4116 
4117     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4118   }
4119 
4120   return nullptr;
4121 }
4122 
4123 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4124   if (Opc != Instruction::BitCast)
4125     return nullptr;
4126 
4127   Type *SrcTy = C->getType();
4128   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4129       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4130     LLVMContext &Context = C->getContext();
4131 
4132     // We have no information about target data layout, so we assume that
4133     // the maximum pointer size is 64bit.
4134     Type *MidTy = Type::getInt64Ty(Context);
4135 
4136     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4137                                      DestTy);
4138   }
4139 
4140   return nullptr;
4141 }
4142 
4143 /// Check the debug info version number, if it is out-dated, drop the debug
4144 /// info. Return true if module is modified.
4145 bool llvm::UpgradeDebugInfo(Module &M) {
4146   unsigned Version = getDebugMetadataVersionFromModule(M);
4147   if (Version == DEBUG_METADATA_VERSION) {
4148     bool BrokenDebugInfo = false;
4149     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4150       report_fatal_error("Broken module found, compilation aborted!");
4151     if (!BrokenDebugInfo)
4152       // Everything is ok.
4153       return false;
4154     else {
4155       // Diagnose malformed debug info.
4156       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4157       M.getContext().diagnose(Diag);
4158     }
4159   }
4160   bool Modified = StripDebugInfo(M);
4161   if (Modified && Version != DEBUG_METADATA_VERSION) {
4162     // Diagnose a version mismatch.
4163     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4164     M.getContext().diagnose(DiagVersion);
4165   }
4166   return Modified;
4167 }
4168 
4169 /// This checks for objc retain release marker which should be upgraded. It
4170 /// returns true if module is modified.
4171 static bool UpgradeRetainReleaseMarker(Module &M) {
4172   bool Changed = false;
4173   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4174   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4175   if (ModRetainReleaseMarker) {
4176     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4177     if (Op) {
4178       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4179       if (ID) {
4180         SmallVector<StringRef, 4> ValueComp;
4181         ID->getString().split(ValueComp, "#");
4182         if (ValueComp.size() == 2) {
4183           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4184           ID = MDString::get(M.getContext(), NewValue);
4185         }
4186         M.addModuleFlag(Module::Error, MarkerKey, ID);
4187         M.eraseNamedMetadata(ModRetainReleaseMarker);
4188         Changed = true;
4189       }
4190     }
4191   }
4192   return Changed;
4193 }
4194 
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls. Calls that cannot be converted (because a bitcast to the
  // intrinsic's signature would be invalid) are left untouched.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    // Nothing to do if the module never declared the old runtime function.
    if (!Fn)
      return;

    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);

    // Snapshot the user list: each upgraded call is erased as we go.
    for (User *U : make_early_inc_range(Fn->users())) {
      // Only rewrite direct calls to Fn; skip other uses (e.g. the function
      // address taken as an argument or stored somewhere).
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      // Leave this call in its original form if any argument cast failed.
      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the old declaration once every convertible call is gone.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!UpgradeRetainReleaseMarker(M))
    return;

  // Table of ARC runtime entry points and the intrinsics that replace them.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
4315 
bool llvm::UpgradeModuleFlags(Module &M) {
  // Returns true if any module flag was rewritten or added.
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    // A well-formed module flag is a (behavior, key, value) triple.
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    // fields was Error and now they are Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          // Rebuild the flag in place with the Max behavior, keeping the
          // original key and value operands.
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade Objective-C Image Info Section. Removed the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          // Re-join the components with all spaces removed.
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
    // If the higher bits are set, it adds new module flag for swift info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        // Already the upgraded i8 form; nothing to do for this flag.
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        // Bits above the low byte encode the Swift versions; remember them
        // so the corresponding flags can be added after the loop.
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        // Keep only the low byte as the i8 GC flag value.
        Metadata *Ops[3] = {
          ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
          Op->getOperand(1),
          ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  // Materialize the Swift version flags recorded while scanning above.
  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version",
                    SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
4423 
4424 void llvm::UpgradeSectionAttributes(Module &M) {
4425   auto TrimSpaces = [](StringRef Section) -> std::string {
4426     SmallVector<StringRef, 5> Components;
4427     Section.split(Components, ',');
4428 
4429     SmallString<32> Buffer;
4430     raw_svector_ostream OS(Buffer);
4431 
4432     for (auto Component : Components)
4433       OS << ',' << Component.trim();
4434 
4435     return std::string(OS.str().substr(1));
4436   };
4437 
4438   for (auto &GV : M.globals()) {
4439     if (!GV.hasSection())
4440       continue;
4441 
4442     StringRef Section = GV.getSection();
4443 
4444     if (!Section.startswith("__DATA, __objc_catlist"))
4445       continue;
4446 
4447     // __DATA, __objc_catlist, regular, no_dead_strip
4448     // __DATA,__objc_catlist,regular,no_dead_strip
4449     GV.setSection(TrimSpaces(Section));
4450   }
4451 }
4452 
4453 namespace {
4454 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
4455 // callsites within a function that did not also have the strictfp attribute.
4456 // Since 10.0, if strict FP semantics are needed within a function, the
4457 // function must have the strictfp attribute and all calls within the function
4458 // must also have the strictfp attribute. This latter restriction is
4459 // necessary to prevent unwanted libcall simplification when a function is
4460 // being cloned (such as for inlining).
4461 //
4462 // The "dangling" strictfp attribute usage was only used to prevent constant
4463 // folding and other libcall simplification. The nobuiltin attribute on the
4464 // callsite has the same effect.
4465 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
4466   StrictFPUpgradeVisitor() {}
4467 
4468   void visitCallBase(CallBase &Call) {
4469     if (!Call.isStrictFP())
4470       return;
4471     if (isa<ConstrainedFPIntrinsic>(&Call))
4472       return;
4473     // If we get here, the caller doesn't have the strictfp attribute
4474     // but this callsite does. Replace the strictfp attribute with nobuiltin.
4475     Call.removeFnAttr(Attribute::StrictFP);
4476     Call.addFnAttr(Attribute::NoBuiltin);
4477   }
4478 };
4479 } // namespace
4480 
4481 void llvm::UpgradeFunctionAttributes(Function &F) {
4482   // If a function definition doesn't have the strictfp attribute,
4483   // convert any callsite strictfp attributes to nobuiltin.
4484   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
4485     StrictFPUpgradeVisitor SFPV;
4486     SFPV.visit(F);
4487   }
4488 
4489   if (F.getCallingConv() == CallingConv::X86_INTR &&
4490       !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
4491     Type *ByValTy = cast<PointerType>(F.getArg(0)->getType())->getElementType();
4492     Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
4493     F.addParamAttr(0, NewAttr);
4494   }
4495 
4496   // Remove all incompatibile attributes from function.
4497   F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
4498   for (auto &Arg : F.args())
4499     Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
4500 }
4501 
4502 static bool isOldLoopArgument(Metadata *MD) {
4503   auto *T = dyn_cast_or_null<MDTuple>(MD);
4504   if (!T)
4505     return false;
4506   if (T->getNumOperands() < 1)
4507     return false;
4508   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4509   if (!S)
4510     return false;
4511   return S->getString().startswith("llvm.vectorizer.");
4512 }
4513 
4514 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4515   StringRef OldPrefix = "llvm.vectorizer.";
4516   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4517 
4518   if (OldTag == "llvm.vectorizer.unroll")
4519     return MDString::get(C, "llvm.loop.interleave.count");
4520 
4521   return MDString::get(
4522       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
4523              .str());
4524 }
4525 
4526 static Metadata *upgradeLoopArgument(Metadata *MD) {
4527   auto *T = dyn_cast_or_null<MDTuple>(MD);
4528   if (!T)
4529     return MD;
4530   if (T->getNumOperands() < 1)
4531     return MD;
4532   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4533   if (!OldTag)
4534     return MD;
4535   if (!OldTag->getString().startswith("llvm.vectorizer."))
4536     return MD;
4537 
4538   // This has an old tag.  Upgrade it.
4539   SmallVector<Metadata *, 8> Ops;
4540   Ops.reserve(T->getNumOperands());
4541   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4542   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4543     Ops.push_back(T->getOperand(I));
4544 
4545   return MDTuple::get(T->getContext(), Ops);
4546 }
4547 
4548 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4549   auto *T = dyn_cast<MDTuple>(&N);
4550   if (!T)
4551     return &N;
4552 
4553   if (none_of(T->operands(), isOldLoopArgument))
4554     return &N;
4555 
4556   SmallVector<Metadata *, 8> Ops;
4557   Ops.reserve(T->getNumOperands());
4558   for (Metadata *MD : T->operands())
4559     Ops.push_back(upgradeLoopArgument(MD));
4560 
4561   return MDTuple::get(T->getContext(), Ops);
4562 }
4563 
4564 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4565   Triple T(TT);
4566   // For AMDGPU we uprgrade older DataLayouts to include the default globals
4567   // address space of 1.
4568   if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
4569     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
4570   }
4571 
4572   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4573   // If X86, and the datalayout matches the expected format, add pointer size
4574   // address spaces to the datalayout.
4575   if (!T.isX86() || DL.contains(AddrSpaces))
4576     return std::string(DL);
4577 
4578   SmallVector<StringRef, 4> Groups;
4579   Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4580   if (!R.match(DL, &Groups))
4581     return std::string(DL);
4582 
4583   return (Groups[1] + AddrSpaces + Groups[3]).str();
4584 }
4585 
4586 void llvm::UpgradeAttributes(AttrBuilder &B) {
4587   StringRef FramePointer;
4588   if (B.contains("no-frame-pointer-elim")) {
4589     // The value can be "true" or "false".
4590     for (const auto &I : B.td_attrs())
4591       if (I.first == "no-frame-pointer-elim")
4592         FramePointer = I.second == "true" ? "all" : "none";
4593     B.removeAttribute("no-frame-pointer-elim");
4594   }
4595   if (B.contains("no-frame-pointer-elim-non-leaf")) {
4596     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
4597     if (FramePointer != "all")
4598       FramePointer = "non-leaf";
4599     B.removeAttribute("no-frame-pointer-elim-non-leaf");
4600   }
4601   if (!FramePointer.empty())
4602     B.addAttribute("frame-pointer", FramePointer);
4603 
4604   if (B.contains("null-pointer-is-valid")) {
4605     // The value can be "true" or "false".
4606     bool NullPointerIsValid = false;
4607     for (const auto &I : B.td_attrs())
4608       if (I.first == "null-pointer-is-valid")
4609         NullPointerIsValid = I.second == "true";
4610     B.removeAttribute("null-pointer-is-valid");
4611     if (NullPointerIsValid)
4612       B.addAttribute(Attribute::NullPointerIsValid);
4613   }
4614 }
4615