xref: /llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp (revision 403897484f939cffd9b813eb0b759d7113f5295b)
1 //===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the library calls simplifier. It does not implement
10 // any pass, but can be used by other passes to do simplifications.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Analysis/ConstantFolding.h"
19 #include "llvm/Analysis/Loads.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/IR/AttributeMask.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/PatternMatch.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/TargetParser/Triple.h"
36 #include "llvm/Transforms/Utils/BuildLibCalls.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/SizeOpts.h"
39 
40 #include <cmath>
41 
42 using namespace llvm;
43 using namespace PatternMatch;
44 
45 static cl::opt<bool>
46     EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
47                          cl::init(false),
48                          cl::desc("Enable unsafe double to float "
49                                   "shrinking for math lib calls"));
50 
51 // Enable conversion of operator new calls with a MemProf hot or cold hint
52 // to an operator new call that takes a hot/cold hint. Off by default since
53 // not all allocators currently support this extension.
54 static cl::opt<bool>
55     OptimizeHotColdNew("optimize-hot-cold-new", cl::Hidden, cl::init(false),
56                        cl::desc("Enable hot/cold operator new library calls"));
57 static cl::opt<bool> OptimizeExistingHotColdNew(
58     "optimize-existing-hot-cold-new", cl::Hidden, cl::init(false),
59     cl::desc(
60         "Enable optimization of existing hot/cold operator new library calls"));
61 
62 namespace {
63 
64 // Specialized parser to ensure the hint is an 8 bit value (we can't specify
65 // uint8_t to opt<> as that is interpreted to mean that we are passing a char
66 // option with a specific set of values).
67 struct HotColdHintParser : public cl::parser<unsigned> {
68   HotColdHintParser(cl::Option &O) : cl::parser<unsigned>(O) {}
69 
70   bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {
71     if (Arg.getAsInteger(0, Value))
72       return O.error("'" + Arg + "' value invalid for uint argument!");
73 
74     if (Value > 255)
75       return O.error("'" + Arg + "' value must be in the range [0, 255]!");
76 
77     return false;
78   }
79 };
80 
81 } // end anonymous namespace
82 
83 // Hot/cold operator new takes an 8 bit hotness hint, where 0 is the coldest
84 // and 255 is the hottest. Default to 1 value away from the coldest and hottest
85 // hints, so that compiler-hinted allocations are slightly less strong than
86 // manually inserted hints at the two extremes.
87 static cl::opt<unsigned, false, HotColdHintParser> ColdNewHintValue(
88     "cold-new-hint-value", cl::Hidden, cl::init(1),
89     cl::desc("Value to pass to hot/cold operator new for cold allocation"));
90 static cl::opt<unsigned, false, HotColdHintParser>
91     NotColdNewHintValue("notcold-new-hint-value", cl::Hidden, cl::init(128),
92                         cl::desc("Value to pass to hot/cold operator new for "
93                                  "notcold (warm) allocation"));
94 static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue(
95     "hot-new-hint-value", cl::Hidden, cl::init(254),
96     cl::desc("Value to pass to hot/cold operator new for hot allocation"));
97 
98 //===----------------------------------------------------------------------===//
99 // Helper Functions
100 //===----------------------------------------------------------------------===//
101 
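// Functions whose calls are simplified without first checking that the call
// uses the C calling convention.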
102 static bool ignoreCallingConv(LibFunc Func) {
103   return Func == LibFunc_abs || Func == LibFunc_labs ||
104          Func == LibFunc_llabs || Func == LibFunc_strlen;
105 }
106 
107 /// Return true if V is only used in equality comparisons with With.
108 static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
109   for (User *U : V->users()) {
110     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
111       if (IC->isEquality() && IC->getOperand(1) == With)
112         continue;
113     // Unknown instruction.
114     return false;
115   }
116   return true;
117 }
118 
119 static bool callHasFloatingPointArgument(const CallInst *CI) {
120   return any_of(CI->operands(), [](const Use &OI) {
121     return OI->getType()->isFloatingPointTy();
122   });
123 }
124 
125 static bool callHasFP128Argument(const CallInst *CI) {
126   return any_of(CI->operands(), [](const Use &OI) {
127     return OI->getType()->isFP128Ty();
128   });
129 }
130 
131 // Convert the entire string Str representing an integer in Base, up to
132 // the terminating nul if present, to a constant according to the rules
133 // of strtoul[l] or, when AsSigned is set, of strtol[l].  On success
134 // return the result, otherwise null.
135 // The function assumes the string is encoded in ASCII and carefully
136 // avoids converting sequences (including "") that the corresponding
137 // library call might fail and set errno for.
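// For example, with Base == 0 and AsSigned set (strtol), the string " -0x1a"
// folds to -26, and when EndPtr is nonnull the stored end pointer points just
// past the 'a'.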
138 static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
139                               uint64_t Base, bool AsSigned, IRBuilderBase &B) {
140   if (Base < 2 || Base > 36)
141     if (Base != 0)
142       // Fail for an invalid base (required by POSIX).
143       return nullptr;
144 
145   // Current offset into the original string to reflect in EndPtr.
146   size_t Offset = 0;
147   // Strip leading whitespace.
148   for ( ; Offset != Str.size(); ++Offset)
149     if (!isSpace((unsigned char)Str[Offset])) {
150       Str = Str.substr(Offset);
151       break;
152     }
153 
154   if (Str.empty())
155     // Fail for empty subject sequences (POSIX allows but doesn't require
156     // strtol[l]/strtoul[l] to fail with EINVAL).
157     return nullptr;
158 
159   // Strip but remember the sign.
160   bool Negate = Str[0] == '-';
161   if (Str[0] == '-' || Str[0] == '+') {
162     Str = Str.drop_front();
163     if (Str.empty())
164       // Fail for a sign with nothing after it.
165       return nullptr;
166     ++Offset;
167   }
168 
169   // Set Max to the absolute value of the minimum (for signed), or
170   // to the maximum (for unsigned) value representable in the type.
171   Type *RetTy = CI->getType();
172   unsigned NBits = RetTy->getPrimitiveSizeInBits();
173   uint64_t Max = AsSigned && Negate ? 1 : 0;
174   Max += AsSigned ? maxIntN(NBits) : maxUIntN(NBits);
175 
176   // Autodetect Base if it's zero and consume the "0x" prefix.
177   if (Str.size() > 1) {
178     if (Str[0] == '0') {
179       if (toUpper((unsigned char)Str[1]) == 'X') {
180         if (Str.size() == 2 || (Base && Base != 16))
181           // Fail if Base doesn't allow the "0x" prefix or for the prefix
182           // alone that implementations like BSD set errno to EINVAL for.
183           return nullptr;
184 
185         Str = Str.drop_front(2);
186         Offset += 2;
187         Base = 16;
188       }
189       else if (Base == 0)
190         Base = 8;
191     } else if (Base == 0)
192       Base = 10;
193   }
194   else if (Base == 0)
195     Base = 10;
196 
197   // Convert the rest of the subject sequence, not including the sign,
198   // to its uint64_t representation (this assumes the source character
199   // set is ASCII).
200   uint64_t Result = 0;
201   for (unsigned i = 0; i != Str.size(); ++i) {
202     unsigned char DigVal = Str[i];
203     if (isDigit(DigVal))
204       DigVal = DigVal - '0';
205     else {
206       DigVal = toUpper(DigVal);
207       if (isAlpha(DigVal))
208         DigVal = DigVal - 'A' + 10;
209       else
210         return nullptr;
211     }
212 
213     if (DigVal >= Base)
214       // Fail if the digit is not valid in the Base.
215       return nullptr;
216 
217     // Add the digit and fail if the result is not representable in
218     // the (unsigned form of the) destination type.
219     bool VFlow;
220     Result = SaturatingMultiplyAdd(Result, Base, (uint64_t)DigVal, &VFlow);
221     if (VFlow || Result > Max)
222       return nullptr;
223   }
224 
225   if (EndPtr) {
226     // Store the pointer to the end.
227     Value *Off = B.getInt64(Offset + Str.size());
228     Value *StrBeg = CI->getArgOperand(0);
229     Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr");
230     B.CreateStore(StrEnd, EndPtr);
231   }
232 
233   if (Negate)
234     // Unsigned negation doesn't overflow.
235     Result = -Result;
236 
237   return ConstantInt::get(RetTy, Result);
238 }
239 
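// Return true if every use of V is an icmp whose second operand is the
// constant zero.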
240 static bool isOnlyUsedInComparisonWithZero(Value *V) {
241   for (User *U : V->users()) {
242     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
243       if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
244         if (C->isNullValue())
245           continue;
246     // Unknown instruction.
247     return false;
248   }
249   return true;
250 }
251 
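// Return true if the string comparison CI can be lowered to a memcmp reading
// Len bytes of Str: the result is only compared against zero, Str is known to
// be dereferenceable for Len bytes, and the caller is not instrumented by
// MemorySanitizer.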
252 static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
253                                  const DataLayout &DL) {
254   if (!isOnlyUsedInComparisonWithZero(CI))
255     return false;
256 
257   if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
258     return false;
259 
260   if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
261     return false;
262 
263   return true;
264 }
265 
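// Add or raise the dereferenceable attribute on each pointer argument in
// ArgNos to DereferenceableBytes, also taking existing dereferenceable_or_null
// bytes into account when the argument cannot be null (either because null is
// not a valid pointer in its address space or because it is marked nonnull).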
266 static void annotateDereferenceableBytes(CallInst *CI,
267                                          ArrayRef<unsigned> ArgNos,
268                                          uint64_t DereferenceableBytes) {
269   const Function *F = CI->getCaller();
270   if (!F)
271     return;
272   for (unsigned ArgNo : ArgNos) {
273     uint64_t DerefBytes = DereferenceableBytes;
274     unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
275     if (!llvm::NullPointerIsDefined(F, AS) ||
276         CI->paramHasAttr(ArgNo, Attribute::NonNull))
277       DerefBytes = std::max(CI->getParamDereferenceableOrNullBytes(ArgNo),
278                             DereferenceableBytes);
279 
280     if (CI->getParamDereferenceableBytes(ArgNo) < DerefBytes) {
281       CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
282       if (!llvm::NullPointerIsDefined(F, AS) ||
283           CI->paramHasAttr(ArgNo, Attribute::NonNull))
284         CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull);
285       CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
286                                   CI->getContext(), DerefBytes));
287     }
288   }
289 }
290 
291 static void annotateNonNullNoUndefBasedOnAccess(CallInst *CI,
292                                          ArrayRef<unsigned> ArgNos) {
293   Function *F = CI->getCaller();
294   if (!F)
295     return;
296 
297   for (unsigned ArgNo : ArgNos) {
298     if (!CI->paramHasAttr(ArgNo, Attribute::NoUndef))
299       CI->addParamAttr(ArgNo, Attribute::NoUndef);
300 
301     if (!CI->paramHasAttr(ArgNo, Attribute::NonNull)) {
302       unsigned AS =
303           CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
304       if (llvm::NullPointerIsDefined(F, AS))
305         continue;
306       CI->addParamAttr(ArgNo, Attribute::NonNull);
307     }
308 
309     annotateDereferenceableBytes(CI, ArgNo, 1);
310   }
311 }
312 
313 static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos,
314                                Value *Size, const DataLayout &DL) {
315   if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
316     annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
317     annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
318   } else if (isKnownNonZero(Size, DL)) {
319     annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
320     const APInt *X, *Y;
321     uint64_t DerefMin = 1;
322     if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) {
323       DerefMin = std::min(X->getZExtValue(), Y->getZExtValue());
324       annotateDereferenceableBytes(CI, ArgNos, DerefMin);
325     }
326   }
327 }
328 
329 // Copy CallInst "flags" like musttail, notail, and tail. Return New param for
330 // easier chaining. Calls to emit* and B.CreateCall should probably be wrapped
331 // in this function when New is created to replace Old. Callers should take
332 // care to check Old.isMustTailCall() if they aren't replacing Old directly
333 // with New.
334 static Value *copyFlags(const CallInst &Old, Value *New) {
335   assert(!Old.isMustTailCall() && "do not copy musttail call flags");
336   assert(!Old.isNoTailCall() && "do not copy notail call flags");
337   if (auto *NewCI = dyn_cast_or_null<CallInst>(New))
338     NewCI->setTailCallKind(Old.getTailCallKind());
339   return New;
340 }
341 
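// Merge Old's attributes into NewCI, drop any return attributes that are
// incompatible with NewCI's type, and copy Old's tail-call kind.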
342 static Value *mergeAttributesAndFlags(CallInst *NewCI, const CallInst &Old) {
343   NewCI->setAttributes(AttributeList::get(
344       NewCI->getContext(), {NewCI->getAttributes(), Old.getAttributes()}));
345   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
346   return copyFlags(Old, NewCI);
347 }
348 
349 // Helper to avoid truncating the length if size_t is 32-bits.
350 static StringRef substr(StringRef Str, uint64_t Len) {
351   return Len >= Str.size() ? Str : Str.substr(0, Len);
352 }
353 
354 //===----------------------------------------------------------------------===//
355 // String and Memory Library Call Optimizations
356 //===----------------------------------------------------------------------===//
357 
358 Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
359   // Extract some information from the instruction
360   Value *Dst = CI->getArgOperand(0);
361   Value *Src = CI->getArgOperand(1);
362   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
363 
364   // See if we can get the length of the input string.
365   uint64_t Len = GetStringLength(Src);
366   if (Len)
367     annotateDereferenceableBytes(CI, 1, Len);
368   else
369     return nullptr;
370   --Len; // Unbias length.
371 
372   // Handle the simple, do-nothing case: strcat(x, "") -> x
373   if (Len == 0)
374     return Dst;
375 
376   return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B));
377 }
378 
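// Emit a call to strlen(Dst) followed by a memcpy of Len + 1 bytes (including
// the terminating nul) from Src to Dst + strlen(Dst).  Used to lower strcat
// and strncat when the length of the source string is known.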
379 Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
380                                            IRBuilderBase &B) {
381   // We need to find the end of the destination string.  That's where the
382   // memory is to be moved to. We just generate a call to strlen.
383   Value *DstLen = emitStrLen(Dst, B, DL, TLI);
384   if (!DstLen)
385     return nullptr;
386 
387   // Now that we have the destination's length, we must index into the
388   // destination's pointer to get the actual memcpy destination (end of
389   // the string .. we're concatenating).
390   Value *CpyDst = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
391 
392   // We have enough information to now generate the memcpy call to do the
393   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
394   B.CreateMemCpy(
395       CpyDst, Align(1), Src, Align(1),
396       ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
397   return Dst;
398 }
399 
400 Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
401   // Extract some information from the instruction.
402   Value *Dst = CI->getArgOperand(0);
403   Value *Src = CI->getArgOperand(1);
404   Value *Size = CI->getArgOperand(2);
405   uint64_t Len;
406   annotateNonNullNoUndefBasedOnAccess(CI, 0);
407   if (isKnownNonZero(Size, DL))
408     annotateNonNullNoUndefBasedOnAccess(CI, 1);
409 
410   // We don't do anything if length is not constant.
411   ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size);
412   if (LengthArg) {
413     Len = LengthArg->getZExtValue();
414     // strncat(x, c, 0) -> x
415     if (!Len)
416       return Dst;
417   } else {
418     return nullptr;
419   }
420 
421   // See if we can get the length of the input string.
422   uint64_t SrcLen = GetStringLength(Src);
423   if (SrcLen) {
424     annotateDereferenceableBytes(CI, 1, SrcLen);
425     --SrcLen; // Unbias length.
426   } else {
427     return nullptr;
428   }
429 
430   // strncat(x, "", c) -> x
431   if (SrcLen == 0)
432     return Dst;
433 
434   // We don't optimize this case.
435   if (Len < SrcLen)
436     return nullptr;
437 
438   // strncat(x, s, c) -> strcat(x, s)
439   // s is constant so the strcat can be optimized further.
440   return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
441 }
442 
443 // Helper to transform memchr(S, C, N) == S to N && *S == C and, when
444 // NBytes is null, strchr(S, C) to *S == C.  A precondition of the function
445 // is that either S is dereferenceable or the value of N is nonzero.
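// The returned value is S when the first character matches C (and, when
// NBytes is given, when NBytes is also nonzero), and null otherwise, so the
// caller's equality comparison against S folds down to the character test.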
446 static Value* memChrToCharCompare(CallInst *CI, Value *NBytes,
447                                   IRBuilderBase &B, const DataLayout &DL)
448 {
449   Value *Src = CI->getArgOperand(0);
450   Value *CharVal = CI->getArgOperand(1);
451 
452   // Fold memchr(A, C, N) == A to N && *A == C.
453   Type *CharTy = B.getInt8Ty();
454   Value *Char0 = B.CreateLoad(CharTy, Src);
455   CharVal = B.CreateTrunc(CharVal, CharTy);
456   Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp");
457 
458   if (NBytes) {
459     Value *Zero = ConstantInt::get(NBytes->getType(), 0);
460     Value *And = B.CreateICmpNE(NBytes, Zero);
461     Cmp = B.CreateLogicalAnd(And, Cmp);
462   }
463 
464   Value *NullPtr = Constant::getNullValue(CI->getType());
465   return B.CreateSelect(Cmp, Src, NullPtr);
466 }
467 
468 Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
469   Value *SrcStr = CI->getArgOperand(0);
470   Value *CharVal = CI->getArgOperand(1);
471   annotateNonNullNoUndefBasedOnAccess(CI, 0);
472 
473   if (isOnlyUsedInEqualityComparison(CI, SrcStr))
474     return memChrToCharCompare(CI, nullptr, B, DL);
475 
476   // If the second operand is non-constant, see if we can compute the length
477   // of the input string and turn this into memchr.
478   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
479   if (!CharC) {
480     uint64_t Len = GetStringLength(SrcStr);
481     if (Len)
482       annotateDereferenceableBytes(CI, 0, Len);
483     else
484       return nullptr;
485 
486     Function *Callee = CI->getCalledFunction();
487     FunctionType *FT = Callee->getFunctionType();
488     unsigned IntBits = TLI->getIntSize();
489     if (!FT->getParamType(1)->isIntegerTy(IntBits)) // memchr needs 'int'.
490       return nullptr;
491 
492     unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
493     Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
494     return copyFlags(*CI,
495                      emitMemChr(SrcStr, CharVal, // include nul.
496                                 ConstantInt::get(SizeTTy, Len), B,
497                                 DL, TLI));
498   }
499 
500   if (CharC->isZero()) {
501     Value *NullPtr = Constant::getNullValue(CI->getType());
502     if (isOnlyUsedInEqualityComparison(CI, NullPtr))
503       // Pre-empt the transformation to strlen below and fold
504       // strchr(A, '\0') == null to false.
505       return B.CreateIntToPtr(B.getTrue(), CI->getType());
506   }
507 
508   // Otherwise, the character is a constant, see if the first argument is
509   // a string literal.  If so, we can constant fold.
510   StringRef Str;
511   if (!getConstantStringInfo(SrcStr, Str)) {
512     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
513       if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI))
514         return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr");
515     return nullptr;
516   }
517 
518   // Compute the offset, make sure to handle the case when we're searching for
519   // zero (a weird way to spell strlen).
520   size_t I = (0xFF & CharC->getSExtValue()) == 0
521                  ? Str.size()
522                  : Str.find(CharC->getSExtValue());
523   if (I == StringRef::npos) // Didn't find the char.  strchr returns null.
524     return Constant::getNullValue(CI->getType());
525 
526   // strchr(s+n,c)  -> gep(s+n+i,c)
527   return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
528 }
529 
530 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
531   Value *SrcStr = CI->getArgOperand(0);
532   Value *CharVal = CI->getArgOperand(1);
533   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
534   annotateNonNullNoUndefBasedOnAccess(CI, 0);
535 
536   StringRef Str;
537   if (!getConstantStringInfo(SrcStr, Str)) {
538     // strrchr(s, 0) -> strchr(s, 0)
539     if (CharC && CharC->isZero())
540       return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
541     return nullptr;
542   }
543 
544   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
545   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
546 
547   // Try to expand strrchr to the memrchr nonstandard extension if it's
548   // available, or simply fail otherwise.
549   uint64_t NBytes = Str.size() + 1;   // Include the terminating nul.
550   Value *Size = ConstantInt::get(SizeTTy, NBytes);
551   return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI));
552 }
553 
554 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
555   Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
556   if (Str1P == Str2P) // strcmp(x,x)  -> 0
557     return ConstantInt::get(CI->getType(), 0);
558 
559   StringRef Str1, Str2;
560   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
561   bool HasStr2 = getConstantStringInfo(Str2P, Str2);
562 
563   // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
564   if (HasStr1 && HasStr2)
565     return ConstantInt::get(CI->getType(),
566                             std::clamp(Str1.compare(Str2), -1, 1));
567 
568   if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
569     return B.CreateNeg(B.CreateZExt(
570         B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
571 
572   if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
573     return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
574                         CI->getType());
575 
576   // strcmp(P, "x") -> memcmp(P, "x", 2)
577   uint64_t Len1 = GetStringLength(Str1P);
578   if (Len1)
579     annotateDereferenceableBytes(CI, 0, Len1);
580   uint64_t Len2 = GetStringLength(Str2P);
581   if (Len2)
582     annotateDereferenceableBytes(CI, 1, Len2);
583 
584   if (Len1 && Len2) {
585     return copyFlags(
586         *CI, emitMemCmp(Str1P, Str2P,
587                         ConstantInt::get(DL.getIntPtrType(CI->getContext()),
588                                          std::min(Len1, Len2)),
589                         B, DL, TLI));
590   }
591 
592   // strcmp to memcmp
593   if (!HasStr1 && HasStr2) {
594     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
595       return copyFlags(
596           *CI,
597           emitMemCmp(Str1P, Str2P,
598                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
599                      B, DL, TLI));
600   } else if (HasStr1 && !HasStr2) {
601     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
602       return copyFlags(
603           *CI,
604           emitMemCmp(Str1P, Str2P,
605                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
606                      B, DL, TLI));
607   }
608 
609   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
610   return nullptr;
611 }
612 
613 // Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
614 // arrays LHS and RHS and nonconstant Size.
615 static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
616                                     Value *Size, bool StrNCmp,
617                                     IRBuilderBase &B, const DataLayout &DL);
618 
619 Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
620   Value *Str1P = CI->getArgOperand(0);
621   Value *Str2P = CI->getArgOperand(1);
622   Value *Size = CI->getArgOperand(2);
623   if (Str1P == Str2P) // strncmp(x,x,n)  -> 0
624     return ConstantInt::get(CI->getType(), 0);
625 
626   if (isKnownNonZero(Size, DL))
627     annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
628   // Get the length argument if it is constant.
629   uint64_t Length;
630   if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
631     Length = LengthArg->getZExtValue();
632   else
633     return optimizeMemCmpVarSize(CI, Str1P, Str2P, Size, true, B, DL);
634 
635   if (Length == 0) // strncmp(x,y,0)   -> 0
636     return ConstantInt::get(CI->getType(), 0);
637 
638   if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
639     return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI));
640 
641   StringRef Str1, Str2;
642   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
643   bool HasStr2 = getConstantStringInfo(Str2P, Str2);
644 
645   // strncmp(x, y, n)  -> cnst  (if both x and y are constant strings)
646   if (HasStr1 && HasStr2) {
647     // Avoid truncating the 64-bit Length to 32 bits in ILP32.
648     StringRef SubStr1 = substr(Str1, Length);
649     StringRef SubStr2 = substr(Str2, Length);
650     return ConstantInt::get(CI->getType(),
651                             std::clamp(SubStr1.compare(SubStr2), -1, 1));
652   }
653 
654   if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
655     return B.CreateNeg(B.CreateZExt(
656         B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
657 
658   if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
659     return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
660                         CI->getType());
661 
662   uint64_t Len1 = GetStringLength(Str1P);
663   if (Len1)
664     annotateDereferenceableBytes(CI, 0, Len1);
665   uint64_t Len2 = GetStringLength(Str2P);
666   if (Len2)
667     annotateDereferenceableBytes(CI, 1, Len2);
668 
669   // strncmp to memcmp
670   if (!HasStr1 && HasStr2) {
671     Len2 = std::min(Len2, Length);
672     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
673       return copyFlags(
674           *CI,
675           emitMemCmp(Str1P, Str2P,
676                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
677                      B, DL, TLI));
678   } else if (HasStr1 && !HasStr2) {
679     Len1 = std::min(Len1, Length);
680     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
681       return copyFlags(
682           *CI,
683           emitMemCmp(Str1P, Str2P,
684                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
685                      B, DL, TLI));
686   }
687 
688   return nullptr;
689 }
690 
691 Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) {
692   Value *Src = CI->getArgOperand(0);
693   ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
694   uint64_t SrcLen = GetStringLength(Src);
695   if (SrcLen && Size) {
696     annotateDereferenceableBytes(CI, 0, SrcLen);
697     if (SrcLen <= Size->getZExtValue() + 1)
698       return copyFlags(*CI, emitStrDup(Src, B, TLI));
699   }
700 
701   return nullptr;
702 }
703 
704 Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
705   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
706   if (Dst == Src) // strcpy(x,x)  -> x
707     return Src;
708 
709   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
710   // See if we can get the length of the input string.
711   uint64_t Len = GetStringLength(Src);
712   if (Len)
713     annotateDereferenceableBytes(CI, 1, Len);
714   else
715     return nullptr;
716 
717   // We have enough information to now generate the memcpy call to do the
718   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
719   CallInst *NewCI =
720       B.CreateMemCpy(Dst, Align(1), Src, Align(1),
721                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
722   mergeAttributesAndFlags(NewCI, *CI);
723   return Dst;
724 }
725 
726 Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
727   Function *Callee = CI->getCalledFunction();
728   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
729 
730   // stpcpy(d,s) -> strcpy(d,s) if the result is not used.
731   if (CI->use_empty())
732     return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
733 
734   if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
735     Value *StrLen = emitStrLen(Src, B, DL, TLI);
736     return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
737   }
738 
739   // See if we can get the length of the input string.
740   uint64_t Len = GetStringLength(Src);
741   if (Len)
742     annotateDereferenceableBytes(CI, 1, Len);
743   else
744     return nullptr;
745 
746   Type *PT = Callee->getFunctionType()->getParamType(0);
747   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
748   Value *DstEnd = B.CreateInBoundsGEP(
749       B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
750 
751   // We have enough information to now generate the memcpy call to do the
752   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
753   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
754   mergeAttributesAndFlags(NewCI, *CI);
755   return DstEnd;
756 }
757 
758 // Optimize a call to size_t strlcpy(char*, const char*, size_t).
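// strlcpy copies at most Size - 1 bytes, always nul-terminates the
// destination when Size is nonzero, and returns strlen(Src); for example,
// strlcpy(D, "abcd", 3) stores "ab" plus a nul and evaluates to 4.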
759 
760 Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
761   Value *Size = CI->getArgOperand(2);
762   if (isKnownNonZero(Size, DL))
763     // Like snprintf, the function stores into the destination only when
764     // the size argument is nonzero.
765     annotateNonNullNoUndefBasedOnAccess(CI, 0);
766   // The function reads the source argument regardless of Size (it returns
767   // its length).
768   annotateNonNullNoUndefBasedOnAccess(CI, 1);
769 
770   uint64_t NBytes;
771   if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
772     NBytes = SizeC->getZExtValue();
773   else
774     return nullptr;
775 
776   Value *Dst = CI->getArgOperand(0);
777   Value *Src = CI->getArgOperand(1);
778   if (NBytes <= 1) {
779     if (NBytes == 1)
780       // For a call to strlcpy(D, S, 1) first store a nul in *D.
781       B.CreateStore(B.getInt8(0), Dst);
782 
783     // Transform strlcpy(D, S, 0) to a call to strlen(S).
784     return copyFlags(*CI, emitStrLen(Src, B, DL, TLI));
785   }
786 
787   // Try to determine the length of the source, substituting its size
788   // when it's not nul-terminated (as it's required to be) to avoid
789   // reading past its end.
790   StringRef Str;
791   if (!getConstantStringInfo(Src, Str, /*TrimAtNul=*/false))
792     return nullptr;
793 
794   uint64_t SrcLen = Str.find('\0');
795   // Set if the terminating nul should be copied by the call to memcpy
796   // below.
797   bool NulTerm = SrcLen < NBytes;
798 
799   if (NulTerm)
800     // Overwrite NBytes with the number of bytes to copy, including
801     // the terminating nul.
802     NBytes = SrcLen + 1;
803   else {
804     // Set the length of the source for the function to return to its
805     // size, and cap NBytes at the same.
806     SrcLen = std::min(SrcLen, uint64_t(Str.size()));
807     NBytes = std::min(NBytes - 1, SrcLen);
808   }
809 
810   if (SrcLen == 0) {
811     // Transform strlcpy(D, "", N) to (*D = '\0', 0).
812     B.CreateStore(B.getInt8(0), Dst);
813     return ConstantInt::get(CI->getType(), 0);
814   }
815 
816   Function *Callee = CI->getCalledFunction();
817   Type *PT = Callee->getFunctionType()->getParamType(0);
818   // Transform strlcpy(D, S, N) to memcpy(D, S, N') where N' is the lower
819   // bound on strlen(S) + 1 and N, optionally followed by a nul store to
820   // D[N' - 1] if necessary.
821   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
822                         ConstantInt::get(DL.getIntPtrType(PT), NBytes));
823   mergeAttributesAndFlags(NewCI, *CI);
824 
825   if (!NulTerm) {
826     Value *EndOff = ConstantInt::get(CI->getType(), NBytes);
827     Value *EndPtr = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, EndOff);
828     B.CreateStore(B.getInt8(0), EndPtr);
829   }
830 
831   // Like snprintf, strlcpy returns the number of nonzero bytes that would
832   // have been copied if the bound had been sufficiently big (which in this
833   // case is strlen(Src)).
834   return ConstantInt::get(CI->getType(), SrcLen);
835 }
836 
837 // Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy
838 // otherwise.
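// Both copy at most N bytes and pad the destination with nul bytes when the
// source is shorter; e.g., strncpy(D, "ab", 4) becomes memcpy(D, "ab\0\0", 4)
// below, and stpncpy additionally returns D + 2 (the first nul written).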
839 Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
840                                              IRBuilderBase &B) {
841   Function *Callee = CI->getCalledFunction();
842   Value *Dst = CI->getArgOperand(0);
843   Value *Src = CI->getArgOperand(1);
844   Value *Size = CI->getArgOperand(2);
845 
846   if (isKnownNonZero(Size, DL)) {
847     // Both st{p,r}ncpy(D, S, N) access the source and destination arrays
848     // only when N is nonzero.
849     annotateNonNullNoUndefBasedOnAccess(CI, 0);
850     annotateNonNullNoUndefBasedOnAccess(CI, 1);
851   }
852 
853   // If the "bound" argument is known, set N to it.  Otherwise set it to
854   // UINT64_MAX and handle it later.
855   uint64_t N = UINT64_MAX;
856   if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
857     N = SizeC->getZExtValue();
858 
859   if (N == 0)
860     // Fold st{p,r}ncpy(D, S, 0) to D.
861     return Dst;
862 
863   if (N == 1) {
864     Type *CharTy = B.getInt8Ty();
865     Value *CharVal = B.CreateLoad(CharTy, Src, "stxncpy.char0");
866     B.CreateStore(CharVal, Dst);
867     if (!RetEnd)
868       // Transform strncpy(D, S, 1) to return (*D = *S), D.
869       return Dst;
870 
871     // Transform stpncpy(D, S, 1) to return (*D = *S) ? D + 1 : D.
872     Value *ZeroChar = ConstantInt::get(CharTy, 0);
873     Value *Cmp = B.CreateICmpEQ(CharVal, ZeroChar, "stpncpy.char0cmp");
874 
875     Value *Off1 = B.getInt32(1);
876     Value *EndPtr = B.CreateInBoundsGEP(CharTy, Dst, Off1, "stpncpy.end");
877     return B.CreateSelect(Cmp, Dst, EndPtr, "stpncpy.sel");
878   }
879 
880   // If the length of the input string is known, set SrcLen to it.
881   uint64_t SrcLen = GetStringLength(Src);
882   if (SrcLen)
883     annotateDereferenceableBytes(CI, 1, SrcLen);
884   else
885     return nullptr;
886 
887   --SrcLen; // Unbias length.
888 
889   if (SrcLen == 0) {
890     // Transform st{p,r}ncpy(D, "", N) to memset(D, '\0', N) for any N.
891     Align MemSetAlign =
892       CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
893     CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
894     AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0));
895     NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
896         CI->getContext(), 0, ArgAttrs));
897     copyFlags(*CI, NewCI);
898     return Dst;
899   }
900 
901   if (N > SrcLen + 1) {
902     if (N > 128)
903       // Bail if N is large or unknown.
904       return nullptr;
905 
906     // st{p,r}ncpy(D, "a", N) -> memcpy(D, "a\0\0\0", N) for N <= 128.
907     StringRef Str;
908     if (!getConstantStringInfo(Src, Str))
909       return nullptr;
910     std::string SrcStr = Str.str();
911     // Create a bigger, nul-padded array with the same length, SrcLen,
912     // as the original string.
913     SrcStr.resize(N, '\0');
914     Src = B.CreateGlobalString(SrcStr, "str", /*AddressSpace=*/0,
915                                /*M=*/nullptr, /*AddNull=*/false);
916   }
917 
918   Type *PT = Callee->getFunctionType()->getParamType(0);
919   // st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both
920   // S and N are constant.
921   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
922                                    ConstantInt::get(DL.getIntPtrType(PT), N));
923   mergeAttributesAndFlags(NewCI, *CI);
924   if (!RetEnd)
925     return Dst;
926 
927   // stpncpy(D, S, N) returns the address of the first null in D if it writes
928   // one, otherwise D + N.
929   Value *Off = B.getInt64(std::min(SrcLen, N));
930   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Off, "endptr");
931 }
932 
933 Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
934                                                unsigned CharSize,
935                                                Value *Bound) {
936   Value *Src = CI->getArgOperand(0);
937   Type *CharTy = B.getIntNTy(CharSize);
938 
939   if (isOnlyUsedInZeroEqualityComparison(CI) &&
940       (!Bound || isKnownNonZero(Bound, DL))) {
941     // Fold strlen:
942     //   strlen(x) != 0 --> *x != 0
943     //   strlen(x) == 0 --> *x == 0
944     // and likewise strnlen with constant N > 0:
945     //   strnlen(x, N) != 0 --> *x != 0
946     //   strnlen(x, N) == 0 --> *x == 0
947     return B.CreateZExt(B.CreateLoad(CharTy, Src, "char0"),
948                         CI->getType());
949   }
950 
951   if (Bound) {
952     if (ConstantInt *BoundCst = dyn_cast<ConstantInt>(Bound)) {
953       if (BoundCst->isZero())
954         // Fold strnlen(s, 0) -> 0 for any s, constant or otherwise.
955         return ConstantInt::get(CI->getType(), 0);
956 
957       if (BoundCst->isOne()) {
958         // Fold strnlen(s, 1) -> *s ? 1 : 0 for any s.
959         Value *CharVal = B.CreateLoad(CharTy, Src, "strnlen.char0");
960         Value *ZeroChar = ConstantInt::get(CharTy, 0);
961         Value *Cmp = B.CreateICmpNE(CharVal, ZeroChar, "strnlen.char0cmp");
962         return B.CreateZExt(Cmp, CI->getType());
963       }
964     }
965   }
966 
967   if (uint64_t Len = GetStringLength(Src, CharSize)) {
968     Value *LenC = ConstantInt::get(CI->getType(), Len - 1);
969     // Fold strlen("xyz") -> 3 and strnlen("xyz", 2) -> 2
970     // and strnlen("xyz", Bound) -> min(3, Bound) for nonconstant Bound.
971     if (Bound)
972       return B.CreateBinaryIntrinsic(Intrinsic::umin, LenC, Bound);
973     return LenC;
974   }
975 
976   if (Bound)
977     // Punt for strnlen for now.
978     return nullptr;
979 
980   // If s is a constant pointer pointing to a string literal, we can fold
981   // strlen(s + x) to strlen(s) - x, when x is known to be in the range
982   // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
983   // We only try to simplify strlen when the pointer s points to an array
984   // of CharSize elements. Otherwise, we would need to scale the offset x before
985   // doing the subtraction. This will make the optimization more complex, and
986   // it's not very useful because calling strlen for a pointer of other types is
987   // very uncommon.
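  // For example, for a global S holding "abc" (four elements including the
  // nul) and an offset X provably in [0, 3], strlen(&S[X]) folds to 3 - X.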
988   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
989     // TODO: Handle subobjects.
990     if (!isGEPBasedOnPointerToString(GEP, CharSize))
991       return nullptr;
992 
993     ConstantDataArraySlice Slice;
994     if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
995       uint64_t NullTermIdx;
996       if (Slice.Array == nullptr) {
997         NullTermIdx = 0;
998       } else {
999         NullTermIdx = ~((uint64_t)0);
1000         for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
1001           if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
1002             NullTermIdx = I;
1003             break;
1004           }
1005         }
1006         // If the string does not have '\0', leave it to strlen to compute
1007         // its length.
1008         if (NullTermIdx == ~((uint64_t)0))
1009           return nullptr;
1010       }
1011 
1012       Value *Offset = GEP->getOperand(2);
1013       KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
1014       uint64_t ArrSize =
1015              cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
1016 
1017       // If Offset is not provably in the range [0, NullTermIdx], we can still
1018       // optimize if we can prove that the program has undefined behavior when
1019       // Offset is outside that range. That is the case when GEP->getOperand(0)
1020       // is a pointer to an object whose memory extent is NullTermIdx+1.
1021       if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) ||
1022           (isa<GlobalVariable>(GEP->getOperand(0)) &&
1023            NullTermIdx == ArrSize - 1)) {
1024         Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
1025         return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
1026                            Offset);
1027       }
1028     }
1029   }
1030 
1031   // strlen(x?"foo":"bars") --> x ? 3 : 4
1032   if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
1033     uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
1034     uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
1035     if (LenTrue && LenFalse) {
1036       ORE.emit([&]() {
1037         return OptimizationRemark("instcombine", "simplify-libcalls", CI)
1038                << "folded strlen(select) to select of constants";
1039       });
1040       return B.CreateSelect(SI->getCondition(),
1041                             ConstantInt::get(CI->getType(), LenTrue - 1),
1042                             ConstantInt::get(CI->getType(), LenFalse - 1));
1043     }
1044   }
1045 
1046   return nullptr;
1047 }
1048 
1049 Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) {
1050   if (Value *V = optimizeStringLength(CI, B, 8))
1051     return V;
1052   annotateNonNullNoUndefBasedOnAccess(CI, 0);
1053   return nullptr;
1054 }
1055 
1056 Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) {
1057   Value *Bound = CI->getArgOperand(1);
1058   if (Value *V = optimizeStringLength(CI, B, 8, Bound))
1059     return V;
1060 
1061   if (isKnownNonZero(Bound, DL))
1062     annotateNonNullNoUndefBasedOnAccess(CI, 0);
1063   return nullptr;
1064 }
1065 
1066 Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) {
1067   Module &M = *CI->getModule();
1068   unsigned WCharSize = TLI->getWCharSize(M) * 8;
1069   // We cannot perform this optimization without wchar_size metadata.
1070   if (WCharSize == 0)
1071     return nullptr;
1072 
1073   return optimizeStringLength(CI, B, WCharSize);
1074 }
1075 
1076 Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) {
1077   StringRef S1, S2;
1078   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1079   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1080 
1081   // strpbrk(s, "") -> nullptr
1082   // strpbrk("", s) -> nullptr
1083   if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
1084     return Constant::getNullValue(CI->getType());
1085 
1086   // Constant folding.
1087   if (HasS1 && HasS2) {
1088     size_t I = S1.find_first_of(S2);
1089     if (I == StringRef::npos) // No match.
1090       return Constant::getNullValue(CI->getType());
1091 
1092     return B.CreateInBoundsGEP(B.getInt8Ty(), CI->getArgOperand(0),
1093                                B.getInt64(I), "strpbrk");
1094   }
1095 
1096   // strpbrk(s, "a") -> strchr(s, 'a')
1097   if (HasS2 && S2.size() == 1)
1098     return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI));
1099 
1100   return nullptr;
1101 }
1102 
1103 Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilderBase &B) {
1104   Value *EndPtr = CI->getArgOperand(1);
1105   if (isa<ConstantPointerNull>(EndPtr)) {
1106     // With a null EndPtr, this function won't capture the main argument.
1107     // It would be readonly too, except that it still may write to errno.
1108     CI->addParamAttr(0, Attribute::NoCapture);
1109   }
1110 
1111   return nullptr;
1112 }
1113 
1114 Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilderBase &B) {
1115   StringRef S1, S2;
1116   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1117   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1118 
1119   // strspn(s, "") -> 0
1120   // strspn("", s) -> 0
1121   if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
1122     return Constant::getNullValue(CI->getType());
1123 
1124   // Constant folding.
1125   if (HasS1 && HasS2) {
1126     size_t Pos = S1.find_first_not_of(S2);
1127     if (Pos == StringRef::npos)
1128       Pos = S1.size();
1129     return ConstantInt::get(CI->getType(), Pos);
1130   }
1131 
1132   return nullptr;
1133 }
1134 
1135 Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
1136   StringRef S1, S2;
1137   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1138   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1139 
1140   // strcspn("", s) -> 0
1141   if (HasS1 && S1.empty())
1142     return Constant::getNullValue(CI->getType());
1143 
1144   // Constant folding.
1145   if (HasS1 && HasS2) {
1146     size_t Pos = S1.find_first_of(S2);
1147     if (Pos == StringRef::npos)
1148       Pos = S1.size();
1149     return ConstantInt::get(CI->getType(), Pos);
1150   }
1151 
1152   // strcspn(s, "") -> strlen(s)
1153   if (HasS2 && S2.empty())
1154     return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI));
1155 
1156   return nullptr;
1157 }
1158 
1159 Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
1160   // fold strstr(x, x) -> x.
1161   if (CI->getArgOperand(0) == CI->getArgOperand(1))
1162     return CI->getArgOperand(0);
1163 
1164   // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
1165   if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
1166     Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI);
1167     if (!StrLen)
1168       return nullptr;
1169     Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
1170                                  StrLen, B, DL, TLI);
1171     if (!StrNCmp)
1172       return nullptr;
1173     for (User *U : llvm::make_early_inc_range(CI->users())) {
1174       ICmpInst *Old = cast<ICmpInst>(U);
1175       Value *Cmp =
1176           B.CreateICmp(Old->getPredicate(), StrNCmp,
1177                        ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
1178       replaceAllUsesWith(Old, Cmp);
1179     }
1180     return CI;
1181   }
1182 
1183   // See if either input string is a constant string.
1184   StringRef SearchStr, ToFindStr;
1185   bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
1186   bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
1187 
1188   // fold strstr(x, "") -> x.
1189   if (HasStr2 && ToFindStr.empty())
1190     return CI->getArgOperand(0);
1191 
1192   // If both strings are known, constant fold it.
1193   if (HasStr1 && HasStr2) {
1194     size_t Offset = SearchStr.find(ToFindStr);
1195 
1196     if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
1197       return Constant::getNullValue(CI->getType());
1198 
1199     // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
1200     return B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), CI->getArgOperand(0),
1201                                         Offset, "strstr");
1202   }
1203 
1204   // fold strstr(x, "y") -> strchr(x, 'y').
1205   if (HasStr2 && ToFindStr.size() == 1) {
1206     return emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
1207   }
1208 
1209   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
1210   return nullptr;
1211 }
1212 
1213 Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
1214   Value *SrcStr = CI->getArgOperand(0);
1215   Value *Size = CI->getArgOperand(2);
1216   annotateNonNullAndDereferenceable(CI, 0, Size, DL);
1217   Value *CharVal = CI->getArgOperand(1);
1218   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1219   Value *NullPtr = Constant::getNullValue(CI->getType());
1220 
1221   if (LenC) {
1222     if (LenC->isZero())
1223       // Fold memrchr(x, y, 0) --> null.
1224       return NullPtr;
1225 
1226     if (LenC->isOne()) {
1227       // Fold memrchr(x, y, 1) --> *x == y ? x : null for any x and y,
1228       // constant or otherwise.
1229       Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memrchr.char0");
1230       // Slice off the character's high end bits.
1231       CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
1232       Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memrchr.char0cmp");
1233       return B.CreateSelect(Cmp, SrcStr, NullPtr, "memrchr.sel");
1234     }
1235   }
1236 
1237   StringRef Str;
1238   if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false))
1239     return nullptr;
1240 
1241   if (Str.size() == 0)
1242     // If the array is empty fold memrchr(A, C, N) to null for any value
1243     // of C and N on the basis that the only valid value of N is zero
1244     // (otherwise the call is undefined).
1245     return NullPtr;
1246 
1247   uint64_t EndOff = UINT64_MAX;
1248   if (LenC) {
1249     EndOff = LenC->getZExtValue();
1250     if (Str.size() < EndOff)
1251       // Punt out-of-bounds accesses to sanitizers and/or libc.
1252       return nullptr;
1253   }
1254 
1255   if (ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal)) {
1256     // Fold memrchr(S, C, N) for a constant C.
1257     size_t Pos = Str.rfind(CharC->getZExtValue(), EndOff);
1258     if (Pos == StringRef::npos)
1259       // When the character is not in the source array fold the result
1260       // to null regardless of Size.
1261       return NullPtr;
1262 
1263     if (LenC)
1264       // Fold memrchr(s, c, N) --> s + Pos for constant N > Pos.
1265       return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos));
1266 
1267     if (Str.find(Str[Pos]) == Pos) {
1268       // When there is just a single occurrence of C in S, i.e., the one
1269       // in Str[Pos], fold
1270       //   memrchr(s, c, N) --> N <= Pos ? null : s + Pos
1271       // for nonconstant N.
1272       Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
1273                                    "memrchr.cmp");
1274       Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr,
1275                                            B.getInt64(Pos), "memrchr.ptr_plus");
1276       return B.CreateSelect(Cmp, NullPtr, SrcPlus, "memrchr.sel");
1277     }
1278   }
1279 
1280   // Truncate the string to search at most EndOff characters.
1281   Str = Str.substr(0, EndOff);
1282   if (Str.find_first_not_of(Str[0]) != StringRef::npos)
1283     return nullptr;
1284 
1285   // If the source array consists of all equal characters, then for any
1286   // C and N (whether in bounds or not), fold memrchr(S, C, N) to
1287   //   N != 0 && *S == C ? S + N - 1 : null
1288   Type *SizeTy = Size->getType();
1289   Type *Int8Ty = B.getInt8Ty();
1290   Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
1291   // Slice off the sought character's high end bits.
1292   CharVal = B.CreateTrunc(CharVal, Int8Ty);
1293   Value *CEqS0 = B.CreateICmpEQ(ConstantInt::get(Int8Ty, Str[0]), CharVal);
1294   Value *And = B.CreateLogicalAnd(NNeZ, CEqS0);
1295   Value *SizeM1 = B.CreateSub(Size, ConstantInt::get(SizeTy, 1));
1296   Value *SrcPlus =
1297       B.CreateInBoundsGEP(Int8Ty, SrcStr, SizeM1, "memrchr.ptr_plus");
1298   return B.CreateSelect(And, SrcPlus, NullPtr, "memrchr.sel");
1299 }
1300 
1301 Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
1302   Value *SrcStr = CI->getArgOperand(0);
1303   Value *Size = CI->getArgOperand(2);
1304 
1305   if (isKnownNonZero(Size, DL)) {
1306     annotateNonNullNoUndefBasedOnAccess(CI, 0);
1307     if (isOnlyUsedInEqualityComparison(CI, SrcStr))
1308       return memChrToCharCompare(CI, Size, B, DL);
1309   }
1310 
1311   Value *CharVal = CI->getArgOperand(1);
1312   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
1313   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1314   Value *NullPtr = Constant::getNullValue(CI->getType());
1315 
1316   // memchr(x, y, 0) -> null
1317   if (LenC) {
1318     if (LenC->isZero())
1319       return NullPtr;
1320 
1321     if (LenC->isOne()) {
1322       // Fold memchr(x, y, 1) --> *x == y ? x : null for any x and y,
1323       // constant or otherwise.
1324       Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memchr.char0");
1325       // Slice off the character's high end bits.
1326       CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
1327       Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memchr.char0cmp");
1328       return B.CreateSelect(Cmp, SrcStr, NullPtr, "memchr.sel");
1329     }
1330   }
1331 
1332   StringRef Str;
1333   if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false))
1334     return nullptr;
1335 
1336   if (CharC) {
1337     size_t Pos = Str.find(CharC->getZExtValue());
1338     if (Pos == StringRef::npos)
1339       // When the character is not in the source array fold the result
1340       // to null regardless of Size.
1341       return NullPtr;
1342 
1343     // Fold memchr(s, c, n) -> n <= Pos ? null : s + Pos
1344     // When the constant Size is less than or equal to the character
1345     // position also fold the result to null.
1346     Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
1347                                  "memchr.cmp");
1348     Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos),
1349                                          "memchr.ptr");
1350     return B.CreateSelect(Cmp, NullPtr, SrcPlus);
1351   }
1352 
1353   if (Str.size() == 0)
1354     // If the array is empty fold memchr(A, C, N) to null for any value
1355     // of C and N on the basis that the only valid value of N is zero
1356     // (otherwise the call is undefined).
1357     return NullPtr;
1358 
1359   if (LenC)
1360     Str = substr(Str, LenC->getZExtValue());
1361 
1362   size_t Pos = Str.find_first_not_of(Str[0]);
1363   if (Pos == StringRef::npos
1364       || Str.find_first_not_of(Str[Pos], Pos) == StringRef::npos) {
1365     // If the source array consists of at most two consecutive sequences
1366     // of the same characters, then for any C and N (whether in bounds or
1367     // not), fold memchr(S, C, N) to
1368     //   N != 0 && *S == C ? S : null
1369     // or for the two sequences to:
1370     //   N != 0 && *S == C ? S : (N > Pos && S[Pos] == C ? S + Pos : null)
1371     //   ^Sel2                   ^Sel1 mark the selects created below.
1372     // The latter makes it also possible to fold strchr() calls with strings
1373     // of the same characters.
1374     Type *SizeTy = Size->getType();
1375     Type *Int8Ty = B.getInt8Ty();
1376 
1377     // Slice off the sought character's high end bits.
1378     CharVal = B.CreateTrunc(CharVal, Int8Ty);
1379 
1380     Value *Sel1 = NullPtr;
1381     if (Pos != StringRef::npos) {
1382       // Handle two consecutive sequences of the same characters.
1383       Value *PosVal = ConstantInt::get(SizeTy, Pos);
1384       Value *StrPos = ConstantInt::get(Int8Ty, Str[Pos]);
1385       Value *CEqSPos = B.CreateICmpEQ(CharVal, StrPos);
1386       Value *NGtPos = B.CreateICmp(ICmpInst::ICMP_UGT, Size, PosVal);
1387       Value *And = B.CreateAnd(CEqSPos, NGtPos);
1388       Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, PosVal);
1389       Sel1 = B.CreateSelect(And, SrcPlus, NullPtr, "memchr.sel1");
1390     }
1391 
1392     Value *Str0 = ConstantInt::get(Int8Ty, Str[0]);
1393     Value *CEqS0 = B.CreateICmpEQ(Str0, CharVal);
1394     Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
1395     Value *And = B.CreateAnd(NNeZ, CEqS0);
1396     return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
1397   }
1398 
1399   if (!LenC) {
1400     if (isOnlyUsedInEqualityComparison(CI, SrcStr))
1401       // S is dereferenceable so it's safe to load from it and fold
1402       //   memchr(S, C, N) == S to N && *S == C for any C and N.
1403       // TODO: This is safe even for nonconstant S.
1404       return memChrToCharCompare(CI, Size, B, DL);
1405 
1406     // From now on we need a constant length and constant array.
1407     return nullptr;
1408   }
1409 
1410   bool OptForSize = CI->getFunction()->hasOptSize() ||
1411                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
1412                                                 PGSOQueryType::IRPass);
1413 
1414   // If the char is variable but the input str and length are not we can turn
1415   // this memchr call into a simple bit field test. Of course this only works
1416   // when the return value is only checked against null.
1417   //
1418   // It would be really nice to reuse switch lowering here but we can't change
1419   // the CFG at this point.
1420   //
1421   // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
1422   // != 0
1423   //   after bounds check.
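       // For the "\r\n" example above, the bitfield path below roughly produces
       // (Width == 16, bitfield constant 0x2400 with bits '\n' == 10 and
       // '\r' == 13 set):
       //   (C < 16) && (((1 << C) & 0x2400) != 0), zero-extended via inttoptr.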
1424   if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
1425     return nullptr;
1426 
1427   unsigned char Max =
1428       *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
1429                         reinterpret_cast<const unsigned char *>(Str.end()));
1430 
1431   // Make sure the bit field we're about to create fits in a register on the
1432   // target.
1433   // FIXME: On a 64 bit architecture this prevents us from using the
1434   // interesting range of alpha ascii chars. We could do better by emitting
1435   // two bitfields or shifting the range by 64 if no lower chars are used.
1436   if (!DL.fitsInLegalInteger(Max + 1)) {
1437     // Build chain of ORs
1438     // Transform:
1439     //    memchr("abcd", C, 4) != nullptr
1440     // to:
1441     //    (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0
1442     std::string SortedStr = Str.str();
1443     llvm::sort(SortedStr);
1444     // Compute the number of non-contiguous ranges.
1445     unsigned NonContRanges = 1;
1446     for (size_t i = 1; i < SortedStr.size(); ++i) {
1447       if (SortedStr[i] > SortedStr[i - 1] + 1) {
1448         NonContRanges++;
1449       }
1450     }
1451 
1452     // Restrict this optimization to profitable cases with one or two range
1453     // checks.
1454     if (NonContRanges > 2)
1455       return nullptr;
1456 
1457     // Slice off the character's high end bits.
1458     CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
1459 
1460     SmallVector<Value *> CharCompares;
1461     for (unsigned char C : SortedStr)
1462       CharCompares.push_back(B.CreateICmpEQ(CharVal, B.getInt8(C)));
1463 
1464     return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType());
1465   }
1466 
1467   // For the bit field use a power-of-2 type with at least 8 bits to avoid
1468   // creating unnecessary illegal types.
1469   unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
1470 
1471   // Now build the bit field.
1472   APInt Bitfield(Width, 0);
1473   for (char C : Str)
1474     Bitfield.setBit((unsigned char)C);
1475   Value *BitfieldC = B.getInt(Bitfield);
1476 
1477   // Adjust width of "C" to the bitfield width, then mask off the high bits.
1478   Value *C = B.CreateZExtOrTrunc(CharVal, BitfieldC->getType());
1479   C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
1480 
1481   // First check that the bit field access is within bounds.
1482   Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
1483                                "memchr.bounds");
1484 
1485   // Create code that checks if the given bit is set in the field.
1486   Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
1487   Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
1488 
1489   // Finally merge both checks and cast to pointer type. The inttoptr
1490   // implicitly zexts the i1 to intptr type.
1491   return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"),
1492                           CI->getType());
1493 }
1494 
1495 // Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
1496 // arrays LHS and RHS and nonconstant Size.
1497 static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
1498                                     Value *Size, bool StrNCmp,
1499                                     IRBuilderBase &B, const DataLayout &DL) {
1500   if (LHS == RHS) // memcmp(s,s,x) -> 0
1501     return Constant::getNullValue(CI->getType());
1502 
1503   StringRef LStr, RStr;
1504   if (!getConstantStringInfo(LHS, LStr, /*TrimAtNul=*/false) ||
1505       !getConstantStringInfo(RHS, RStr, /*TrimAtNul=*/false))
1506     return nullptr;
1507 
1508   // If the contents of both constant arrays are known, fold a call to
1509   // memcmp(A, B, N) to
1510   //   N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0)
1511   // where Pos is the first mismatch between A and B, determined below.
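       // For instance (hypothetical constants), memcmp("abcde", "abZde", N) has
       // its first mismatch at Pos == 2 with 'Z' < 'c', so it folds to
       //   N <= 2 ? 0 : +1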
1512 
1513   uint64_t Pos = 0;
1514   Value *Zero = ConstantInt::get(CI->getType(), 0);
1515   for (uint64_t MinSize = std::min(LStr.size(), RStr.size()); ; ++Pos) {
1516     if (Pos == MinSize ||
1517         (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) {
1518       // One array is a leading part of the other of equal or greater
1519       // size, or for strncmp, the arrays are equal strings.
1520       // Fold the result to zero.  Size is assumed to be in bounds, since
1521       // otherwise the call would be undefined.
1522       return Zero;
1523     }
1524 
1525     if (LStr[Pos] != RStr[Pos])
1526       break;
1527   }
1528 
1529   // Normalize the result.
1530   typedef unsigned char UChar;
1531   int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1;
1532   Value *MaxSize = ConstantInt::get(Size->getType(), Pos);
1533   Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize);
1534   Value *Res = ConstantInt::get(CI->getType(), IRes);
1535   return B.CreateSelect(Cmp, Zero, Res);
1536 }
1537 
1538 // Optimize a memcmp call CI with constant size Len.
1539 static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
1540                                          uint64_t Len, IRBuilderBase &B,
1541                                          const DataLayout &DL) {
1542   if (Len == 0) // memcmp(s1,s2,0) -> 0
1543     return Constant::getNullValue(CI->getType());
1544 
1545   // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
1546   if (Len == 1) {
1547     Value *LHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), LHS, "lhsc"),
1548                                CI->getType(), "lhsv");
1549     Value *RHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), RHS, "rhsc"),
1550                                CI->getType(), "rhsv");
1551     return B.CreateSub(LHSV, RHSV, "chardiff");
1552   }
1553 
1554   // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
1555   // TODO: The case where both inputs are constants does not need to be limited
1556   // to legal integers or equality comparison. See block below this.
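       // E.g. on a target where i32 is legal, memcmp(P, Q, 4) == 0 roughly
       // becomes zext(icmp ne (load i32 P), (load i32 Q)) == 0, provided both
       // pointers are known to be sufficiently aligned.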
1557   if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
1558     IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
1559     Align PrefAlignment = DL.getPrefTypeAlign(IntType);
1560 
1561     // First, see if we can fold either argument to a constant.
1562     Value *LHSV = nullptr;
1563     if (auto *LHSC = dyn_cast<Constant>(LHS))
1564       LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
1565 
1566     Value *RHSV = nullptr;
1567     if (auto *RHSC = dyn_cast<Constant>(RHS))
1568       RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
1569 
1570     // Don't generate unaligned loads. If either source is constant data,
1571     // alignment doesn't matter for that source because there is no load.
1572     if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
1573         (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
1574       if (!LHSV)
1575         LHSV = B.CreateLoad(IntType, LHS, "lhsv");
1576       if (!RHSV)
1577         RHSV = B.CreateLoad(IntType, RHS, "rhsv");
1578       return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
1579     }
1580   }
1581 
1582   return nullptr;
1583 }
1584 
1585 // Most simplifications for memcmp also apply to bcmp.
1586 Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
1587                                                    IRBuilderBase &B) {
1588   Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
1589   Value *Size = CI->getArgOperand(2);
1590 
1591   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1592 
1593   if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, false, B, DL))
1594     return Res;
1595 
1596   // Handle constant Size.
1597   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1598   if (!LenC)
1599     return nullptr;
1600 
1601   return optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL);
1602 }
1603 
1604 Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
1605   Module *M = CI->getModule();
1606   if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
1607     return V;
1608 
1609   // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
1610   // bcmp can be more efficient than memcmp because it only has to know that
1611   // there is a difference, not how different one is to the other.
1612   if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) &&
1613       isOnlyUsedInZeroEqualityComparison(CI)) {
1614     Value *LHS = CI->getArgOperand(0);
1615     Value *RHS = CI->getArgOperand(1);
1616     Value *Size = CI->getArgOperand(2);
1617     return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI));
1618   }
1619 
1620   return nullptr;
1621 }
1622 
1623 Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilderBase &B) {
1624   return optimizeMemCmpBCmpCommon(CI, B);
1625 }
1626 
1627 Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
1628   Value *Size = CI->getArgOperand(2);
1629   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1630   if (isa<IntrinsicInst>(CI))
1631     return nullptr;
1632 
1633   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
1634   CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
1635                                    CI->getArgOperand(1), Align(1), Size);
1636   mergeAttributesAndFlags(NewCI, *CI);
1637   return CI->getArgOperand(0);
1638 }
1639 
1640 Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
1641   Value *Dst = CI->getArgOperand(0);
1642   Value *Src = CI->getArgOperand(1);
1643   ConstantInt *StopChar = dyn_cast<ConstantInt>(CI->getArgOperand(2));
1644   ConstantInt *N = dyn_cast<ConstantInt>(CI->getArgOperand(3));
1645   StringRef SrcStr;
1646   if (CI->use_empty() && Dst == Src)
1647     return Dst;
1648   // memccpy(d, s, c, 0) -> nullptr
1649   if (N) {
1650     if (N->isNullValue())
1651       return Constant::getNullValue(CI->getType());
1652     if (!getConstantStringInfo(Src, SrcStr, /*TrimAtNul=*/false) ||
1653         // TODO: Handle zeroinitializer.
1654         !StopChar)
1655       return nullptr;
1656   } else {
1657     return nullptr;
1658   }
1659 
1660   // Truncate arg 'c' of type int to unsigned char, as memccpy compares bytes.
1661   size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
1662   if (Pos == StringRef::npos) {
1663     if (N->getZExtValue() <= SrcStr.size()) {
1664       copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1),
1665                                     CI->getArgOperand(3)));
1666       return Constant::getNullValue(CI->getType());
1667     }
1668     return nullptr;
1669   }
1670 
1671   Value *NewN = ConstantInt::get(
1672       N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
1673   // memccpy -> llvm.memcpy
1674   copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN));
1675   return Pos + 1 <= N->getZExtValue()
1676              ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
1677              : Constant::getNullValue(CI->getType());
1678 }
1679 
1680 Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
1681   Value *Dst = CI->getArgOperand(0);
1682   Value *N = CI->getArgOperand(2);
1683   // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
1684   CallInst *NewCI =
1685       B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N);
1686   // Propagate attributes, but memcpy has no return value, so make sure that
1687   // any return attributes are compliant.
1688   // TODO: Attach return value attributes to the 1st operand to preserve them?
1689   mergeAttributesAndFlags(NewCI, *CI);
1690   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
1691 }
1692 
1693 Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
1694   Value *Size = CI->getArgOperand(2);
1695   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1696   if (isa<IntrinsicInst>(CI))
1697     return nullptr;
1698 
1699   // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
1700   CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
1701                                     CI->getArgOperand(1), Align(1), Size);
1702   mergeAttributesAndFlags(NewCI, *CI);
1703   return CI->getArgOperand(0);
1704 }
1705 
1706 Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
1707   Value *Size = CI->getArgOperand(2);
1708   annotateNonNullAndDereferenceable(CI, 0, Size, DL);
1709   if (isa<IntrinsicInst>(CI))
1710     return nullptr;
1711 
1712   // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
1713   Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
1714   CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
1715   mergeAttributesAndFlags(NewCI, *CI);
1716   return CI->getArgOperand(0);
1717 }
1718 
1719 Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
1720   if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
1721     return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI));
1722 
1723   return nullptr;
1724 }
1725 
1726 // When enabled, replace operator new() calls marked with a hot or cold memprof
1727 // attribute with an operator new() call that takes a __hot_cold_t parameter.
1728 // Currently this is supported by the open source version of tcmalloc, see:
1729 // https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h
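     // For example (Itanium-mangled names), a call to
     //   _Znwm(Size)                      ; operator new(unsigned long)
     // carrying a "memprof"="cold" attribute is roughly rewritten below to
     //   _Znwm12__hot_cold_t(Size, Hint)  ; operator new(unsigned long, __hot_cold_t)
     // with Hint == ColdNewHintValue.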
1730 Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
1731                                       LibFunc &Func) {
1732   if (!OptimizeHotColdNew)
1733     return nullptr;
1734 
1735   uint8_t HotCold;
1736   if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "cold")
1737     HotCold = ColdNewHintValue;
1738   else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() ==
1739            "notcold")
1740     HotCold = NotColdNewHintValue;
1741   else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "hot")
1742     HotCold = HotNewHintValue;
1743   else
1744     return nullptr;
1745 
1746   // For calls that already pass a hot/cold hint, only update the hint if
1747   // directed by OptimizeExistingHotColdNew. For other calls to new, add a hint
1748   // if cold or hot, and leave as-is for default handling if "notcold" aka warm.
1749   // Note that in cases where we decide it is "notcold", it might be slightly
1750   // better to replace the hinted call with a non hinted call, to avoid the
1751   // extra parameter and the if condition check of the hint value in the
1752   // allocator. This can be considered in the future.
1753   switch (Func) {
1754   case LibFunc_Znwm12__hot_cold_t:
1755     if (OptimizeExistingHotColdNew)
1756       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1757                             LibFunc_Znwm12__hot_cold_t, HotCold);
1758     break;
1759   case LibFunc_Znwm:
1760     if (HotCold != NotColdNewHintValue)
1761       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1762                             LibFunc_Znwm12__hot_cold_t, HotCold);
1763     break;
1764   case LibFunc_Znam12__hot_cold_t:
1765     if (OptimizeExistingHotColdNew)
1766       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1767                             LibFunc_Znam12__hot_cold_t, HotCold);
1768     break;
1769   case LibFunc_Znam:
1770     if (HotCold != NotColdNewHintValue)
1771       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1772                             LibFunc_Znam12__hot_cold_t, HotCold);
1773     break;
1774   case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
1775     if (OptimizeExistingHotColdNew)
1776       return emitHotColdNewNoThrow(
1777           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1778           LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
1779     break;
1780   case LibFunc_ZnwmRKSt9nothrow_t:
1781     if (HotCold != NotColdNewHintValue)
1782       return emitHotColdNewNoThrow(
1783           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1784           LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
1785     break;
1786   case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
1787     if (OptimizeExistingHotColdNew)
1788       return emitHotColdNewNoThrow(
1789           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1790           LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
1791     break;
1792   case LibFunc_ZnamRKSt9nothrow_t:
1793     if (HotCold != NotColdNewHintValue)
1794       return emitHotColdNewNoThrow(
1795           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1796           LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
1797     break;
1798   case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
1799     if (OptimizeExistingHotColdNew)
1800       return emitHotColdNewAligned(
1801           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1802           LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
1803     break;
1804   case LibFunc_ZnwmSt11align_val_t:
1805     if (HotCold != NotColdNewHintValue)
1806       return emitHotColdNewAligned(
1807           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1808           LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
1809     break;
1810   case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
1811     if (OptimizeExistingHotColdNew)
1812       return emitHotColdNewAligned(
1813           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1814           LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
1815     break;
1816   case LibFunc_ZnamSt11align_val_t:
1817     if (HotCold != NotColdNewHintValue)
1818       return emitHotColdNewAligned(
1819           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1820           LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
1821     break;
1822   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1823     if (OptimizeExistingHotColdNew)
1824       return emitHotColdNewAlignedNoThrow(
1825           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1826           TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1827           HotCold);
1828     break;
1829   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
1830     if (HotCold != NotColdNewHintValue)
1831       return emitHotColdNewAlignedNoThrow(
1832           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1833           TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1834           HotCold);
1835     break;
1836   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1837     if (OptimizeExistingHotColdNew)
1838       return emitHotColdNewAlignedNoThrow(
1839           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1840           TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1841           HotCold);
1842     break;
1843   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
1844     if (HotCold != NotColdNewHintValue)
1845       return emitHotColdNewAlignedNoThrow(
1846           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1847           TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1848           HotCold);
1849     break;
1850   case LibFunc_size_returning_new:
1851     if (HotCold != NotColdNewHintValue)
1852       return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
1853                                          LibFunc_size_returning_new_hot_cold,
1854                                          HotCold);
1855     break;
1856   case LibFunc_size_returning_new_hot_cold:
1857     if (OptimizeExistingHotColdNew)
1858       return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
1859                                          LibFunc_size_returning_new_hot_cold,
1860                                          HotCold);
1861     break;
1862   case LibFunc_size_returning_new_aligned:
1863     if (HotCold != NotColdNewHintValue)
1864       return emitHotColdSizeReturningNewAligned(
1865           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1866           LibFunc_size_returning_new_aligned_hot_cold, HotCold);
1867     break;
1868   case LibFunc_size_returning_new_aligned_hot_cold:
1869     if (OptimizeExistingHotColdNew)
1870       return emitHotColdSizeReturningNewAligned(
1871           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1872           LibFunc_size_returning_new_aligned_hot_cold, HotCold);
1873     break;
1874   default:
1875     return nullptr;
1876   }
1877   return nullptr;
1878 }
1879 
1880 //===----------------------------------------------------------------------===//
1881 // Math Library Optimizations
1882 //===----------------------------------------------------------------------===//
1883 
1884 // Replace a libcall \p CI with a call to intrinsic \p IID
1885 static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B,
1886                                Intrinsic::ID IID) {
1887   CallInst *NewCall = B.CreateUnaryIntrinsic(IID, CI->getArgOperand(0), CI);
1888   NewCall->takeName(CI);
1889   return copyFlags(*CI, NewCall);
1890 }
1891 
1892 /// Return a variant of Val with float type.
1893 /// Currently this works in two cases: If Val is an FPExtension of a float
1894 /// value to something bigger, simply return the operand.
1895 /// If Val is a ConstantFP but can be converted to a float ConstantFP without
1896 /// loss of precision do so.
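     /// For instance, for %e = fpext float %x to double this returns %x, and for
     /// a double constant 0.5 it returns a float 0.5; a double 0.1 has no exact
     /// float equivalent and yields nullptr.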
1897 static Value *valueHasFloatPrecision(Value *Val) {
1898   if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
1899     Value *Op = Cast->getOperand(0);
1900     if (Op->getType()->isFloatTy())
1901       return Op;
1902   }
1903   if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
1904     APFloat F = Const->getValueAPF();
1905     bool losesInfo;
1906     (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1907                     &losesInfo);
1908     if (!losesInfo)
1909       return ConstantFP::get(Const->getContext(), F);
1910   }
1911   return nullptr;
1912 }
1913 
1914 /// Shrink double -> float functions.
1915 static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
1916                                bool isBinary, const TargetLibraryInfo *TLI,
1917                                bool isPrecise = false) {
1918   Function *CalleeFn = CI->getCalledFunction();
1919   if (!CI->getType()->isDoubleTy() || !CalleeFn)
1920     return nullptr;
1921 
1922   // If not all the uses of the call are converted to float, then bail out.
1923   // This matters if the precision of the result is more important than the
1924   // precision of the arguments.
1925   if (isPrecise)
1926     for (User *U : CI->users()) {
1927       FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
1928       if (!Cast || !Cast->getType()->isFloatTy())
1929         return nullptr;
1930     }
1931 
1932   // If this is something like 'g((double) float)', convert to 'gf(float)'.
1933   Value *V[2];
1934   V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
1935   V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
1936   if (!V[0] || (isBinary && !V[1]))
1937     return nullptr;
1938 
1939   // If call isn't an intrinsic, check that it isn't within a function with the
1940   // same name as the float version of this call, otherwise the result is an
1941   // infinite loop.  For example, from MinGW-w64:
1942   //
1943   // float expf(float val) { return (float) exp((double) val); }
1944   StringRef CalleeName = CalleeFn->getName();
1945   bool IsIntrinsic = CalleeFn->isIntrinsic();
1946   if (!IsIntrinsic) {
1947     StringRef CallerName = CI->getFunction()->getName();
1948     if (!CallerName.empty() && CallerName.back() == 'f' &&
1949         CallerName.size() == (CalleeName.size() + 1) &&
1950         CallerName.starts_with(CalleeName))
1951       return nullptr;
1952   }
1953 
1954   // Propagate the math semantics from the current function to the new function.
1955   IRBuilderBase::FastMathFlagGuard Guard(B);
1956   B.setFastMathFlags(CI->getFastMathFlags());
1957 
1958   // g((double) float) -> (double) gf(float)
1959   Value *R;
1960   if (IsIntrinsic) {
1961     Module *M = CI->getModule();
1962     Intrinsic::ID IID = CalleeFn->getIntrinsicID();
1963     Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
1964     R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
1965   } else {
1966     AttributeList CalleeAttrs = CalleeFn->getAttributes();
1967     R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], TLI, CalleeName, B,
1968                                          CalleeAttrs)
1969                  : emitUnaryFloatFnCall(V[0], TLI, CalleeName, B, CalleeAttrs);
1970   }
1971   return B.CreateFPExt(R, B.getDoubleTy());
1972 }
1973 
1974 /// Shrink double -> float for unary functions.
1975 static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B,
1976                                     const TargetLibraryInfo *TLI,
1977                                     bool isPrecise = false) {
1978   return optimizeDoubleFP(CI, B, false, TLI, isPrecise);
1979 }
1980 
1981 /// Shrink double -> float for binary functions.
1982 static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B,
1983                                      const TargetLibraryInfo *TLI,
1984                                      bool isPrecise = false) {
1985   return optimizeDoubleFP(CI, B, true, TLI, isPrecise);
1986 }
1987 
1988 // cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
1989 Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
1990   Value *Real, *Imag;
1991 
1992   if (CI->arg_size() == 1) {
1993 
1994     if (!CI->isFast())
1995       return nullptr;
1996 
1997     Value *Op = CI->getArgOperand(0);
1998     assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
1999 
2000     Real = B.CreateExtractValue(Op, 0, "real");
2001     Imag = B.CreateExtractValue(Op, 1, "imag");
2002 
2003   } else {
2004     assert(CI->arg_size() == 2 && "Unexpected signature for cabs!");
2005 
2006     Real = CI->getArgOperand(0);
2007     Imag = CI->getArgOperand(1);
2008 
2009     // If the real or imaginary part is zero, simplify to fabs(cimag(z))
2010     // or fabs(creal(z)), respectively.
2011     Value *AbsOp = nullptr;
2012     if (ConstantFP *ConstReal = dyn_cast<ConstantFP>(Real)) {
2013       if (ConstReal->isZero())
2014         AbsOp = Imag;
2015 
2016     } else if (ConstantFP *ConstImag = dyn_cast<ConstantFP>(Imag)) {
2017       if (ConstImag->isZero())
2018         AbsOp = Real;
2019     }
2020 
2021     if (AbsOp) {
2022       IRBuilderBase::FastMathFlagGuard Guard(B);
2023       B.setFastMathFlags(CI->getFastMathFlags());
2024 
2025       return copyFlags(
2026           *CI, B.CreateUnaryIntrinsic(Intrinsic::fabs, AbsOp, nullptr, "cabs"));
2027     }
2028 
2029     if (!CI->isFast())
2030       return nullptr;
2031   }
2032 
2033   // Propagate fast-math flags from the existing call to new instructions.
2034   IRBuilderBase::FastMathFlagGuard Guard(B);
2035   B.setFastMathFlags(CI->getFastMathFlags());
2036 
2037   Value *RealReal = B.CreateFMul(Real, Real);
2038   Value *ImagImag = B.CreateFMul(Imag, Imag);
2039 
2040   return copyFlags(*CI, B.CreateUnaryIntrinsic(Intrinsic::sqrt,
2041                                                B.CreateFAdd(RealReal, ImagImag),
2042                                                nullptr, "cabs"));
2043 }
2044 
2045 // Return a properly extended integer (DstWidth bits wide) if the operation is
2046 // an itofp.
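     // E.g. with DstWidth == 32, %f = sitofp i16 %x to double yields
     // "sext i16 %x to i32", while a 64-bit source yields nullptr since the
     // value may not fit in a 32-bit int.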
2047 static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
2048   if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
2049     Value *Op = cast<Instruction>(I2F)->getOperand(0);
2050     // Make sure that the exponent fits inside an "int" of size DstWidth,
2051     // thus avoiding any range issues that FP does not have.
2052     unsigned BitWidth = Op->getType()->getScalarSizeInBits();
2053     if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
2054       Type *IntTy = Op->getType()->getWithNewBitWidth(DstWidth);
2055       return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)
2056                                   : B.CreateZExt(Op, IntTy);
2057     }
2058   }
2059 
2060   return nullptr;
2061 }
2062 
2063 /// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
2064 /// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
2065 /// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
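     /// For instance, pow(8.0, x) can become exp2(3.0 * x) since 8 == 2**3, and
     /// pow(0.25, x) can become exp2(-2.0 * x) since 0.25 == 2**-2, provided a
     /// suitable exp2 is available.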
2066 Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
2067   Module *M = Pow->getModule();
2068   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
2069   Type *Ty = Pow->getType();
2070   bool Ignored;
2071 
2072   // Evaluate special cases related to a nested function as the base.
2073 
2074   // pow(exp(x), y) -> exp(x * y)
2075   // pow(exp2(x), y) -> exp2(x * y)
2076   // If exp{,2}() is used only once, it is better to fold two transcendental
2077   // math functions into one.  If used again, exp{,2}() would still have to be
2078   // called with the original argument, then keep both original transcendental
2079   // functions.  However, this transformation is only safe with fully relaxed
2080   // math semantics, since, besides rounding differences, it changes overflow
2081   // and underflow behavior quite dramatically.  For example:
2082   //   pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
2083   // Whereas:
2084   //   exp(1000 * 0.001) = exp(1)
2085   // TODO: Loosen the requirement for fully relaxed math semantics.
2086   // TODO: Handle exp10() when more targets have it available.
2087   CallInst *BaseFn = dyn_cast<CallInst>(Base);
2088   if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
2089     LibFunc LibFn;
2090 
2091     Function *CalleeFn = BaseFn->getCalledFunction();
2092     if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
2093         isLibFuncEmittable(M, TLI, LibFn)) {
2094       StringRef ExpName;
2095       Intrinsic::ID ID;
2096       Value *ExpFn;
2097       LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble;
2098 
2099       switch (LibFn) {
2100       default:
2101         return nullptr;
2102       case LibFunc_expf:
2103       case LibFunc_exp:
2104       case LibFunc_expl:
2105         ExpName = TLI->getName(LibFunc_exp);
2106         ID = Intrinsic::exp;
2107         LibFnFloat = LibFunc_expf;
2108         LibFnDouble = LibFunc_exp;
2109         LibFnLongDouble = LibFunc_expl;
2110         break;
2111       case LibFunc_exp2f:
2112       case LibFunc_exp2:
2113       case LibFunc_exp2l:
2114         ExpName = TLI->getName(LibFunc_exp2);
2115         ID = Intrinsic::exp2;
2116         LibFnFloat = LibFunc_exp2f;
2117         LibFnDouble = LibFunc_exp2;
2118         LibFnLongDouble = LibFunc_exp2l;
2119         break;
2120       }
2121 
2122       // Create new exp{,2}() with the product as its argument.
2123       Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
2124       ExpFn = BaseFn->doesNotAccessMemory()
2125                   ? B.CreateUnaryIntrinsic(ID, FMul, nullptr, ExpName)
2126                   : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
2127                                          LibFnLongDouble, B,
2128                                          BaseFn->getAttributes());
2129 
2130       // Since the new exp{,2}() is different from the original one, dead code
2131       // elimination cannot be trusted to remove it, since it may have side
2132       // effects (e.g., errno).  When the only consumer for the original
2133       // exp{,2}() is pow(), then it has to be explicitly erased.
2134       substituteInParent(BaseFn, ExpFn);
2135       return ExpFn;
2136     }
2137   }
2138 
2139   // Evaluate special cases related to a constant base.
2140 
2141   const APFloat *BaseF;
2142   if (!match(Base, m_APFloat(BaseF)))
2143     return nullptr;
2144 
2145   AttributeList NoAttrs; // Attributes are only meaningful on the original call
2146 
2147   const bool UseIntrinsic = Pow->doesNotAccessMemory();
2148 
2149   // pow(2.0, itofp(x)) -> ldexp(1.0, x)
2150   if ((UseIntrinsic || !Ty->isVectorTy()) && BaseF->isExactlyValue(2.0) &&
2151       (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
2152       (UseIntrinsic ||
2153        hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl))) {
2154 
2155     // TODO: Shouldn't really need to depend on getIntToFPVal for intrinsic. Can
2156     // just directly use the original integer type.
2157     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) {
2158       Constant *One = ConstantFP::get(Ty, 1.0);
2159 
2160       if (UseIntrinsic) {
2161         return copyFlags(*Pow, B.CreateIntrinsic(Intrinsic::ldexp,
2162                                                  {Ty, ExpoI->getType()},
2163                                                  {One, ExpoI}, Pow, "exp2"));
2164       }
2165 
2166       return copyFlags(*Pow, emitBinaryFloatFnCall(
2167                                  One, ExpoI, TLI, LibFunc_ldexp, LibFunc_ldexpf,
2168                                  LibFunc_ldexpl, B, NoAttrs));
2169     }
2170   }
2171 
2172   // pow(2.0 ** n, x) -> exp2(n * x)
2173   if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
2174     APFloat BaseR = APFloat(1.0);
2175     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
2176     BaseR = BaseR / *BaseF;
2177     bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
2178     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
2179     APSInt NI(64, false);
2180     if ((IsInteger || IsReciprocal) &&
2181         NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
2182             APFloat::opOK &&
2183         NI > 1 && NI.isPowerOf2()) {
2184       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
2185       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
2186       if (Pow->doesNotAccessMemory())
2187         return copyFlags(*Pow, B.CreateUnaryIntrinsic(Intrinsic::exp2, FMul,
2188                                                       nullptr, "exp2"));
2189       else
2190         return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
2191                                                     LibFunc_exp2f,
2192                                                     LibFunc_exp2l, B, NoAttrs));
2193     }
2194   }
2195 
2196   // pow(10.0, x) -> exp10(x)
2197   if (BaseF->isExactlyValue(10.0) &&
2198       hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) {
2199 
2200     if (Pow->doesNotAccessMemory()) {
2201       CallInst *NewExp10 =
2202           B.CreateIntrinsic(Intrinsic::exp10, {Ty}, {Expo}, Pow, "exp10");
2203       return copyFlags(*Pow, NewExp10);
2204     }
2205 
2206     return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10,
2207                                                 LibFunc_exp10f, LibFunc_exp10l,
2208                                                 B, NoAttrs));
2209   }
2210 
2211   // pow(x, y) -> exp2(log2(x) * y)
2212   if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() &&
2213       !BaseF->isNegative()) {
2214     // pow(1, inf) is defined to be 1 but exp2(log2(1) * inf) evaluates to NaN.
2215     // Luckily optimizePow has already handled the x == 1 case.
2216     assert(!match(Base, m_FPOne()) &&
2217            "pow(1.0, y) should have been simplified earlier!");
2218 
2219     Value *Log = nullptr;
2220     if (Ty->isFloatTy())
2221       Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
2222     else if (Ty->isDoubleTy())
2223       Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble()));
2224 
2225     if (Log) {
2226       Value *FMul = B.CreateFMul(Log, Expo, "mul");
2227       if (Pow->doesNotAccessMemory())
2228         return copyFlags(*Pow, B.CreateUnaryIntrinsic(Intrinsic::exp2, FMul,
2229                                                       nullptr, "exp2"));
2230       else if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
2231                           LibFunc_exp2l))
2232         return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
2233                                                     LibFunc_exp2f,
2234                                                     LibFunc_exp2l, B, NoAttrs));
2235     }
2236   }
2237 
2238   return nullptr;
2239 }
2240 
2241 static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
2242                           Module *M, IRBuilderBase &B,
2243                           const TargetLibraryInfo *TLI) {
2244   // If errno is never set, then use the intrinsic for sqrt().
2245   if (NoErrno)
2246     return B.CreateUnaryIntrinsic(Intrinsic::sqrt, V, nullptr, "sqrt");
2247 
2248   // Otherwise, use the libcall for sqrt().
2249   if (hasFloatFn(M, TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
2250                  LibFunc_sqrtl))
2251     // TODO: We also should check that the target can in fact lower the sqrt()
2252     // libcall. We currently have no way to ask this question, so we ask if
2253     // the target has a sqrt() libcall, which is not exactly the same.
2254     return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
2255                                 LibFunc_sqrtl, B, Attrs);
2256 
2257   return nullptr;
2258 }
2259 
2260 /// Use square root in place of pow(x, +/-0.5).
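     /// For instance, without ninf/nsz guarantees pow(x, 0.5) roughly expands to
     ///   x == -inf ? +inf : fabs(sqrt(x))
     /// and pow(x, -0.5) additionally takes the reciprocal of that result.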
2261 Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
2262   Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
2263   Module *Mod = Pow->getModule();
2264   Type *Ty = Pow->getType();
2265 
2266   const APFloat *ExpoF;
2267   if (!match(Expo, m_APFloat(ExpoF)) ||
2268       (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
2269     return nullptr;
2270 
2271   // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step,
2272   // so that requires fast-math-flags (afn or reassoc).
2273   if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
2274     return nullptr;
2275 
2276   // If we have a pow() library call (accesses memory) and we can't guarantee
2277   // that the base is not an infinity, give up:
2278   // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting
2279   // errno), but sqrt(-Inf) is required by various standards to set errno.
2280   if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
2281       !isKnownNeverInfinity(
2282           Base, 0, SimplifyQuery(DL, TLI, DT, AC, Pow, true, true, DC)))
2283     return nullptr;
2284 
2285   Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B,
2286                      TLI);
2287   if (!Sqrt)
2288     return nullptr;
2289 
2290   // Handle signed zero base by expanding to fabs(sqrt(x)).
2291   if (!Pow->hasNoSignedZeros())
2292     Sqrt = B.CreateUnaryIntrinsic(Intrinsic::fabs, Sqrt, nullptr, "abs");
2293 
2294   Sqrt = copyFlags(*Pow, Sqrt);
2295 
2296   // Handle non finite base by expanding to
2297   // (x == -infinity ? +infinity : sqrt(x)).
2298   if (!Pow->hasNoInfs()) {
2299     Value *PosInf = ConstantFP::getInfinity(Ty),
2300           *NegInf = ConstantFP::getInfinity(Ty, true);
2301     Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
2302     Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
2303   }
2304 
2305   // If the exponent is negative, then get the reciprocal.
2306   if (ExpoF->isNegative())
2307     Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
2308 
2309   return Sqrt;
2310 }
2311 
2312 static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
2313                                            IRBuilderBase &B) {
2314   Value *Args[] = {Base, Expo};
2315   Type *Types[] = {Base->getType(), Expo->getType()};
2316   return B.CreateIntrinsic(Intrinsic::powi, Types, Args);
2317 }
2318 
2319 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
2320   Value *Base = Pow->getArgOperand(0);
2321   Value *Expo = Pow->getArgOperand(1);
2322   Function *Callee = Pow->getCalledFunction();
2323   StringRef Name = Callee->getName();
2324   Type *Ty = Pow->getType();
2325   Module *M = Pow->getModule();
2326   bool AllowApprox = Pow->hasApproxFunc();
2327   bool Ignored;
2328 
2329   // Propagate the math semantics from the call to any created instructions.
2330   IRBuilderBase::FastMathFlagGuard Guard(B);
2331   B.setFastMathFlags(Pow->getFastMathFlags());
2332   // Evaluate special cases related to the base.
2333 
2334   // pow(1.0, x) -> 1.0
2335   if (match(Base, m_FPOne()))
2336     return Base;
2337 
2338   if (Value *Exp = replacePowWithExp(Pow, B))
2339     return Exp;
2340 
2341   // Evaluate special cases related to the exponent.
2342 
2343   // pow(x, -1.0) -> 1.0 / x
2344   if (match(Expo, m_SpecificFP(-1.0)))
2345     return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
2346 
2347   // pow(x, +/-0.0) -> 1.0
2348   if (match(Expo, m_AnyZeroFP()))
2349     return ConstantFP::get(Ty, 1.0);
2350 
2351   // pow(x, 1.0) -> x
2352   if (match(Expo, m_FPOne()))
2353     return Base;
2354 
2355   // pow(x, 2.0) -> x * x
2356   if (match(Expo, m_SpecificFP(2.0)))
2357     return B.CreateFMul(Base, Base, "square");
2358 
2359   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
2360     return Sqrt;
2361 
2362   // If we can approximate pow:
2363   // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
2364   // pow(x, n) -> powi(x, n) if n is a constant signed integer value
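       // E.g. pow(x, 3.5) can become powi(x, 3) * sqrt(x), and pow(x, 4.0) can
       // become powi(x, 4).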
2365   const APFloat *ExpoF;
2366   if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
2367       !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
2368     APFloat ExpoA(abs(*ExpoF));
2369     APFloat ExpoI(*ExpoF);
2370     Value *Sqrt = nullptr;
2371     if (!ExpoA.isInteger()) {
2372       APFloat Expo2 = ExpoA;
2373       // To check if ExpoA is an integer + 0.5, we add it to itself. If there
2374       // is no floating point exception and the result is an integer, then
2375       // ExpoA == integer + 0.5
2376       if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
2377         return nullptr;
2378 
2379       if (!Expo2.isInteger())
2380         return nullptr;
2381 
2382       if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) !=
2383           APFloat::opInexact)
2384         return nullptr;
2385       if (!ExpoI.isInteger())
2386         return nullptr;
2387       ExpoF = &ExpoI;
2388 
2389       Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), M,
2390                          B, TLI);
2391       if (!Sqrt)
2392         return nullptr;
2393     }
2394 
2395     // 0.5 fraction is now optionally handled.
2396     // Do pow -> powi for remaining integer exponent
2397     APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
2398     if (ExpoF->isInteger() &&
2399         ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
2400             APFloat::opOK) {
2401       Value *PowI = copyFlags(
2402           *Pow,
2403           createPowWithIntegerExponent(
2404               Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
2405               M, B));
2406 
2407       if (PowI && Sqrt)
2408         return B.CreateFMul(PowI, Sqrt);
2409 
2410       return PowI;
2411     }
2412   }
2413 
2414   // powf(x, itofp(y)) -> powi(x, y)
2415   if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
2416     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
2417       return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B));
2418   }
2419 
2420   // Shrink pow() to powf() if the arguments are single precision,
2421   // unless the result is expected to be double precision.
2422   if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
2423       hasFloatVersion(M, Name)) {
2424     if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, TLI, true))
2425       return Shrunk;
2426   }
2427 
2428   return nullptr;
2429 }
2430 
2431 Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
2432   Module *M = CI->getModule();
2433   Function *Callee = CI->getCalledFunction();
2434   StringRef Name = Callee->getName();
2435   Value *Ret = nullptr;
2436   if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
2437       hasFloatVersion(M, Name))
2438     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2439 
2440   // If we have an llvm.exp2 intrinsic, emit the llvm.ldexp intrinsic. If we
2441   // have the libcall, emit the libcall.
2442   //
2443   // TODO: In principle we should be able to just always use the intrinsic for
2444   // any doesNotAccessMemory callsite.
2445 
2446   const bool UseIntrinsic = Callee->isIntrinsic();
2447   // Bail out for vectors because the code below only expects scalars.
2448   Type *Ty = CI->getType();
2449   if (!UseIntrinsic && Ty->isVectorTy())
2450     return Ret;
2451 
2452   // exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= IntSize
2453   // exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < IntSize
2454   Value *Op = CI->getArgOperand(0);
2455   if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
2456       (UseIntrinsic ||
2457        hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl))) {
2458     if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) {
2459       Constant *One = ConstantFP::get(Ty, 1.0);
2460 
2461       if (UseIntrinsic) {
2462         return copyFlags(*CI, B.CreateIntrinsic(Intrinsic::ldexp,
2463                                                 {Ty, Exp->getType()},
2464                                                 {One, Exp}, CI));
2465       }
2466 
2467       IRBuilderBase::FastMathFlagGuard Guard(B);
2468       B.setFastMathFlags(CI->getFastMathFlags());
2469       return copyFlags(*CI, emitBinaryFloatFnCall(
2470                                 One, Exp, TLI, LibFunc_ldexp, LibFunc_ldexpf,
2471                                 LibFunc_ldexpl, B, AttributeList()));
2472     }
2473   }
2474 
2475   return Ret;
2476 }
2477 
2478 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
2479   Module *M = CI->getModule();
2480 
2481   // If we can shrink the call to a float function rather than a double
2482   // function, do that first.
2483   Function *Callee = CI->getCalledFunction();
2484   StringRef Name = Callee->getName();
2485   if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(M, Name))
2486     if (Value *Ret = optimizeBinaryDoubleFP(CI, B, TLI))
2487       return Ret;
2488 
2489   // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
2490   // the intrinsics for improved optimization (for example, vectorization).
2491   // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
2492   // From the C standard draft WG14/N1256:
2493   // "Ideally, fmax would be sensitive to the sign of zero, for example
2494   // fmax(-0.0, +0.0) would return +0; however, implementation in software
2495   // might be impractical."
2496   IRBuilderBase::FastMathFlagGuard Guard(B);
2497   FastMathFlags FMF = CI->getFastMathFlags();
2498   FMF.setNoSignedZeros();
2499   B.setFastMathFlags(FMF);
2500 
2501   Intrinsic::ID IID = Callee->getName().starts_with("fmin") ? Intrinsic::minnum
2502                                                             : Intrinsic::maxnum;
2503   return copyFlags(*CI, B.CreateBinaryIntrinsic(IID, CI->getArgOperand(0),
2504                                                 CI->getArgOperand(1)));
2505 }
2506 
2507 Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
2508   Function *LogFn = Log->getCalledFunction();
2509   StringRef LogNm = LogFn->getName();
2510   Intrinsic::ID LogID = LogFn->getIntrinsicID();
2511   Module *Mod = Log->getModule();
2512   Type *Ty = Log->getType();
2513   Value *Ret = nullptr;
2514 
2515   if (UnsafeFPShrink && hasFloatVersion(Mod, LogNm))
2516     Ret = optimizeUnaryDoubleFP(Log, B, TLI, true);
2517 
2518   // The earlier call must also be 'fast' in order to do these transforms.
2519   CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
2520   if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse())
2521     return Ret;
2522 
2523   LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb;
2524 
2525   // This is only applicable to log(), log2(), log10().
2526   if (TLI->getLibFunc(LogNm, LogLb))
2527     switch (LogLb) {
2528     case LibFunc_logf:
2529       LogID = Intrinsic::log;
2530       ExpLb = LibFunc_expf;
2531       Exp2Lb = LibFunc_exp2f;
2532       Exp10Lb = LibFunc_exp10f;
2533       PowLb = LibFunc_powf;
2534       break;
2535     case LibFunc_log:
2536       LogID = Intrinsic::log;
2537       ExpLb = LibFunc_exp;
2538       Exp2Lb = LibFunc_exp2;
2539       Exp10Lb = LibFunc_exp10;
2540       PowLb = LibFunc_pow;
2541       break;
2542     case LibFunc_logl:
2543       LogID = Intrinsic::log;
2544       ExpLb = LibFunc_expl;
2545       Exp2Lb = LibFunc_exp2l;
2546       Exp10Lb = LibFunc_exp10l;
2547       PowLb = LibFunc_powl;
2548       break;
2549     case LibFunc_log2f:
2550       LogID = Intrinsic::log2;
2551       ExpLb = LibFunc_expf;
2552       Exp2Lb = LibFunc_exp2f;
2553       Exp10Lb = LibFunc_exp10f;
2554       PowLb = LibFunc_powf;
2555       break;
2556     case LibFunc_log2:
2557       LogID = Intrinsic::log2;
2558       ExpLb = LibFunc_exp;
2559       Exp2Lb = LibFunc_exp2;
2560       Exp10Lb = LibFunc_exp10;
2561       PowLb = LibFunc_pow;
2562       break;
2563     case LibFunc_log2l:
2564       LogID = Intrinsic::log2;
2565       ExpLb = LibFunc_expl;
2566       Exp2Lb = LibFunc_exp2l;
2567       Exp10Lb = LibFunc_exp10l;
2568       PowLb = LibFunc_powl;
2569       break;
2570     case LibFunc_log10f:
2571       LogID = Intrinsic::log10;
2572       ExpLb = LibFunc_expf;
2573       Exp2Lb = LibFunc_exp2f;
2574       Exp10Lb = LibFunc_exp10f;
2575       PowLb = LibFunc_powf;
2576       break;
2577     case LibFunc_log10:
2578       LogID = Intrinsic::log10;
2579       ExpLb = LibFunc_exp;
2580       Exp2Lb = LibFunc_exp2;
2581       Exp10Lb = LibFunc_exp10;
2582       PowLb = LibFunc_pow;
2583       break;
2584     case LibFunc_log10l:
2585       LogID = Intrinsic::log10;
2586       ExpLb = LibFunc_expl;
2587       Exp2Lb = LibFunc_exp2l;
2588       Exp10Lb = LibFunc_exp10l;
2589       PowLb = LibFunc_powl;
2590       break;
2591     default:
2592       return Ret;
2593     }
2594   else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
2595            LogID == Intrinsic::log10) {
2596     if (Ty->getScalarType()->isFloatTy()) {
2597       ExpLb = LibFunc_expf;
2598       Exp2Lb = LibFunc_exp2f;
2599       Exp10Lb = LibFunc_exp10f;
2600       PowLb = LibFunc_powf;
2601     } else if (Ty->getScalarType()->isDoubleTy()) {
2602       ExpLb = LibFunc_exp;
2603       Exp2Lb = LibFunc_exp2;
2604       Exp10Lb = LibFunc_exp10;
2605       PowLb = LibFunc_pow;
2606     } else
2607       return Ret;
2608   } else
2609     return Ret;
2610 
2611   IRBuilderBase::FastMathFlagGuard Guard(B);
2612   B.setFastMathFlags(FastMathFlags::getFast());
2613 
2614   Intrinsic::ID ArgID = Arg->getIntrinsicID();
2615   LibFunc ArgLb = NotLibFunc;
2616   TLI->getLibFunc(*Arg, ArgLb);
2617 
2618   // log(pow(x,y)) -> y*log(x)
2619   AttributeList NoAttrs;
2620   if (ArgLb == PowLb || ArgID == Intrinsic::pow || ArgID == Intrinsic::powi) {
2621     Value *LogX =
2622         Log->doesNotAccessMemory()
2623             ? B.CreateUnaryIntrinsic(LogID, Arg->getOperand(0), nullptr, "log")
2624             : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, NoAttrs);
2625     Value *Y = Arg->getArgOperand(1);
2626     // Cast exponent to FP if integer.
2627     if (ArgID == Intrinsic::powi)
2628       Y = B.CreateSIToFP(Y, Ty, "cast");
2629     Value *MulY = B.CreateFMul(Y, LogX, "mul");
2630     // Since pow() may have side effects, e.g. errno,
2631     // dead code elimination may not be trusted to remove it.
2632     substituteInParent(Arg, MulY);
2633     return MulY;
2634   }
2635 
2636   // log(exp{,2,10}(y)) -> y*log({e,2,10})
2637   // TODO: There is no exp10() intrinsic yet.
2638   if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb ||
2639       ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) {
2640     Constant *Eul;
2641     if (ArgLb == ExpLb || ArgID == Intrinsic::exp)
2642       // FIXME: Add more precise value of e for long double.
2643       Eul = ConstantFP::get(Log->getType(), numbers::e);
2644     else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2)
2645       Eul = ConstantFP::get(Log->getType(), 2.0);
2646     else
2647       Eul = ConstantFP::get(Log->getType(), 10.0);
2648     Value *LogE = Log->doesNotAccessMemory()
2649                       ? B.CreateUnaryIntrinsic(LogID, Eul, nullptr, "log")
2650                       : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, NoAttrs);
2651     Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
2652     // Since exp() may have side effects, e.g. errno,
2653     // dead code elimination may not be trusted to remove it.
2654     substituteInParent(Arg, MulY);
2655     return MulY;
2656   }
2657 
2658   return Ret;
2659 }
2660 
2661 // sqrt(exp(X)) -> exp(X * 0.5)
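     // Likewise sqrt(exp2(X)) -> exp2(X * 0.5) and sqrt(exp10(X)) -> exp10(X * 0.5),
     // subject to the same reassociation checks below.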
2662 Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) {
2663   if (!CI->hasAllowReassoc())
2664     return nullptr;
2665 
2666   Function *SqrtFn = CI->getCalledFunction();
2667   CallInst *Arg = dyn_cast<CallInst>(CI->getArgOperand(0));
2668   if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse())
2669     return nullptr;
2670   Intrinsic::ID ArgID = Arg->getIntrinsicID();
2671   LibFunc ArgLb = NotLibFunc;
2672   TLI->getLibFunc(*Arg, ArgLb);
2673 
2674   LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb;
2675 
2676   if (TLI->getLibFunc(SqrtFn->getName(), SqrtLb))
2677     switch (SqrtLb) {
2678     case LibFunc_sqrtf:
2679       ExpLb = LibFunc_expf;
2680       Exp2Lb = LibFunc_exp2f;
2681       Exp10Lb = LibFunc_exp10f;
2682       break;
2683     case LibFunc_sqrt:
2684       ExpLb = LibFunc_exp;
2685       Exp2Lb = LibFunc_exp2;
2686       Exp10Lb = LibFunc_exp10;
2687       break;
2688     case LibFunc_sqrtl:
2689       ExpLb = LibFunc_expl;
2690       Exp2Lb = LibFunc_exp2l;
2691       Exp10Lb = LibFunc_exp10l;
2692       break;
2693     default:
2694       return nullptr;
2695     }
2696   else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) {
2697     if (CI->getType()->getScalarType()->isFloatTy()) {
2698       ExpLb = LibFunc_expf;
2699       Exp2Lb = LibFunc_exp2f;
2700       Exp10Lb = LibFunc_exp10f;
2701     } else if (CI->getType()->getScalarType()->isDoubleTy()) {
2702       ExpLb = LibFunc_exp;
2703       Exp2Lb = LibFunc_exp2;
2704       Exp10Lb = LibFunc_exp10;
2705     } else
2706       return nullptr;
2707   } else
2708     return nullptr;
2709 
2710   if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb &&
2711       ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2)
2712     return nullptr;
2713 
2714   IRBuilderBase::InsertPointGuard Guard(B);
2715   B.SetInsertPoint(Arg);
2716   auto *ExpOperand = Arg->getOperand(0);
2717   auto *FMul =
2718       B.CreateFMulFMF(ExpOperand, ConstantFP::get(ExpOperand->getType(), 0.5),
2719                       CI, "merged.sqrt");
2720 
2721   Arg->setOperand(0, FMul);
2722   return Arg;
2723 }
2724 
2725 Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
2726   Module *M = CI->getModule();
2727   Function *Callee = CI->getCalledFunction();
2728   Value *Ret = nullptr;
2729   // TODO: Once we have a way (other than checking for the existence of the
2730   // libcall) to tell whether our target can lower @llvm.sqrt, relax the
2731   // condition below.
2732   if (isLibFuncEmittable(M, TLI, LibFunc_sqrtf) &&
2733       (Callee->getName() == "sqrt" ||
2734        Callee->getIntrinsicID() == Intrinsic::sqrt))
2735     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2736 
2737   if (Value *Opt = mergeSqrtToExp(CI, B))
2738     return Opt;
2739 
2740   if (!CI->isFast())
2741     return Ret;
2742 
2743   Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
2744   if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
2745     return Ret;
2746 
2747   // We're looking for a repeated factor in a multiplication tree,
2748   // so we can do this fold: sqrt(x * x) -> fabs(x);
2749   // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
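       // For example (illustrative IR, value names hypothetical):
       //   %m = fmul fast double %x, %x
       //   %r = call fast double @sqrt(double %m)
       // is simplified to a single call to @llvm.fabs.f64(double %x).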
2750   Value *Op0 = I->getOperand(0);
2751   Value *Op1 = I->getOperand(1);
2752   Value *RepeatOp = nullptr;
2753   Value *OtherOp = nullptr;
2754   if (Op0 == Op1) {
2755     // Simple match: the operands of the multiply are identical.
2756     RepeatOp = Op0;
2757   } else {
2758     // Look for a more complicated pattern: one of the operands is itself
2759     // a multiply, so search for a common factor in that multiply.
2760     // Note: We don't bother looking any deeper than this first level or for
2761     // variations of this pattern because instcombine's visitFMUL and/or the
2762     // reassociation pass should give us this form.
2763     Value *MulOp;
2764     if (match(Op0, m_FMul(m_Value(MulOp), m_Deferred(MulOp))) &&
2765         cast<Instruction>(Op0)->isFast()) {
2766       // Pattern: sqrt((x * x) * z)
2767       RepeatOp = MulOp;
2768       OtherOp = Op1;
2769     } else if (match(Op1, m_FMul(m_Value(MulOp), m_Deferred(MulOp))) &&
2770                cast<Instruction>(Op1)->isFast()) {
2771       // Pattern: sqrt(z * (x * x))
2772       RepeatOp = MulOp;
2773       OtherOp = Op0;
2774     }
2775   }
2776   if (!RepeatOp)
2777     return Ret;
2778 
2779   // Fast math flags for any created instructions should match the sqrt
2780   // and multiply.
2781   IRBuilderBase::FastMathFlagGuard Guard(B);
2782   B.setFastMathFlags(I->getFastMathFlags());
2783 
2784   // If we found a repeated factor, hoist it out of the square root and
2785   // replace it with the fabs of that factor.
2786   Value *FabsCall =
2787       B.CreateUnaryIntrinsic(Intrinsic::fabs, RepeatOp, nullptr, "fabs");
2788   if (OtherOp) {
2789     // If we found a non-repeated factor, we still need to get its square
2790     // root. We then multiply that by the value that was simplified out
2791     // of the square root calculation.
2792     Value *SqrtCall =
2793         B.CreateUnaryIntrinsic(Intrinsic::sqrt, OtherOp, nullptr, "sqrt");
2794     return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall));
2795   }
2796   return copyFlags(*CI, FabsCall);
2797 }
2798 
2799 Value *LibCallSimplifier::optimizeFMod(CallInst *CI, IRBuilderBase &B) {
2800   SimplifyQuery SQ(DL, TLI, DT, AC, CI, true, true, DC);
2801 
2802   // fmod(x,y) can set errno if y == 0 or x == +/-inf, and returns NaN in
2803   // those cases. If we know those do not happen, then we can convert the
2804   // fmod into frem.
2805   bool IsNoNan = CI->hasNoNaNs();
2806   if (!IsNoNan) {
2807     KnownFPClass Known0 = computeKnownFPClass(CI->getOperand(0), fcInf,
2808                                               /*Depth=*/0, SQ);
2809     if (Known0.isKnownNeverInfinity()) {
2810       KnownFPClass Known1 =
2811           computeKnownFPClass(CI->getOperand(1), fcZero | fcSubnormal,
2812                               /*Depth=*/0, SQ);
2813       Function *F = CI->getParent()->getParent();
2814       if (Known1.isKnownNeverLogicalZero(*F, CI->getType()))
2815         IsNoNan = true;
2816     }
2817   }
2818 
2819   if (IsNoNan) {
2820     Value *FRem = B.CreateFRemFMF(CI->getOperand(0), CI->getOperand(1), CI);
2821     if (auto *FRemI = dyn_cast<Instruction>(FRem))
2822       FRemI->setHasNoNaNs(true);
2823     return FRem;
2824   }
2825   return nullptr;
2826 }
2827 
2828 Value *LibCallSimplifier::optimizeTrigInversionPairs(CallInst *CI,
2829                                                      IRBuilderBase &B) {
2830   Module *M = CI->getModule();
2831   Function *Callee = CI->getCalledFunction();
2832   Value *Ret = nullptr;
2833   StringRef Name = Callee->getName();
2834   if (UnsafeFPShrink &&
2835       (Name == "tan" || Name == "atanh" || Name == "sinh" || Name == "cosh" ||
2836        Name == "asinh") &&
2837       hasFloatVersion(M, Name))
2838     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2839 
2840   Value *Op1 = CI->getArgOperand(0);
2841   auto *OpC = dyn_cast<CallInst>(Op1);
2842   if (!OpC)
2843     return Ret;
2844 
2845   // Both calls must be 'fast' in order to remove them.
2846   if (!CI->isFast() || !OpC->isFast())
2847     return Ret;
2848 
2849   // tan(atan(x)) -> x
2850   // atanh(tanh(x)) -> x
2851   // sinh(asinh(x)) -> x
2852   // asinh(sinh(x)) -> x
2853   // cosh(acosh(x)) -> x
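       // The StringSwitch below maps the outer call's name to the library function
       // its operand must be for the pair to cancel (e.g. an outer "cosh" only
       // folds when the inner call is acosh).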
2854   LibFunc Func;
2855   Function *F = OpC->getCalledFunction();
2856   if (F && TLI->getLibFunc(F->getName(), Func) &&
2857       isLibFuncEmittable(M, TLI, Func)) {
2858     LibFunc inverseFunc = llvm::StringSwitch<LibFunc>(Callee->getName())
2859                               .Case("tan", LibFunc_atan)
2860                               .Case("atanh", LibFunc_tanh)
2861                               .Case("sinh", LibFunc_asinh)
2862                               .Case("cosh", LibFunc_acosh)
2863                               .Case("tanf", LibFunc_atanf)
2864                               .Case("atanhf", LibFunc_tanhf)
2865                               .Case("sinhf", LibFunc_asinhf)
2866                               .Case("coshf", LibFunc_acoshf)
2867                               .Case("tanl", LibFunc_atanl)
2868                               .Case("atanhl", LibFunc_tanhl)
2869                               .Case("sinhl", LibFunc_asinhl)
2870                               .Case("coshl", LibFunc_acoshl)
2871                               .Case("asinh", LibFunc_sinh)
2872                               .Case("asinhf", LibFunc_sinhf)
2873                               .Case("asinhl", LibFunc_sinhl)
2874                               .Default(NumLibFuncs); // Used as error value
2875     if (Func == inverseFunc)
2876       Ret = OpC->getArgOperand(0);
2877   }
2878   return Ret;
2879 }
2880 
2881 static bool isTrigLibCall(CallInst *CI) {
2882   // We can only hope to do anything useful if we can ignore things like errno
2883   // and floating-point exceptions.
2884   // We already checked the prototype.
2885   return CI->doesNotThrow() && CI->doesNotAccessMemory();
2886 }
2887 
2888 static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
2889                              bool UseFloat, Value *&Sin, Value *&Cos,
2890                              Value *&SinCos, const TargetLibraryInfo *TLI) {
2891   Module *M = OrigCallee->getParent();
2892   Type *ArgTy = Arg->getType();
2893   Type *ResTy;
2894   StringRef Name;
2895 
2896   Triple T(OrigCallee->getParent()->getTargetTriple());
2897   if (UseFloat) {
2898     Name = "__sincospif_stret";
2899 
2900     assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
2901     // x86_64 can't use {float, float} since that would be returned in both
2902     // xmm0 and xmm1, which isn't what a real struct would do.
2903     ResTy = T.getArch() == Triple::x86_64
2904                 ? static_cast<Type *>(FixedVectorType::get(ArgTy, 2))
2905                 : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
2906   } else {
2907     Name = "__sincospi_stret";
2908     ResTy = StructType::get(ArgTy, ArgTy);
2909   }
2910 
2911   if (!isLibFuncEmittable(M, TLI, Name))
2912     return false;
2913   LibFunc TheLibFunc;
2914   TLI->getLibFunc(Name, TheLibFunc);
2915   FunctionCallee Callee = getOrInsertLibFunc(
2916       M, *TLI, TheLibFunc, OrigCallee->getAttributes(), ResTy, ArgTy);
2917 
2918   if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
2919     // If the argument is an instruction, it must dominate all uses so put our
2920     // sincos call there.
2921     B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
2922   } else {
2923     // Otherwise (e.g. for a constant) the beginning of the function is as
2924     // good a place as any.
2925     BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
2926     B.SetInsertPoint(&EntryBB, EntryBB.begin());
2927   }
2928 
2929   SinCos = B.CreateCall(Callee, Arg, "sincospi");
2930 
2931   if (SinCos->getType()->isStructTy()) {
2932     Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
2933     Cos = B.CreateExtractValue(SinCos, 1, "cospi");
2934   } else {
2935     Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
2936                                  "sinpi");
2937     Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
2938                                  "cospi");
2939   }
2940 
2941   return true;
2942 }
2943 
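     // Fold a call to an even or odd function of a negated argument, e.g.
     // cos(-x) -> cos(x) and sin(-x) -> -sin(x); even functions also ignore
     // fabs/copysign on the argument.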
2944 static Value *optimizeSymmetricCall(CallInst *CI, bool IsEven,
2945                                     IRBuilderBase &B) {
2946   Value *X;
2947   Value *Src = CI->getArgOperand(0);
2948 
2949   if (match(Src, m_OneUse(m_FNeg(m_Value(X))))) {
2950     IRBuilderBase::FastMathFlagGuard Guard(B);
2951     B.setFastMathFlags(CI->getFastMathFlags());
2952 
2953     auto *CallInst = copyFlags(*CI, B.CreateCall(CI->getCalledFunction(), {X}));
2954     if (IsEven) {
2955       // Even function: f(-x) = f(x)
2956       return CallInst;
2957     }
2958     // Odd function: f(-x) = -f(x)
2959     return B.CreateFNeg(CallInst);
2960   }
2961 
2962   // Even function: f(abs(x)) = f(x), f(copysign(x, y)) = f(x)
2963   if (IsEven && (match(Src, m_FAbs(m_Value(X))) ||
2964                  match(Src, m_CopySign(m_Value(X), m_Value())))) {
2965     IRBuilderBase::FastMathFlagGuard Guard(B);
2966     B.setFastMathFlags(CI->getFastMathFlags());
2967 
2968     auto *CallInst = copyFlags(*CI, B.CreateCall(CI->getCalledFunction(), {X}));
2969     return CallInst;
2970   }
2971 
2972   return nullptr;
2973 }
2974 
2975 Value *LibCallSimplifier::optimizeSymmetric(CallInst *CI, LibFunc Func,
2976                                             IRBuilderBase &B) {
2977   switch (Func) {
2978   case LibFunc_cos:
2979   case LibFunc_cosf:
2980   case LibFunc_cosl:
2981     return optimizeSymmetricCall(CI, /*IsEven*/ true, B);
2982 
2983   case LibFunc_sin:
2984   case LibFunc_sinf:
2985   case LibFunc_sinl:
2986 
2987   case LibFunc_tan:
2988   case LibFunc_tanf:
2989   case LibFunc_tanl:
2990 
2991   case LibFunc_erf:
2992   case LibFunc_erff:
2993   case LibFunc_erfl:
2994     return optimizeSymmetricCall(CI, /*IsEven*/ false, B);
2995 
2996   default:
2997     return nullptr;
2998   }
2999 }
3000 
3001 Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B) {
3002   // Make sure the prototype is as expected, otherwise the rest of the
3003   // function is probably invalid and likely to abort.
3004   if (!isTrigLibCall(CI))
3005     return nullptr;
3006 
3007   Value *Arg = CI->getArgOperand(0);
3008   SmallVector<CallInst *, 1> SinCalls;
3009   SmallVector<CallInst *, 1> CosCalls;
3010   SmallVector<CallInst *, 1> SinCosCalls;
3011 
3012   bool IsFloat = Arg->getType()->isFloatTy();
3013 
3014   // Look for all compatible sinpi, cospi and sincospi calls with the same
3015   // argument. If there are enough (in some sense) we can make the
3016   // substitution.
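       // e.g. sinpi(x) and cospi(x) on the same x are both replaced with extracts
       // from a single __sincospi_stret(x) call inserted near the definition of x.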
3017   Function *F = CI->getFunction();
3018   for (User *U : Arg->users())
3019     classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
3020 
3021   // It's only worthwhile if both sinpi and cospi are actually used.
3022   if (SinCalls.empty() || CosCalls.empty())
3023     return nullptr;
3024 
3025   Value *Sin, *Cos, *SinCos;
3026   if (!insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
3027                         SinCos, TLI))
3028     return nullptr;
3029 
3030   auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
3031                                  Value *Res) {
3032     for (CallInst *C : Calls)
3033       replaceAllUsesWith(C, Res);
3034   };
3035 
3036   replaceTrigInsts(SinCalls, Sin);
3037   replaceTrigInsts(CosCalls, Cos);
3038   replaceTrigInsts(SinCosCalls, SinCos);
3039 
3040   return IsSin ? Sin : Cos;
3041 }
3042 
3043 void LibCallSimplifier::classifyArgUse(
3044     Value *Val, Function *F, bool IsFloat,
3045     SmallVectorImpl<CallInst *> &SinCalls,
3046     SmallVectorImpl<CallInst *> &CosCalls,
3047     SmallVectorImpl<CallInst *> &SinCosCalls) {
3048   auto *CI = dyn_cast<CallInst>(Val);
3049   if (!CI || CI->use_empty())
3050     return;
3051 
3052   // Don't consider calls in other functions.
3053   if (CI->getFunction() != F)
3054     return;
3055 
3056   Module *M = CI->getModule();
3057   Function *Callee = CI->getCalledFunction();
3058   LibFunc Func;
3059   if (!Callee || !TLI->getLibFunc(*Callee, Func) ||
3060       !isLibFuncEmittable(M, TLI, Func) ||
3061       !isTrigLibCall(CI))
3062     return;
3063 
3064   if (IsFloat) {
3065     if (Func == LibFunc_sinpif)
3066       SinCalls.push_back(CI);
3067     else if (Func == LibFunc_cospif)
3068       CosCalls.push_back(CI);
3069     else if (Func == LibFunc_sincospif_stret)
3070       SinCosCalls.push_back(CI);
3071   } else {
3072     if (Func == LibFunc_sinpi)
3073       SinCalls.push_back(CI);
3074     else if (Func == LibFunc_cospi)
3075       CosCalls.push_back(CI);
3076     else if (Func == LibFunc_sincospi_stret)
3077       SinCosCalls.push_back(CI);
3078   }
3079 }
3080 
3081 /// Constant folds remquo
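     /// e.g. remquo(5.0, 3.0, &quo) folds to a store of 2 to quo and the constant
     /// -1.0 (the IEEE remainder rounds the quotient 5/3 to the nearest integer).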
3082 Value *LibCallSimplifier::optimizeRemquo(CallInst *CI, IRBuilderBase &B) {
3083   const APFloat *X, *Y;
3084   if (!match(CI->getArgOperand(0), m_APFloat(X)) ||
3085       !match(CI->getArgOperand(1), m_APFloat(Y)))
3086     return nullptr;
3087 
3088   APFloat::opStatus Status;
3089   APFloat Quot = *X;
3090   Status = Quot.divide(*Y, APFloat::rmNearestTiesToEven);
3091   if (Status != APFloat::opOK && Status != APFloat::opInexact)
3092     return nullptr;
3093   APFloat Rem = *X;
3094   if (Rem.remainder(*Y) != APFloat::opOK)
3095     return nullptr;
3096 
3097   // TODO: remquo only needs to preserve at least the last three bits of x/y.
3098   unsigned IntBW = TLI->getIntSize();
3099   APSInt QuotInt(IntBW, /*isUnsigned=*/false);
3100   bool IsExact;
3101   Status =
3102       Quot.convertToInteger(QuotInt, APFloat::rmNearestTiesToEven, &IsExact);
3103   if (Status != APFloat::opOK && Status != APFloat::opInexact)
3104     return nullptr;
3105 
3106   B.CreateAlignedStore(
3107       ConstantInt::get(B.getIntNTy(IntBW), QuotInt.getExtValue()),
3108       CI->getArgOperand(2), CI->getParamAlign(2));
3109   return ConstantFP::get(CI->getType(), Rem);
3110 }
3111 
3112 //===----------------------------------------------------------------------===//
3113 // Integer Library Call Optimizations
3114 //===----------------------------------------------------------------------===//
3115 
3116 Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilderBase &B) {
3117   // All variants of ffs return int which need not be 32 bits wide.
3118   // ffs{,l,ll}(x) -> x != 0 ? (int)llvm.cttz(x)+1 : 0
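       // e.g. ffs(0x18) == 4: cttz(0x18) == 3, plus one; ffs(0) == 0 via the select.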
3119   Type *RetType = CI->getType();
3120   Value *Op = CI->getArgOperand(0);
3121   Type *ArgType = Op->getType();
3122   Value *V = B.CreateIntrinsic(Intrinsic::cttz, {ArgType}, {Op, B.getTrue()},
3123                                nullptr, "cttz");
3124   V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
3125   V = B.CreateIntCast(V, RetType, false);
3126 
3127   Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
3128   return B.CreateSelect(Cond, V, ConstantInt::get(RetType, 0));
3129 }
3130 
3131 Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilderBase &B) {
3132   // All variants of fls return int which need not be 32 bits wide.
3133   // fls{,l,ll}(x) -> (int)(sizeInBits(x) - llvm.ctlz(x, false))
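       // e.g. for 32-bit x == 0x18, ctlz == 27 and fls returns 32 - 27 == 5;
       // fls(0) == 0 because ctlz(0, false) is defined to be the bit width.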
3134   Value *Op = CI->getArgOperand(0);
3135   Type *ArgType = Op->getType();
3136   Value *V = B.CreateIntrinsic(Intrinsic::ctlz, {ArgType}, {Op, B.getFalse()},
3137                                nullptr, "ctlz");
3138   V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
3139                   V);
3140   return B.CreateIntCast(V, CI->getType(), false);
3141 }
3142 
3143 Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) {
3144   // abs(x) -> x <s 0 ? -x : x
3145   // The negation has 'nsw' because abs of INT_MIN is undefined.
3146   Value *X = CI->getArgOperand(0);
3147   Value *IsNeg = B.CreateIsNeg(X);
3148   Value *NegX = B.CreateNSWNeg(X, "neg");
3149   return B.CreateSelect(IsNeg, NegX, X);
3150 }
3151 
3152 Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilderBase &B) {
3153   // isdigit(c) -> (c-'0') <u 10
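       // e.g. for c == '7' the subtraction yields 7, which is <u 10, so the zext
       // of the compare produces 1; any c outside '0'..'9' yields a value >= 10
       // (possibly after unsigned wrap) and produces 0.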
3154   Value *Op = CI->getArgOperand(0);
3155   Type *ArgType = Op->getType();
3156   Op = B.CreateSub(Op, ConstantInt::get(ArgType, '0'), "isdigittmp");
3157   Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 10), "isdigit");
3158   return B.CreateZExt(Op, CI->getType());
3159 }
3160 
3161 Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilderBase &B) {
3162   // isascii(c) -> c <u 128
3163   Value *Op = CI->getArgOperand(0);
3164   Type *ArgType = Op->getType();
3165   Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 128), "isascii");
3166   return B.CreateZExt(Op, CI->getType());
3167 }
3168 
3169 Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilderBase &B) {
3170   // toascii(c) -> c & 0x7f
3171   return B.CreateAnd(CI->getArgOperand(0),
3172                      ConstantInt::get(CI->getType(), 0x7F));
3173 }
3174 
3175 // Fold calls to atoi, atol, and atoll.
3176 Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
3177   CI->addParamAttr(0, Attribute::NoCapture);
3178 
3179   StringRef Str;
3180   if (!getConstantStringInfo(CI->getArgOperand(0), Str))
3181     return nullptr;
3182 
3183   return convertStrToInt(CI, Str, nullptr, 10, /*AsSigned=*/true, B);
3184 }
3185 
3186 // Fold calls to strtol, strtoll, strtoul, and strtoull.
3187 Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B,
3188                                            bool AsSigned) {
3189   Value *EndPtr = CI->getArgOperand(1);
3190   if (isa<ConstantPointerNull>(EndPtr)) {
3191     // With a null EndPtr, this function won't capture the main argument.
3192     // It would be readonly too, except that it still may write to errno.
3193     CI->addParamAttr(0, Attribute::NoCapture);
3194     EndPtr = nullptr;
3195   } else if (!isKnownNonZero(EndPtr, DL))
3196     return nullptr;
3197 
3198   StringRef Str;
3199   if (!getConstantStringInfo(CI->getArgOperand(0), Str))
3200     return nullptr;
3201 
3202   if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
3203     return convertStrToInt(CI, Str, EndPtr, CInt->getSExtValue(), AsSigned, B);
3204   }
3205 
3206   return nullptr;
3207 }
3208 
3209 //===----------------------------------------------------------------------===//
3210 // Formatting and IO Library Call Optimizations
3211 //===----------------------------------------------------------------------===//
3212 
3213 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
3214 
3215 Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B,
3216                                                  int StreamArg) {
3217   Function *Callee = CI->getCalledFunction();
3218   // Error reporting calls should be cold, mark them as such.
3219   // This applies even to non-builtin calls: it is only a hint and applies to
3220   // functions that the frontend might not understand as builtins.
3221 
3222   // This heuristic was suggested in:
3223   // Improving Static Branch Prediction in a Compiler
3224   // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
3225   // Proceedings of PACT'98, Oct. 1998, IEEE
3226   if (!CI->hasFnAttr(Attribute::Cold) &&
3227       isReportingError(Callee, CI, StreamArg)) {
3228     CI->addFnAttr(Attribute::Cold);
3229   }
3230 
3231   return nullptr;
3232 }
3233 
3234 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
3235   if (!Callee || !Callee->isDeclaration())
3236     return false;
3237 
3238   if (StreamArg < 0)
3239     return true;
3240 
3241   // These functions might be considered cold, but only if their stream
3242   // argument is stderr.
3243 
3244   if (StreamArg >= (int)CI->arg_size())
3245     return false;
3246   LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
3247   if (!LI)
3248     return false;
3249   GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
3250   if (!GV || !GV->isDeclaration())
3251     return false;
3252   return GV->getName() == "stderr";
3253 }
3254 
3255 Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
3256   // Check for a fixed format string.
3257   StringRef FormatStr;
3258   if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
3259     return nullptr;
3260 
3261   // Empty format string -> noop.
3262   if (FormatStr.empty()) // Tolerate printf's declared void.
3263     return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0);
3264 
3265   // Do not do any of the following transformations if the printf return value
3266   // is used; in general the printf return value is not compatible with either
3267   // putchar() or puts().
3268   if (!CI->use_empty())
3269     return nullptr;
3270 
3271   Type *IntTy = CI->getType();
3272   // printf("x") -> putchar('x'), even for "%" and "%%".
3273   if (FormatStr.size() == 1 || FormatStr == "%%") {
3274     // Convert the character to unsigned char before passing it to putchar
3275     // to avoid host-specific sign extension in the IR.  Putchar converts
3276     // it to unsigned char regardless.
3277     Value *IntChar = ConstantInt::get(IntTy, (unsigned char)FormatStr[0]);
3278     return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3279   }
3280 
3281   // Try to remove call or emit putchar/puts.
3282   if (FormatStr == "%s" && CI->arg_size() > 1) {
3283     StringRef OperandStr;
3284     if (!getConstantStringInfo(CI->getOperand(1), OperandStr))
3285       return nullptr;
3286     // printf("%s", "") --> NOP
3287     if (OperandStr.empty())
3288       return (Value *)CI;
3289     // printf("%s", "a") --> putchar('a')
3290     if (OperandStr.size() == 1) {
3291       // Convert the character to unsigned char before passing it to putchar
3292       // to avoid host-specific sign extension in the IR.  Putchar converts
3293       // it to unsigned char regardless.
3294       Value *IntChar = ConstantInt::get(IntTy, (unsigned char)OperandStr[0]);
3295       return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3296     }
3297     // printf("%s", str"\n") --> puts(str)
3298     // printf("%s", "foo\n") --> puts("foo")
3299       OperandStr = OperandStr.drop_back();
3300       Value *GV = B.CreateGlobalString(OperandStr, "str");
3301       return copyFlags(*CI, emitPutS(GV, B, TLI));
3302     }
3303     return nullptr;
3304   }
3305 
3306   // printf("foo\n") --> puts("foo")
3307   if (FormatStr.back() == '\n' &&
3308       !FormatStr.contains('%')) { // No format characters.
3309     // Create a string literal with no \n on it.  We expect the constant merge
3310     // pass to be run after this pass, to merge duplicate strings.
3311     FormatStr = FormatStr.drop_back();
3312     Value *GV = B.CreateGlobalString(FormatStr, "str");
3313     return copyFlags(*CI, emitPutS(GV, B, TLI));
3314   }
3315 
3316   // Optimize specific format strings.
3317   // printf("%c", chr) --> putchar(chr)
3318   if (FormatStr == "%c" && CI->arg_size() > 1 &&
3319       CI->getArgOperand(1)->getType()->isIntegerTy()) {
3320     // Convert the argument to the type expected by putchar, i.e., int, which
3321     // need not be 32 bits wide but which is the same as printf's return type.
3322     Value *IntChar = B.CreateIntCast(CI->getArgOperand(1), IntTy, false);
3323     return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3324   }
3325 
3326   // printf("%s\n", str) --> puts(str)
3327   if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
3328       CI->getArgOperand(1)->getType()->isPointerTy())
3329     return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI));
3330   return nullptr;
3331 }
3332 
3333 Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
3334 
3335   Module *M = CI->getModule();
3336   Function *Callee = CI->getCalledFunction();
3337   FunctionType *FT = Callee->getFunctionType();
3338   if (Value *V = optimizePrintFString(CI, B)) {
3339     return V;
3340   }
3341 
3342   annotateNonNullNoUndefBasedOnAccess(CI, 0);
3343 
3344   // printf(format, ...) -> iprintf(format, ...) if no floating point
3345   // arguments.
3346   if (isLibFuncEmittable(M, TLI, LibFunc_iprintf) &&
3347       !callHasFloatingPointArgument(CI)) {
3348     FunctionCallee IPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_iprintf, FT,
3349                                                   Callee->getAttributes());
3350     CallInst *New = cast<CallInst>(CI->clone());
3351     New->setCalledFunction(IPrintFFn);
3352     B.Insert(New);
3353     return New;
3354   }
3355 
3356   // printf(format, ...) -> __small_printf(format, ...) if no 128-bit
3357   // floating point arguments.
3358   if (isLibFuncEmittable(M, TLI, LibFunc_small_printf) &&
3359       !callHasFP128Argument(CI)) {
3360     auto SmallPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_printf, FT,
3361                                             Callee->getAttributes());
3362     CallInst *New = cast<CallInst>(CI->clone());
3363     New->setCalledFunction(SmallPrintFFn);
3364     B.Insert(New);
3365     return New;
3366   }
3367 
3368   return nullptr;
3369 }
3370 
3371 Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
3372                                                 IRBuilderBase &B) {
3373   // Check for a fixed format string.
3374   StringRef FormatStr;
3375   if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
3376     return nullptr;
3377 
3378   // If we just have a format string (nothing else crazy) transform it.
3379   Value *Dest = CI->getArgOperand(0);
3380   if (CI->arg_size() == 2) {
3381     // Make sure there's no % in the constant array.  We could try to handle
3382     // %% -> % in the future if we cared.
3383     if (FormatStr.contains('%'))
3384       return nullptr; // we found a format specifier, bail out.
3385 
3386     // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
3387     B.CreateMemCpy(
3388         Dest, Align(1), CI->getArgOperand(1), Align(1),
3389         ConstantInt::get(DL.getIntPtrType(CI->getContext()),
3390                          FormatStr.size() + 1)); // Copy the null byte.
3391     return ConstantInt::get(CI->getType(), FormatStr.size());
3392   }
3393 
3394   // The remaining optimizations require the format string to be "%s" or "%c"
3395   // and have an extra operand.
3396   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
3397     return nullptr;
3398 
3399   // Decode the second character of the format string.
3400   if (FormatStr[1] == 'c') {
3401     // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
3402     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
3403       return nullptr;
3404     Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
3405     Value *Ptr = Dest;
3406     B.CreateStore(V, Ptr);
3407     Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
3408     B.CreateStore(B.getInt8(0), Ptr);
3409 
3410     return ConstantInt::get(CI->getType(), 1);
3411   }
3412 
3413   if (FormatStr[1] == 's') {
3414     // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
3415     // strlen(str)+1)
3416     if (!CI->getArgOperand(2)->getType()->isPointerTy())
3417       return nullptr;
3418 
3419     if (CI->use_empty())
3420       // sprintf(dest, "%s", str) -> strcpy(dest, str)
3421       return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI));
3422 
3423     uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
3424     if (SrcLen) {
3425       B.CreateMemCpy(
3426           Dest, Align(1), CI->getArgOperand(2), Align(1),
3427           ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen));
3428       // Returns total number of characters written without null-character.
3429       return ConstantInt::get(CI->getType(), SrcLen - 1);
3430     } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
3431       // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
3432       Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
3433       return B.CreateIntCast(PtrDiff, CI->getType(), false);
3434     }
3435 
3436     bool OptForSize = CI->getFunction()->hasOptSize() ||
3437                       llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
3438                                                   PGSOQueryType::IRPass);
3439     if (OptForSize)
3440       return nullptr;
3441 
3442     Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
3443     if (!Len)
3444       return nullptr;
3445     Value *IncLen =
3446         B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
3447     B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(2), Align(1), IncLen);
3448 
3449     // The sprintf result is the unincremented number of bytes in the string.
3450     return B.CreateIntCast(Len, CI->getType(), false);
3451   }
3452   return nullptr;
3453 }
3454 
3455 Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
3456   Module *M = CI->getModule();
3457   Function *Callee = CI->getCalledFunction();
3458   FunctionType *FT = Callee->getFunctionType();
3459   if (Value *V = optimizeSPrintFString(CI, B)) {
3460     return V;
3461   }
3462 
3463   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
3464 
3465   // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
3466   // point arguments.
3467   if (isLibFuncEmittable(M, TLI, LibFunc_siprintf) &&
3468       !callHasFloatingPointArgument(CI)) {
3469     FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_siprintf,
3470                                                    FT, Callee->getAttributes());
3471     CallInst *New = cast<CallInst>(CI->clone());
3472     New->setCalledFunction(SIPrintFFn);
3473     B.Insert(New);
3474     return New;
3475   }
3476 
3477   // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
3478   // floating point arguments.
3479   if (isLibFuncEmittable(M, TLI, LibFunc_small_sprintf) &&
3480       !callHasFP128Argument(CI)) {
3481     auto SmallSPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_sprintf, FT,
3482                                              Callee->getAttributes());
3483     CallInst *New = cast<CallInst>(CI->clone());
3484     New->setCalledFunction(SmallSPrintFFn);
3485     B.Insert(New);
3486     return New;
3487   }
3488 
3489   return nullptr;
3490 }
3491 
3492 // Transform an snprintf call CI with the bound N to format the string Str
3493 // either to a call to memcpy, to a single character store, or to nothing,
3494 // and fold the result to a constant.  A nonnull StrArg refers to the string
3495 // argument being formatted.  Otherwise the call is one with N < 2 and
3496 // the "%c" directive to format a single character.
3497 Value *LibCallSimplifier::emitSnPrintfMemCpy(CallInst *CI, Value *StrArg,
3498                                              StringRef Str, uint64_t N,
3499                                              IRBuilderBase &B) {
3500   assert(StrArg || (N < 2 && Str.size() == 1));
3501 
3502   unsigned IntBits = TLI->getIntSize();
3503   uint64_t IntMax = maxIntN(IntBits);
3504   if (Str.size() > IntMax)
3505     // Bail if the string is longer than INT_MAX.  POSIX requires
3506     // implementations to set errno to EOVERFLOW in this case, in
3507     // addition to when N is larger than that (checked by the caller).
3508     return nullptr;
3509 
3510   Value *StrLen = ConstantInt::get(CI->getType(), Str.size());
3511   if (N == 0)
3512     return StrLen;
3513 
3514   // Set to the number of bytes to copy from StrArg, which is also
3515   // the offset of the terminating nul.
3516   uint64_t NCopy;
3517   if (N > Str.size())
3518     // Copy the full string, including the terminating nul (which must
3519     // be present regardless of the bound).
3520     NCopy = Str.size() + 1;
3521   else
3522     NCopy = N - 1;
3523 
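       // e.g. with Str == "hello" (StrLen == 5): N == 8 copies all 6 bytes
       // including the nul and returns 5 below; N == 4 copies "hel", stores a
       // nul at offset 3, and still returns 5.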
3524   Value *DstArg = CI->getArgOperand(0);
3525   if (NCopy && StrArg)
3526     // Transform the call to llvm.memcpy(dst, fmt, NCopy).
3527     copyFlags(
3528         *CI,
3529         B.CreateMemCpy(
3530             DstArg, Align(1), StrArg, Align(1),
3531             ConstantInt::get(DL.getIntPtrType(CI->getContext()), NCopy)));
3532 
3533   if (N > Str.size())
3534     // Return early when the whole format string, including the final nul,
3535     // has been copied.
3536     return StrLen;
3537 
3538   // Otherwise, when truncating the string append a terminating nul.
3539   // Otherwise, when truncating the string, append a terminating nul.
3540   Value *NulOff = B.getIntN(IntBits, NCopy);
3541   Value *DstEnd = B.CreateInBoundsGEP(Int8Ty, DstArg, NulOff, "endptr");
3542   B.CreateStore(ConstantInt::get(Int8Ty, 0), DstEnd);
3543   return StrLen;
3544 }
3545 
3546 Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
3547                                                  IRBuilderBase &B) {
3548   // Check for size
3549   ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
3550   if (!Size)
3551     return nullptr;
3552 
3553   uint64_t N = Size->getZExtValue();
3554   uint64_t IntMax = maxIntN(TLI->getIntSize());
3555   if (N > IntMax)
3556     // Bail if the bound exceeds INT_MAX.  POSIX requires implementations
3557     // to set errno to EOVERFLOW in this case.
3558     return nullptr;
3559 
3560   Value *DstArg = CI->getArgOperand(0);
3561   Value *FmtArg = CI->getArgOperand(2);
3562 
3563   // Check for a fixed format string.
3564   StringRef FormatStr;
3565   if (!getConstantStringInfo(FmtArg, FormatStr))
3566     return nullptr;
3567 
3568   // If we just have a format string (nothing else crazy) transform it.
3569   if (CI->arg_size() == 3) {
3570     if (FormatStr.contains('%'))
3571       // Bail if the format string contains a directive and there are
3572       // no arguments.  We could handle "%%" in the future.
3573       return nullptr;
3574 
3575     return emitSnPrintfMemCpy(CI, FmtArg, FormatStr, N, B);
3576   }
3577 
3578   // The remaining optimizations require the format string to be "%s" or "%c"
3579   // and have an extra operand.
3580   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() != 4)
3581     return nullptr;
3582 
3583   // Decode the second character of the format string.
3584   if (FormatStr[1] == 'c') {
3585     if (N <= 1) {
3586       // Use an arbitrary string of length 1 to transform the call into
3587       // either a nul store (N == 1) or a no-op (N == 0) and fold it
3588       // to one.
3589       StringRef CharStr("*");
3590       return emitSnPrintfMemCpy(CI, nullptr, CharStr, N, B);
3591     }
3592 
3593     // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
3594     if (!CI->getArgOperand(3)->getType()->isIntegerTy())
3595       return nullptr;
3596     Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
3597     Value *Ptr = DstArg;
3598     B.CreateStore(V, Ptr);
3599     Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
3600     B.CreateStore(B.getInt8(0), Ptr);
3601     return ConstantInt::get(CI->getType(), 1);
3602   }
3603 
3604   if (FormatStr[1] != 's')
3605     return nullptr;
3606 
3607   Value *StrArg = CI->getArgOperand(3);
3608   // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1)
3609   StringRef Str;
3610   if (!getConstantStringInfo(StrArg, Str))
3611     return nullptr;
3612 
3613   return emitSnPrintfMemCpy(CI, StrArg, Str, N, B);
3614 }
3615 
3616 Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) {
3617   if (Value *V = optimizeSnPrintFString(CI, B)) {
3618     return V;
3619   }
3620 
3621   if (isKnownNonZero(CI->getOperand(1), DL))
3622     annotateNonNullNoUndefBasedOnAccess(CI, 0);
3623   return nullptr;
3624 }
3625 
3626 Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
3627                                                 IRBuilderBase &B) {
3628   optimizeErrorReporting(CI, B, 0);
3629 
3630   // All the optimizations depend on the format string.
3631   StringRef FormatStr;
3632   if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
3633     return nullptr;
3634 
3635   // Do not do any of the following transformations if the fprintf return
3636   // value is used; in general the fprintf return value is not compatible
3637   // with fwrite(), fputc() or fputs().
3638   if (!CI->use_empty())
3639     return nullptr;
3640 
3641   // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
3642   if (CI->arg_size() == 2) {
3643     // Could handle %% -> % if we cared.
3644     if (FormatStr.contains('%'))
3645       return nullptr; // We found a format specifier.
3646 
3647     unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
3648     Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
3649     return copyFlags(
3650         *CI, emitFWrite(CI->getArgOperand(1),
3651                         ConstantInt::get(SizeTTy, FormatStr.size()),
3652                         CI->getArgOperand(0), B, DL, TLI));
3653   }
3654 
3655   // The remaining optimizations require the format string to be "%s" or "%c"
3656   // and have an extra operand.
3657   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
3658     return nullptr;
3659 
3660   // Decode the second character of the format string.
3661   if (FormatStr[1] == 'c') {
3662     // fprintf(F, "%c", chr) --> fputc((int)chr, F)
3663     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
3664       return nullptr;
3665     Type *IntTy = B.getIntNTy(TLI->getIntSize());
3666     Value *V = B.CreateIntCast(CI->getArgOperand(2), IntTy, /*isSigned*/ true,
3667                                "chari");
3668     return copyFlags(*CI, emitFPutC(V, CI->getArgOperand(0), B, TLI));
3669   }
3670 
3671   if (FormatStr[1] == 's') {
3672     // fprintf(F, "%s", str) --> fputs(str, F)
3673     if (!CI->getArgOperand(2)->getType()->isPointerTy())
3674       return nullptr;
3675     return copyFlags(
3676         *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI));
3677   }
3678   return nullptr;
3679 }
3680 
3681 Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
3682   Module *M = CI->getModule();
3683   Function *Callee = CI->getCalledFunction();
3684   FunctionType *FT = Callee->getFunctionType();
3685   if (Value *V = optimizeFPrintFString(CI, B)) {
3686     return V;
3687   }
3688 
3689   // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
3690   // floating point arguments.
3691   if (isLibFuncEmittable(M, TLI, LibFunc_fiprintf) &&
3692       !callHasFloatingPointArgument(CI)) {
3693     FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_fiprintf,
3694                                                    FT, Callee->getAttributes());
3695     CallInst *New = cast<CallInst>(CI->clone());
3696     New->setCalledFunction(FIPrintFFn);
3697     B.Insert(New);
3698     return New;
3699   }
3700 
3701   // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
3702   // 128-bit floating point arguments.
3703   if (isLibFuncEmittable(M, TLI, LibFunc_small_fprintf) &&
3704       !callHasFP128Argument(CI)) {
3705     auto SmallFPrintFFn =
3706         getOrInsertLibFunc(M, *TLI, LibFunc_small_fprintf, FT,
3707                            Callee->getAttributes());
3708     CallInst *New = cast<CallInst>(CI->clone());
3709     New->setCalledFunction(SmallFPrintFFn);
3710     B.Insert(New);
3711     return New;
3712   }
3713 
3714   return nullptr;
3715 }
3716 
3717 Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
3718   optimizeErrorReporting(CI, B, 3);
3719 
3720   // Get the element size and count.
3721   ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
3722   ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
3723   if (SizeC && CountC) {
3724     uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
3725 
3726     // If this is writing zero records, remove the call (it's a noop).
3727     if (Bytes == 0)
3728       return ConstantInt::get(CI->getType(), 0);
3729 
3730     // If this is writing one byte, turn it into fputc.
3731     // This optimization is only valid if the return value is unused.
3732     if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
3733       Value *Char = B.CreateLoad(B.getInt8Ty(), CI->getArgOperand(0), "char");
3734       Type *IntTy = B.getIntNTy(TLI->getIntSize());
3735       Value *Cast = B.CreateIntCast(Char, IntTy, /*isSigned*/ true, "chari");
3736       Value *NewCI = emitFPutC(Cast, CI->getArgOperand(3), B, TLI);
3737       return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
3738     }
3739   }
3740 
3741   return nullptr;
3742 }
3743 
3744 Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
3745   optimizeErrorReporting(CI, B, 1);
3746 
3747   // Don't rewrite fputs to fwrite when optimizing for size because fwrite
3748   // requires more arguments and thus extra MOVs are required.
3749   bool OptForSize = CI->getFunction()->hasOptSize() ||
3750                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
3751                                                 PGSOQueryType::IRPass);
3752   if (OptForSize)
3753     return nullptr;
3754 
3755   // We can't optimize if return value is used.
3756   if (!CI->use_empty())
3757     return nullptr;
3758 
3759   // fputs(s,F) --> fwrite(s,strlen(s),1,F)
3760   uint64_t Len = GetStringLength(CI->getArgOperand(0));
3761   if (!Len)
3762     return nullptr;
3763 
3764   // Known to have no uses (see above).
3765   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
3766   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
3767   return copyFlags(
3768       *CI,
3769       emitFWrite(CI->getArgOperand(0),
3770                  ConstantInt::get(SizeTTy, Len - 1),
3771                  CI->getArgOperand(1), B, DL, TLI));
3772 }
3773 
3774 Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
3775   annotateNonNullNoUndefBasedOnAccess(CI, 0);
3776   if (!CI->use_empty())
3777     return nullptr;
3778 
3779   // Check for a constant string.
3780   // puts("") -> putchar('\n')
3781   StringRef Str;
3782   if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) {
3783     // putchar takes an argument of the same type as puts returns, i.e.,
3784     // int, which need not be 32 bits wide.
3785     Type *IntTy = CI->getType();
3786     return copyFlags(*CI, emitPutChar(ConstantInt::get(IntTy, '\n'), B, TLI));
3787   }
3788 
3789   return nullptr;
3790 }
3791 
3792 Value *LibCallSimplifier::optimizeExit(CallInst *CI) {
3793 
3794   // Mark 'exit' as cold if it's not exit(0) (success).
3795   const APInt *C;
3796   if (!CI->hasFnAttr(Attribute::Cold) &&
3797       match(CI->getArgOperand(0), m_APInt(C)) && !C->isZero()) {
3798     CI->addFnAttr(Attribute::Cold);
3799   }
3800   return nullptr;
3801 }
3802 
3803 Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
3804   // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
3805   return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1),
3806                                         CI->getArgOperand(0), Align(1),
3807                                         CI->getArgOperand(2)));
3808 }
3809 
3810 bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) {
3811   SmallString<20> FloatFuncName = FuncName;
3812   FloatFuncName += 'f';
3813   return isLibFuncEmittable(M, TLI, FloatFuncName);
3814 }
3815 
3816 Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
3817                                                       IRBuilderBase &Builder) {
3818   Module *M = CI->getModule();
3819   LibFunc Func;
3820   Function *Callee = CI->getCalledFunction();
3821 
3822   // Check for string/memory library functions.
3823   if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
3824     // Make sure we never change the calling convention.
3825     assert(
3826         (ignoreCallingConv(Func) ||
3827          TargetLibraryInfoImpl::isCallingConvCCompatible(CI)) &&
3828         "Optimizing string/memory libcall would change the calling convention");
3829     switch (Func) {
3830     case LibFunc_strcat:
3831       return optimizeStrCat(CI, Builder);
3832     case LibFunc_strncat:
3833       return optimizeStrNCat(CI, Builder);
3834     case LibFunc_strchr:
3835       return optimizeStrChr(CI, Builder);
3836     case LibFunc_strrchr:
3837       return optimizeStrRChr(CI, Builder);
3838     case LibFunc_strcmp:
3839       return optimizeStrCmp(CI, Builder);
3840     case LibFunc_strncmp:
3841       return optimizeStrNCmp(CI, Builder);
3842     case LibFunc_strcpy:
3843       return optimizeStrCpy(CI, Builder);
3844     case LibFunc_stpcpy:
3845       return optimizeStpCpy(CI, Builder);
3846     case LibFunc_strlcpy:
3847       return optimizeStrLCpy(CI, Builder);
3848     case LibFunc_stpncpy:
3849       return optimizeStringNCpy(CI, /*RetEnd=*/true, Builder);
3850     case LibFunc_strncpy:
3851       return optimizeStringNCpy(CI, /*RetEnd=*/false, Builder);
3852     case LibFunc_strlen:
3853       return optimizeStrLen(CI, Builder);
3854     case LibFunc_strnlen:
3855       return optimizeStrNLen(CI, Builder);
3856     case LibFunc_strpbrk:
3857       return optimizeStrPBrk(CI, Builder);
3858     case LibFunc_strndup:
3859       return optimizeStrNDup(CI, Builder);
3860     case LibFunc_strtol:
3861     case LibFunc_strtod:
3862     case LibFunc_strtof:
3863     case LibFunc_strtoul:
3864     case LibFunc_strtoll:
3865     case LibFunc_strtold:
3866     case LibFunc_strtoull:
3867       return optimizeStrTo(CI, Builder);
3868     case LibFunc_strspn:
3869       return optimizeStrSpn(CI, Builder);
3870     case LibFunc_strcspn:
3871       return optimizeStrCSpn(CI, Builder);
3872     case LibFunc_strstr:
3873       return optimizeStrStr(CI, Builder);
3874     case LibFunc_memchr:
3875       return optimizeMemChr(CI, Builder);
3876     case LibFunc_memrchr:
3877       return optimizeMemRChr(CI, Builder);
3878     case LibFunc_bcmp:
3879       return optimizeBCmp(CI, Builder);
3880     case LibFunc_memcmp:
3881       return optimizeMemCmp(CI, Builder);
3882     case LibFunc_memcpy:
3883       return optimizeMemCpy(CI, Builder);
3884     case LibFunc_memccpy:
3885       return optimizeMemCCpy(CI, Builder);
3886     case LibFunc_mempcpy:
3887       return optimizeMemPCpy(CI, Builder);
3888     case LibFunc_memmove:
3889       return optimizeMemMove(CI, Builder);
3890     case LibFunc_memset:
3891       return optimizeMemSet(CI, Builder);
3892     case LibFunc_realloc:
3893       return optimizeRealloc(CI, Builder);
3894     case LibFunc_wcslen:
3895       return optimizeWcslen(CI, Builder);
3896     case LibFunc_bcopy:
3897       return optimizeBCopy(CI, Builder);
3898     case LibFunc_Znwm:
3899     case LibFunc_ZnwmRKSt9nothrow_t:
3900     case LibFunc_ZnwmSt11align_val_t:
3901     case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
3902     case LibFunc_Znam:
3903     case LibFunc_ZnamRKSt9nothrow_t:
3904     case LibFunc_ZnamSt11align_val_t:
3905     case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
3906     case LibFunc_Znwm12__hot_cold_t:
3907     case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
3908     case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
3909     case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
3910     case LibFunc_Znam12__hot_cold_t:
3911     case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
3912     case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
3913     case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
3914     case LibFunc_size_returning_new:
3915     case LibFunc_size_returning_new_hot_cold:
3916     case LibFunc_size_returning_new_aligned:
3917     case LibFunc_size_returning_new_aligned_hot_cold:
3918       return optimizeNew(CI, Builder, Func);
3919     default:
3920       break;
3921     }
3922   }
3923   return nullptr;
3924 }
3925 
3926 /// Constant folding nan/nanf/nanl.
3927 static Value *optimizeNaN(CallInst *CI) {
3928   StringRef CharSeq;
3929   if (!getConstantStringInfo(CI->getArgOperand(0), CharSeq))
3930     return nullptr;
3931 
3932   APInt Fill;
3933   // Treat empty strings as if they were zero.
3934   if (CharSeq.empty())
3935     Fill = APInt(32, 0);
3936   else if (CharSeq.getAsInteger(0, Fill))
3937     return nullptr;
3938 
3939   return ConstantFP::getQNaN(CI->getType(), /*Negative=*/false, &Fill);
3940 }
3941 
3942 Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
3943                                                        LibFunc Func,
3944                                                        IRBuilderBase &Builder) {
3945   const Module *M = CI->getModule();
3946 
3947   // Don't optimize calls that require strict floating point semantics.
3948   if (CI->isStrictFP())
3949     return nullptr;
3950 
3951   if (Value *V = optimizeSymmetric(CI, Func, Builder))
3952     return V;
3953 
3954   switch (Func) {
3955   case LibFunc_sinpif:
3956   case LibFunc_sinpi:
3957     return optimizeSinCosPi(CI, /*IsSin*/true, Builder);
3958   case LibFunc_cospif:
3959   case LibFunc_cospi:
3960     return optimizeSinCosPi(CI, /*IsSin*/false, Builder);
3961   case LibFunc_powf:
3962   case LibFunc_pow:
3963   case LibFunc_powl:
3964     return optimizePow(CI, Builder);
3965   case LibFunc_exp2l:
3966   case LibFunc_exp2:
3967   case LibFunc_exp2f:
3968     return optimizeExp2(CI, Builder);
3969   case LibFunc_fabsf:
3970   case LibFunc_fabs:
3971   case LibFunc_fabsl:
3972     return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
3973   case LibFunc_sqrtf:
3974   case LibFunc_sqrt:
3975   case LibFunc_sqrtl:
3976     return optimizeSqrt(CI, Builder);
3977   case LibFunc_fmod:
3978   case LibFunc_fmodf:
3979   case LibFunc_fmodl:
3980     return optimizeFMod(CI, Builder);
3981   case LibFunc_logf:
3982   case LibFunc_log:
3983   case LibFunc_logl:
3984   case LibFunc_log10f:
3985   case LibFunc_log10:
3986   case LibFunc_log10l:
3987   case LibFunc_log1pf:
3988   case LibFunc_log1p:
3989   case LibFunc_log1pl:
3990   case LibFunc_log2f:
3991   case LibFunc_log2:
3992   case LibFunc_log2l:
3993   case LibFunc_logbf:
3994   case LibFunc_logb:
3995   case LibFunc_logbl:
3996     return optimizeLog(CI, Builder);
3997   case LibFunc_tan:
3998   case LibFunc_tanf:
3999   case LibFunc_tanl:
4000   case LibFunc_sinh:
4001   case LibFunc_sinhf:
4002   case LibFunc_sinhl:
4003   case LibFunc_asinh:
4004   case LibFunc_asinhf:
4005   case LibFunc_asinhl:
4006   case LibFunc_cosh:
4007   case LibFunc_coshf:
4008   case LibFunc_coshl:
4009   case LibFunc_atanh:
4010   case LibFunc_atanhf:
4011   case LibFunc_atanhl:
4012     return optimizeTrigInversionPairs(CI, Builder);
4013   case LibFunc_ceil:
4014     return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
4015   case LibFunc_floor:
4016     return replaceUnaryCall(CI, Builder, Intrinsic::floor);
4017   case LibFunc_round:
4018     return replaceUnaryCall(CI, Builder, Intrinsic::round);
4019   case LibFunc_roundeven:
4020     return replaceUnaryCall(CI, Builder, Intrinsic::roundeven);
4021   case LibFunc_nearbyint:
4022     return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
4023   case LibFunc_rint:
4024     return replaceUnaryCall(CI, Builder, Intrinsic::rint);
4025   case LibFunc_trunc:
4026     return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
4027   case LibFunc_acos:
4028   case LibFunc_acosh:
4029   case LibFunc_asin:
4030   case LibFunc_atan:
4031   case LibFunc_cbrt:
4032   case LibFunc_exp:
4033   case LibFunc_exp10:
4034   case LibFunc_expm1:
4035   case LibFunc_cos:
4036   case LibFunc_sin:
4037   case LibFunc_tanh:
4038     if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName()))
4039       return optimizeUnaryDoubleFP(CI, Builder, TLI, true);
4040     return nullptr;
4041   case LibFunc_copysign:
4042     if (hasFloatVersion(M, CI->getCalledFunction()->getName()))
4043       return optimizeBinaryDoubleFP(CI, Builder, TLI);
4044     return nullptr;
4045   case LibFunc_fminf:
4046   case LibFunc_fmin:
4047   case LibFunc_fminl:
4048   case LibFunc_fmaxf:
4049   case LibFunc_fmax:
4050   case LibFunc_fmaxl:
4051     return optimizeFMinFMax(CI, Builder);
4052   case LibFunc_cabs:
4053   case LibFunc_cabsf:
4054   case LibFunc_cabsl:
4055     return optimizeCAbs(CI, Builder);
4056   case LibFunc_remquo:
4057   case LibFunc_remquof:
4058   case LibFunc_remquol:
4059     return optimizeRemquo(CI, Builder);
4060   case LibFunc_nan:
4061   case LibFunc_nanf:
4062   case LibFunc_nanl:
4063     return optimizeNaN(CI);
4064   default:
4065     return nullptr;
4066   }
4067 }
4068 
4069 Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
4070   Module *M = CI->getModule();
4071   assert(!CI->isMustTailCall() && "These transforms aren't musttail safe.");
4072 
4073   // TODO: Split out the code below that operates on FP calls so that
4074   //       we can allow non-FP calls with the StrictFP attribute to be
4075   //       optimized.
4076   if (CI->isNoBuiltin())
4077     return nullptr;
4078 
4079   LibFunc Func;
4080   Function *Callee = CI->getCalledFunction();
4081   bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
4082 
4083   SmallVector<OperandBundleDef, 2> OpBundles;
4084   CI->getOperandBundlesAsDefs(OpBundles);
4085 
4086   IRBuilderBase::OperandBundlesGuard Guard(Builder);
4087   Builder.setDefaultOperandBundles(OpBundles);
4088 
4089   // Command-line parameter overrides instruction attribute.
4090   // This can't be moved to optimizeFloatingPointLibCall() because it may be
4091   // used by the intrinsic optimizations.
4092   if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
4093     UnsafeFPShrink = EnableUnsafeFPShrink;
4094   else if (isa<FPMathOperator>(CI) && CI->isFast())
4095     UnsafeFPShrink = true;
4096 
4097   // First, check for intrinsics.
4098   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
4099     if (!IsCallingConvC)
4100       return nullptr;
4101     // The FP intrinsics have corresponding constrained versions so we don't
4102     // need to check for the StrictFP attribute here.
4103     switch (II->getIntrinsicID()) {
4104     case Intrinsic::pow:
4105       return optimizePow(CI, Builder);
4106     case Intrinsic::exp2:
4107       return optimizeExp2(CI, Builder);
4108     case Intrinsic::log:
4109     case Intrinsic::log2:
4110     case Intrinsic::log10:
4111       return optimizeLog(CI, Builder);
4112     case Intrinsic::sqrt:
4113       return optimizeSqrt(CI, Builder);
4114     case Intrinsic::memset:
4115       return optimizeMemSet(CI, Builder);
4116     case Intrinsic::memcpy:
4117       return optimizeMemCpy(CI, Builder);
4118     case Intrinsic::memmove:
4119       return optimizeMemMove(CI, Builder);
4120     default:
4121       return nullptr;
4122     }
4123   }
4124 
4125   // Also try to simplify calls to fortified library functions.
4126   if (Value *SimplifiedFortifiedCI =
4127           FortifiedSimplifier.optimizeCall(CI, Builder))
4128     return SimplifiedFortifiedCI;
4129 
4130   // Then check for known library functions.
4131   if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
4132     // We never change the calling convention.
4133     if (!ignoreCallingConv(Func) && !IsCallingConvC)
4134       return nullptr;
4135     if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
4136       return V;
4137     if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder))
4138       return V;
4139     switch (Func) {
4140     case LibFunc_ffs:
4141     case LibFunc_ffsl:
4142     case LibFunc_ffsll:
4143       return optimizeFFS(CI, Builder);
4144     case LibFunc_fls:
4145     case LibFunc_flsl:
4146     case LibFunc_flsll:
4147       return optimizeFls(CI, Builder);
4148     case LibFunc_abs:
4149     case LibFunc_labs:
4150     case LibFunc_llabs:
4151       return optimizeAbs(CI, Builder);
4152     case LibFunc_isdigit:
4153       return optimizeIsDigit(CI, Builder);
4154     case LibFunc_isascii:
4155       return optimizeIsAscii(CI, Builder);
4156     case LibFunc_toascii:
4157       return optimizeToAscii(CI, Builder);
4158     case LibFunc_atoi:
4159     case LibFunc_atol:
4160     case LibFunc_atoll:
4161       return optimizeAtoi(CI, Builder);
4162     case LibFunc_strtol:
4163     case LibFunc_strtoll:
4164       return optimizeStrToInt(CI, Builder, /*AsSigned=*/true);
4165     case LibFunc_strtoul:
4166     case LibFunc_strtoull:
4167       return optimizeStrToInt(CI, Builder, /*AsSigned=*/false);
4168     case LibFunc_printf:
4169       return optimizePrintF(CI, Builder);
4170     case LibFunc_sprintf:
4171       return optimizeSPrintF(CI, Builder);
4172     case LibFunc_snprintf:
4173       return optimizeSnPrintF(CI, Builder);
4174     case LibFunc_fprintf:
4175       return optimizeFPrintF(CI, Builder);
4176     case LibFunc_fwrite:
4177       return optimizeFWrite(CI, Builder);
4178     case LibFunc_fputs:
4179       return optimizeFPuts(CI, Builder);
4180     case LibFunc_puts:
4181       return optimizePuts(CI, Builder);
4182     case LibFunc_perror:
4183       return optimizeErrorReporting(CI, Builder);
4184     case LibFunc_vfprintf:
4185     case LibFunc_fiprintf:
4186       return optimizeErrorReporting(CI, Builder, 0);
4187     case LibFunc_exit:
4188     case LibFunc_Exit:
4189       return optimizeExit(CI);
4190     default:
4191       return nullptr;
4192     }
4193   }
4194   return nullptr;
4195 }
4196 
4197 LibCallSimplifier::LibCallSimplifier(
4198     const DataLayout &DL, const TargetLibraryInfo *TLI, DominatorTree *DT,
4199     DomConditionCache *DC, AssumptionCache *AC, OptimizationRemarkEmitter &ORE,
4200     BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
4201     function_ref<void(Instruction *, Value *)> Replacer,
4202     function_ref<void(Instruction *)> Eraser)
4203     : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), DT(DT), DC(DC), AC(AC),
4204       ORE(ORE), BFI(BFI), PSI(PSI), Replacer(Replacer), Eraser(Eraser) {}
4205 
4206 void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
4207   // Indirect through the replacer used in this instance.
4208   Replacer(I, With);
4209 }
4210 
4211 void LibCallSimplifier::eraseFromParent(Instruction *I) {
4212   Eraser(I);
4213 }
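
     // Illustrative client wiring (a sketch, not part of this file): a host
     // pass would typically construct the simplifier with callbacks that route
     // changes through its own bookkeeping, e.g.
     //
     //   LibCallSimplifier Simplifier(
     //       DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
     //       /*Replacer=*/[&](Instruction *I, Value *With) {
     //         IC.replaceInstUsesWith(*I, With); // 'IC' is a hypothetical host
     //       },
     //       /*Eraser=*/[&](Instruction *I) { IC.eraseInstFromParent(*I); });
     //
     // so that replaceAllUsesWith() and eraseFromParent() above defer to the
     // host pass instead of mutating the IR directly.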
4214 
4215 // TODO:
4216 //   Additional cases that we need to add to this file:
4217 //
4218 // cbrt:
4219 //   * cbrt(expN(x))  -> expN(x/3)
4220 //   * cbrt(sqrt(x))  -> pow(x,1/6)
4221 //   * cbrt(cbrt(x))  -> pow(x,1/9)
4222 //
4223 // exp, expf, expl:
4224 //   * exp(log(x))  -> x
4225 //
4226 // log, logf, logl:
4227 //   * log(exp(x))   -> x
4228 //   * log(exp(y))   -> y*log(e)
4229 //   * log(exp10(y)) -> y*log(10)
4230 //   * log(sqrt(x))  -> 0.5*log(x)
4231 //
4232 // pow, powf, powl:
4233 //   * pow(sqrt(x),y) -> pow(x,y*0.5)
4234 //   * pow(pow(x,y),z)-> pow(x,y*z)
4235 //
4236 // signbit:
4237 //   * signbit(cnst) -> cnst'
4238 //   * signbit(nncst) -> 0 (if nncst is a non-negative constant)
4239 //
4240 // sqrt, sqrtf, sqrtl:
4241 //   * sqrt(expN(x))  -> expN(x*0.5)
4242 //   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
4243 //   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
4244 //
4245 
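     // Worked example for the last sqrt entry above (informal, fast-math style
     // reasoning): whenever sqrt(pow(x,y)) is well defined, pow(x,y) >= 0 and
     // equals |x|^y, so
     //   sqrt(pow(x,y)) = (|x|^y)^(1/2) = |x|^(y*0.5) = pow(|x|, y*0.5)
     // The absolute value matters because x may be negative while pow(x,y) is
     // still non-negative, e.g. x = -2, y = 2 gives sqrt(4) = 2 = pow(2, 1).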
4246 //===----------------------------------------------------------------------===//
4247 // Fortified Library Call Optimizations
4248 //===----------------------------------------------------------------------===//
4249 
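     // Informal examples of the foldability rule implemented below
     // (illustrative; argument layouts follow the usual _FORTIFY_SOURCE
     // prototypes):
     //   __memcpy_chk(dst, src, n, -1)      object size is -1 (unknown):
     //                                      foldable
     //   __memcpy_chk(dst, src, 32, 64)     64 >= 32: foldable, unless we were
     //                                      asked to only lower unknown sizes
     //   __strcpy_chk(dst, "abc", 16)       known source length 4 (incl. nul)
     //                                      fits in 16: foldable
     //   __sprintf_chk(dst, 1, n, "%d", v)  non-zero flag may request extra
     //                                      run-time checks: not foldable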
4250 bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(
4251     CallInst *CI, unsigned ObjSizeOp, std::optional<unsigned> SizeOp,
4252     std::optional<unsigned> StrOp, std::optional<unsigned> FlagOp) {
4253   // If this function takes a flag argument, the implementation may use it to
4254   // perform extra checks. Don't fold into the non-checking variant.
4255   if (FlagOp) {
4256     ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
4257     if (!Flag || !Flag->isZero())
4258       return false;
4259   }
4260 
4261   if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
4262     return true;
4263 
4264   if (ConstantInt *ObjSizeCI =
4265           dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
4266     if (ObjSizeCI->isMinusOne())
4267       return true;
4268     // If the object size wasn't -1 (unknown), bail out if we were asked to only lower calls with unknown sizes.
4269     if (OnlyLowerUnknownSize)
4270       return false;
4271     if (StrOp) {
4272       uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
4273       // A length of 0 means GetStringLength couldn't compute it, so we
4274       // can't remove the check.
4275       if (Len)
4276         annotateDereferenceableBytes(CI, *StrOp, Len);
4277       else
4278         return false;
4279       return ObjSizeCI->getZExtValue() >= Len;
4280     }
4281 
4282     if (SizeOp) {
4283       if (ConstantInt *SizeCI =
4284               dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
4285         return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
4286     }
4287   }
4288   return false;
4289 }
4290 
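     // Illustrative result of the fold below (a sketch; the exact IR depends
     // on the call site):
     //   __memcpy_chk(dst, src, n, -1)
     // becomes
     //   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src,
     //                                    i64 %n, i1 false)
     // and the simplified call itself evaluates to 'dst', matching memcpy's
     // return value.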
4291 Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
4292                                                      IRBuilderBase &B) {
4293   if (isFortifiedCallFoldable(CI, 3, 2)) {
4294     CallInst *NewCI =
4295         B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
4296                        Align(1), CI->getArgOperand(2));
4297     mergeAttributesAndFlags(NewCI, *CI);
4298     return CI->getArgOperand(0);
4299   }
4300   return nullptr;
4301 }
4302 
4303 Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
4304                                                       IRBuilderBase &B) {
4305   if (isFortifiedCallFoldable(CI, 3, 2)) {
4306     CallInst *NewCI =
4307         B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
4308                         Align(1), CI->getArgOperand(2));
4309     mergeAttributesAndFlags(NewCI, *CI);
4310     return CI->getArgOperand(0);
4311   }
4312   return nullptr;
4313 }
4314 
4315 Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
4316                                                      IRBuilderBase &B) {
4317   if (isFortifiedCallFoldable(CI, 3, 2)) {
4318     Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
4319     CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
4320                                      CI->getArgOperand(2), Align(1));
4321     mergeAttributesAndFlags(NewCI, *CI);
4322     return CI->getArgOperand(0);
4323   }
4324   return nullptr;
4325 }
4326 
4327 Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
4328                                                       IRBuilderBase &B) {
4329   const DataLayout &DL = CI->getDataLayout();
4330   if (isFortifiedCallFoldable(CI, 3, 2))
4331     if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4332                                   CI->getArgOperand(2), B, DL, TLI)) {
4333       return mergeAttributesAndFlags(cast<CallInst>(Call), *CI);
4334     }
4335   return nullptr;
4336 }
4337 
4338 Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
4339                                                       IRBuilderBase &B,
4340                                                       LibFunc Func) {
4341   const DataLayout &DL = CI->getDataLayout();
4342   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
4343         *ObjSize = CI->getArgOperand(2);
4344 
4345   // __stpcpy_chk(x,x,...)  -> x+strlen(x)
4346   if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
4347     Value *StrLen = emitStrLen(Src, B, DL, TLI);
4348     return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
4349   }
4350 
4351   // If a) we don't have any length information, or b) we know this will
4352   // fit, then just lower to a plain st[rp]cpy. Otherwise we'll keep our
4353   // st[rp]cpy_chk call, which may fail at runtime if the size is too long.
4354   // TODO: It might be nice to get a maximum length out of the possible
4355   // string lengths for varying (non-constant) source strings.
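       // For example (illustrative, assuming we are not restricted to unknown
       // sizes): __strcpy_chk(dst, "abc", 8) is foldable because the known
       // source length (4, including the nul) fits in 8, and becomes a plain
       // strcpy(dst, "abc").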
4356   if (isFortifiedCallFoldable(CI, 2, std::nullopt, 1)) {
4357     if (Func == LibFunc_strcpy_chk)
4358       return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
4359     else
4360       return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI));
4361   }
4362 
4363   if (OnlyLowerUnknownSize)
4364     return nullptr;
4365 
4366   // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
4367   uint64_t Len = GetStringLength(Src);
4368   if (Len)
4369     annotateDereferenceableBytes(CI, 1, Len);
4370   else
4371     return nullptr;
4372 
4373   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
4374   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
4375   Value *LenV = ConstantInt::get(SizeTTy, Len);
4376   Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
4377   // If the function was an __stpcpy_chk, and we were able to fold it into
4378   // a __memcpy_chk, we still need to return the correct end pointer.
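       // For example (illustrative): __stpcpy_chk(dst, "abc", %objsize)
       // lowered to __memcpy_chk(dst, "abc", 4, %objsize) must still evaluate
       // to dst + 3, i.e. a pointer to the copied nul terminator.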
4379   if (Ret && Func == LibFunc_stpcpy_chk)
4380     return B.CreateInBoundsGEP(B.getInt8Ty(), Dst,
4381                                ConstantInt::get(SizeTTy, Len - 1));
4382   return copyFlags(*CI, cast<CallInst>(Ret));
4383 }
4384 
4385 Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI,
4386                                                      IRBuilderBase &B) {
4387   if (isFortifiedCallFoldable(CI, 1, std::nullopt, 0))
4388     return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B,
4389                                      CI->getDataLayout(), TLI));
4390   return nullptr;
4391 }
4392 
4393 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
4394                                                        IRBuilderBase &B,
4395                                                        LibFunc Func) {
4396   if (isFortifiedCallFoldable(CI, 3, 2)) {
4397     if (Func == LibFunc_strncpy_chk)
4398       return copyFlags(*CI,
4399                        emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4400                                    CI->getArgOperand(2), B, TLI));
4401     else
4402       return copyFlags(*CI,
4403                        emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4404                                    CI->getArgOperand(2), B, TLI));
4405   }
4406 
4407   return nullptr;
4408 }
4409 
4410 Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
4411                                                       IRBuilderBase &B) {
4412   if (isFortifiedCallFoldable(CI, 4, 3))
4413     return copyFlags(
4414         *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4415                          CI->getArgOperand(2), CI->getArgOperand(3), B, TLI));
4416 
4417   return nullptr;
4418 }
4419 
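     // Illustrative fold performed below (argument layout per the usual
     // __snprintf_chk prototype):
     //   __snprintf_chk(buf, 8, 0, 8, "%d", v)  ->  snprintf(buf, 8, "%d", v)
     // i.e. the flag and object-size arguments are dropped and the remaining
     // arguments, including the variadic tail, are forwarded unchanged.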
4420 Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
4421                                                        IRBuilderBase &B) {
4422   if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2)) {
4423     SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5));
4424     return copyFlags(*CI,
4425                      emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
4426                                   CI->getArgOperand(4), VariadicArgs, B, TLI));
4427   }
4428 
4429   return nullptr;
4430 }
4431 
4432 Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
4433                                                       IRBuilderBase &B) {
4434   if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1)) {
4435     SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4));
4436     return copyFlags(*CI,
4437                      emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
4438                                  VariadicArgs, B, TLI));
4439   }
4440 
4441   return nullptr;
4442 }
4443 
4444 Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
4445                                                      IRBuilderBase &B) {
4446   if (isFortifiedCallFoldable(CI, 2))
4447     return copyFlags(
4448         *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI));
4449 
4450   return nullptr;
4451 }
4452 
4453 Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
4454                                                    IRBuilderBase &B) {
4455   if (isFortifiedCallFoldable(CI, 3))
4456     return copyFlags(*CI,
4457                      emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
4458                                  CI->getArgOperand(2), B, TLI));
4459 
4460   return nullptr;
4461 }
4462 
4463 Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
4464                                                       IRBuilderBase &B) {
4465   if (isFortifiedCallFoldable(CI, 3))
4466     return copyFlags(*CI,
4467                      emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
4468                                  CI->getArgOperand(2), B, TLI));
4469 
4470   return nullptr;
4471 }
4472 
4473 Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
4474                                                       IRBuilderBase &B) {
4475   if (isFortifiedCallFoldable(CI, 3))
4476     return copyFlags(*CI,
4477                      emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4478                                  CI->getArgOperand(2), B, TLI));
4479 
4480   return nullptr;
4481 }
4482 
4483 Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
4484                                                         IRBuilderBase &B) {
4485   if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2))
4486     return copyFlags(
4487         *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
4488                            CI->getArgOperand(4), CI->getArgOperand(5), B, TLI));
4489 
4490   return nullptr;
4491 }
4492 
4493 Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
4494                                                        IRBuilderBase &B) {
4495   if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1))
4496     return copyFlags(*CI,
4497                      emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
4498                                   CI->getArgOperand(4), B, TLI));
4499 
4500   return nullptr;
4501 }
4502 
4503 Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
4504                                                 IRBuilderBase &Builder) {
4505   // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
4506   // Some clang users checked for _chk libcall availability using:
4507   //   __has_builtin(__builtin___memcpy_chk)
4508   // When compiling with -fno-builtin, this is always true.
4509   // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
4510   // end up with fortified libcalls, which isn't acceptable in a freestanding
4511   // environment which only provides their non-fortified counterparts.
4512   //
4513   // Until we change clang and/or teach external users to check for availability
4514   // differently, disregard the "nobuiltin" attribute and TLI::has.
4515   //
4516   // PR23093.
4517 
4518   LibFunc Func;
4519   Function *Callee = CI->getCalledFunction();
4520   bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
4521 
4522   SmallVector<OperandBundleDef, 2> OpBundles;
4523   CI->getOperandBundlesAsDefs(OpBundles);
4524 
4525   IRBuilderBase::OperandBundlesGuard Guard(Builder);
4526   Builder.setDefaultOperandBundles(OpBundles);
4527 
4528   // First, check that this is a known library function and that the prototype
4529   // is correct.
4530   if (!TLI->getLibFunc(*Callee, Func))
4531     return nullptr;
4532 
4533   // We never change the calling convention.
4534   if (!ignoreCallingConv(Func) && !IsCallingConvC)
4535     return nullptr;
4536 
4537   switch (Func) {
4538   case LibFunc_memcpy_chk:
4539     return optimizeMemCpyChk(CI, Builder);
4540   case LibFunc_mempcpy_chk:
4541     return optimizeMemPCpyChk(CI, Builder);
4542   case LibFunc_memmove_chk:
4543     return optimizeMemMoveChk(CI, Builder);
4544   case LibFunc_memset_chk:
4545     return optimizeMemSetChk(CI, Builder);
4546   case LibFunc_stpcpy_chk:
4547   case LibFunc_strcpy_chk:
4548     return optimizeStrpCpyChk(CI, Builder, Func);
4549   case LibFunc_strlen_chk:
4550     return optimizeStrLenChk(CI, Builder);
4551   case LibFunc_stpncpy_chk:
4552   case LibFunc_strncpy_chk:
4553     return optimizeStrpNCpyChk(CI, Builder, Func);
4554   case LibFunc_memccpy_chk:
4555     return optimizeMemCCpyChk(CI, Builder);
4556   case LibFunc_snprintf_chk:
4557     return optimizeSNPrintfChk(CI, Builder);
4558   case LibFunc_sprintf_chk:
4559     return optimizeSPrintfChk(CI, Builder);
4560   case LibFunc_strcat_chk:
4561     return optimizeStrCatChk(CI, Builder);
4562   case LibFunc_strlcat_chk:
4563     return optimizeStrLCat(CI, Builder);
4564   case LibFunc_strncat_chk:
4565     return optimizeStrNCatChk(CI, Builder);
4566   case LibFunc_strlcpy_chk:
4567     return optimizeStrLCpyChk(CI, Builder);
4568   case LibFunc_vsnprintf_chk:
4569     return optimizeVSNPrintfChk(CI, Builder);
4570   case LibFunc_vsprintf_chk:
4571     return optimizeVSPrintfChk(CI, Builder);
4572   default:
4573     break;
4574   }
4575   return nullptr;
4576 }
4577 
4578 FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
4579     const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
4580     : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
4581