xref: /llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp (revision 5c48f6fa543e7d081354d3f6c1c2524eb8075223)
1 //===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the library calls simplifier. It does not implement
10 // any pass, but can't be used by other passes to do simplifications.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Analysis/ConstantFolding.h"
19 #include "llvm/Analysis/Loads.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/AttributeMask.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/IR/PatternMatch.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/KnownBits.h"
33 #include "llvm/Support/MathExtras.h"
34 #include "llvm/TargetParser/Triple.h"
35 #include "llvm/Transforms/Utils/BuildLibCalls.h"
36 #include "llvm/Transforms/Utils/Local.h"
37 #include "llvm/Transforms/Utils/SizeOpts.h"
38 
39 #include <cmath>
40 
41 using namespace llvm;
42 using namespace PatternMatch;
43 
44 static cl::opt<bool>
45     EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
46                          cl::init(false),
47                          cl::desc("Enable unsafe double to float "
48                                   "shrinking for math lib calls"));
49 
50 // Enable conversion of operator new calls with a MemProf hot or cold hint
51 // to an operator new call that takes a hot/cold hint. Off by default since
52 // not all allocators currently support this extension.
53 static cl::opt<bool>
54     OptimizeHotColdNew("optimize-hot-cold-new", cl::Hidden, cl::init(false),
55                        cl::desc("Enable hot/cold operator new library calls"));
56 static cl::opt<bool> OptimizeExistingHotColdNew(
57     "optimize-existing-hot-cold-new", cl::Hidden, cl::init(false),
58     cl::desc(
59         "Enable optimization of existing hot/cold operator new library calls"));
60 
61 namespace {
62 
63 // Specialized parser to ensure the hint is an 8 bit value (we can't specify
64 // uint8_t to opt<> as that is interpreted to mean that we are passing a char
65 // option with a specific set of values.
66 struct HotColdHintParser : public cl::parser<unsigned> {
67   HotColdHintParser(cl::Option &O) : cl::parser<unsigned>(O) {}
68 
69   bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {
70     if (Arg.getAsInteger(0, Value))
71       return O.error("'" + Arg + "' value invalid for uint argument!");
72 
73     if (Value > 255)
74       return O.error("'" + Arg + "' value must be in the range [0, 255]!");
75 
76     return false;
77   }
78 };
79 
80 } // end anonymous namespace
81 
82 // Hot/cold operator new takes an 8 bit hotness hint, where 0 is the coldest
83 // and 255 is the hottest. Default to 1 value away from the coldest and hottest
84 // hints, so that the compiler hinted allocations are slightly less strong than
85 // manually inserted hints at the two extremes.
86 static cl::opt<unsigned, false, HotColdHintParser> ColdNewHintValue(
87     "cold-new-hint-value", cl::Hidden, cl::init(1),
88     cl::desc("Value to pass to hot/cold operator new for cold allocation"));
89 static cl::opt<unsigned, false, HotColdHintParser>
90     NotColdNewHintValue("notcold-new-hint-value", cl::Hidden, cl::init(128),
91                         cl::desc("Value to pass to hot/cold operator new for "
92                                  "notcold (warm) allocation"));
93 static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue(
94     "hot-new-hint-value", cl::Hidden, cl::init(254),
95     cl::desc("Value to pass to hot/cold operator new for hot allocation"));
96 
97 //===----------------------------------------------------------------------===//
98 // Helper Functions
99 //===----------------------------------------------------------------------===//
100 
101 static bool ignoreCallingConv(LibFunc Func) {
102   return Func == LibFunc_abs || Func == LibFunc_labs ||
103          Func == LibFunc_llabs || Func == LibFunc_strlen;
104 }
105 
106 /// Return true if it is only used in equality comparisons with With.
107 static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
108   for (User *U : V->users()) {
109     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
110       if (IC->isEquality() && IC->getOperand(1) == With)
111         continue;
112     // Unknown instruction.
113     return false;
114   }
115   return true;
116 }
117 
118 static bool callHasFloatingPointArgument(const CallInst *CI) {
119   return any_of(CI->operands(), [](const Use &OI) {
120     return OI->getType()->isFloatingPointTy();
121   });
122 }
123 
124 static bool callHasFP128Argument(const CallInst *CI) {
125   return any_of(CI->operands(), [](const Use &OI) {
126     return OI->getType()->isFP128Ty();
127   });
128 }
129 
130 // Convert the entire string Str representing an integer in Base, up to
131 // the terminating nul if present, to a constant according to the rules
132 // of strtoul[l] or, when AsSigned is set, of strtol[l].  On success
133 // return the result, otherwise null.
134 // The function assumes the string is encoded in ASCII and carefully
135 // avoids converting sequences (including "") that the corresponding
136 // library call might fail and set errno for.
137 static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
138                               uint64_t Base, bool AsSigned, IRBuilderBase &B) {
139   if (Base < 2 || Base > 36)
140     if (Base != 0)
141       // Fail for an invalid base (required by POSIX).
142       return nullptr;
143 
144   // Current offset into the original string to reflect in EndPtr.
145   size_t Offset = 0;
146   // Strip leading whitespace.
147   for ( ; Offset != Str.size(); ++Offset)
148     if (!isSpace((unsigned char)Str[Offset])) {
149       Str = Str.substr(Offset);
150       break;
151     }
152 
153   if (Str.empty())
154     // Fail for empty subject sequences (POSIX allows but doesn't require
155     // strtol[l]/strtoul[l] to fail with EINVAL).
156     return nullptr;
157 
158   // Strip but remember the sign.
159   bool Negate = Str[0] == '-';
160   if (Str[0] == '-' || Str[0] == '+') {
161     Str = Str.drop_front();
162     if (Str.empty())
163       // Fail for a sign with nothing after it.
164       return nullptr;
165     ++Offset;
166   }
167 
168   // Set Max to the absolute value of the minimum (for signed), or
169   // to the maximum (for unsigned) value representable in the type.
170   Type *RetTy = CI->getType();
171   unsigned NBits = RetTy->getPrimitiveSizeInBits();
172   uint64_t Max = AsSigned && Negate ? 1 : 0;
173   Max += AsSigned ? maxIntN(NBits) : maxUIntN(NBits);
174 
175   // Autodetect Base if it's zero and consume the "0x" prefix.
176   if (Str.size() > 1) {
177     if (Str[0] == '0') {
178       if (toUpper((unsigned char)Str[1]) == 'X') {
179         if (Str.size() == 2 || (Base && Base != 16))
180           // Fail if Base doesn't allow the "0x" prefix or for the prefix
181           // alone that implementations like BSD set errno to EINVAL for.
182           return nullptr;
183 
184         Str = Str.drop_front(2);
185         Offset += 2;
186         Base = 16;
187       }
188       else if (Base == 0)
189         Base = 8;
190     } else if (Base == 0)
191       Base = 10;
192   }
193   else if (Base == 0)
194     Base = 10;
195 
196   // Convert the rest of the subject sequence, not including the sign,
197   // to its uint64_t representation (this assumes the source character
198   // set is ASCII).
199   uint64_t Result = 0;
200   for (unsigned i = 0; i != Str.size(); ++i) {
201     unsigned char DigVal = Str[i];
202     if (isDigit(DigVal))
203       DigVal = DigVal - '0';
204     else {
205       DigVal = toUpper(DigVal);
206       if (isAlpha(DigVal))
207         DigVal = DigVal - 'A' + 10;
208       else
209         return nullptr;
210     }
211 
212     if (DigVal >= Base)
213       // Fail if the digit is not valid in the Base.
214       return nullptr;
215 
216     // Add the digit and fail if the result is not representable in
217     // the (unsigned form of the) destination type.
218     bool VFlow;
219     Result = SaturatingMultiplyAdd(Result, Base, (uint64_t)DigVal, &VFlow);
220     if (VFlow || Result > Max)
221       return nullptr;
222   }
223 
224   if (EndPtr) {
225     // Store the pointer to the end.
226     Value *Off = B.getInt64(Offset + Str.size());
227     Value *StrBeg = CI->getArgOperand(0);
228     Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr");
229     B.CreateStore(StrEnd, EndPtr);
230   }
231 
232   if (Negate)
233     // Unsigned negation doesn't overflow.
234     Result = -Result;
235 
236   return ConstantInt::get(RetTy, Result);
237 }
238 
239 static bool isOnlyUsedInComparisonWithZero(Value *V) {
240   for (User *U : V->users()) {
241     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
242       if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
243         if (C->isNullValue())
244           continue;
245     // Unknown instruction.
246     return false;
247   }
248   return true;
249 }
250 
251 static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
252                                  const DataLayout &DL) {
253   if (!isOnlyUsedInComparisonWithZero(CI))
254     return false;
255 
256   if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
257     return false;
258 
259   if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
260     return false;
261 
262   return true;
263 }
264 
265 static void annotateDereferenceableBytes(CallInst *CI,
266                                          ArrayRef<unsigned> ArgNos,
267                                          uint64_t DereferenceableBytes) {
268   const Function *F = CI->getCaller();
269   if (!F)
270     return;
271   for (unsigned ArgNo : ArgNos) {
272     uint64_t DerefBytes = DereferenceableBytes;
273     unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
274     if (!llvm::NullPointerIsDefined(F, AS) ||
275         CI->paramHasAttr(ArgNo, Attribute::NonNull))
276       DerefBytes = std::max(CI->getParamDereferenceableOrNullBytes(ArgNo),
277                             DereferenceableBytes);
278 
279     if (CI->getParamDereferenceableBytes(ArgNo) < DerefBytes) {
280       CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
281       if (!llvm::NullPointerIsDefined(F, AS) ||
282           CI->paramHasAttr(ArgNo, Attribute::NonNull))
283         CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull);
284       CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
285                                   CI->getContext(), DerefBytes));
286     }
287   }
288 }
289 
290 static void annotateNonNullNoUndefBasedOnAccess(CallInst *CI,
291                                          ArrayRef<unsigned> ArgNos) {
292   Function *F = CI->getCaller();
293   if (!F)
294     return;
295 
296   for (unsigned ArgNo : ArgNos) {
297     if (!CI->paramHasAttr(ArgNo, Attribute::NoUndef))
298       CI->addParamAttr(ArgNo, Attribute::NoUndef);
299 
300     if (!CI->paramHasAttr(ArgNo, Attribute::NonNull)) {
301       unsigned AS =
302           CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
303       if (llvm::NullPointerIsDefined(F, AS))
304         continue;
305       CI->addParamAttr(ArgNo, Attribute::NonNull);
306     }
307 
308     annotateDereferenceableBytes(CI, ArgNo, 1);
309   }
310 }
311 
312 static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos,
313                                Value *Size, const DataLayout &DL) {
314   if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
315     annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
316     annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
317   } else if (isKnownNonZero(Size, DL)) {
318     annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
319     const APInt *X, *Y;
320     uint64_t DerefMin = 1;
321     if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) {
322       DerefMin = std::min(X->getZExtValue(), Y->getZExtValue());
323       annotateDereferenceableBytes(CI, ArgNos, DerefMin);
324     }
325   }
326 }
327 
328 // Copy CallInst "flags" like musttail, notail, and tail. Return New param for
329 // easier chaining. Calls to emit* and B.createCall should probably be wrapped
330 // in this function when New is created to replace Old. Callers should take
331 // care to check Old.isMustTailCall() if they aren't replacing Old directly
332 // with New.
333 static Value *copyFlags(const CallInst &Old, Value *New) {
334   assert(!Old.isMustTailCall() && "do not copy musttail call flags");
335   assert(!Old.isNoTailCall() && "do not copy notail call flags");
336   if (auto *NewCI = dyn_cast_or_null<CallInst>(New))
337     NewCI->setTailCallKind(Old.getTailCallKind());
338   return New;
339 }
340 
341 static Value *mergeAttributesAndFlags(CallInst *NewCI, const CallInst &Old) {
342   NewCI->setAttributes(AttributeList::get(
343       NewCI->getContext(), {NewCI->getAttributes(), Old.getAttributes()}));
344   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
345   return copyFlags(Old, NewCI);
346 }
347 
348 // Helper to avoid truncating the length if size_t is 32-bits.
349 static StringRef substr(StringRef Str, uint64_t Len) {
350   return Len >= Str.size() ? Str : Str.substr(0, Len);
351 }
352 
353 //===----------------------------------------------------------------------===//
354 // String and Memory Library Call Optimizations
355 //===----------------------------------------------------------------------===//
356 
357 Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
358   // Extract some information from the instruction
359   Value *Dst = CI->getArgOperand(0);
360   Value *Src = CI->getArgOperand(1);
361   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
362 
363   // See if we can get the length of the input string.
364   uint64_t Len = GetStringLength(Src);
365   if (Len)
366     annotateDereferenceableBytes(CI, 1, Len);
367   else
368     return nullptr;
369   --Len; // Unbias length.
370 
371   // Handle the simple, do-nothing case: strcat(x, "") -> x
372   if (Len == 0)
373     return Dst;
374 
375   return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B));
376 }
377 
378 Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
379                                            IRBuilderBase &B) {
380   // We need to find the end of the destination string.  That's where the
381   // memory is to be moved to. We just generate a call to strlen.
382   Value *DstLen = emitStrLen(Dst, B, DL, TLI);
383   if (!DstLen)
384     return nullptr;
385 
386   // Now that we have the destination's length, we must index into the
387   // destination's pointer to get the actual memcpy destination (end of
388   // the string .. we're concatenating).
389   Value *CpyDst = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
390 
391   // We have enough information to now generate the memcpy call to do the
392   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
393   B.CreateMemCpy(
394       CpyDst, Align(1), Src, Align(1),
395       ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
396   return Dst;
397 }
398 
399 Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
400   // Extract some information from the instruction.
401   Value *Dst = CI->getArgOperand(0);
402   Value *Src = CI->getArgOperand(1);
403   Value *Size = CI->getArgOperand(2);
404   uint64_t Len;
405   annotateNonNullNoUndefBasedOnAccess(CI, 0);
406   if (isKnownNonZero(Size, DL))
407     annotateNonNullNoUndefBasedOnAccess(CI, 1);
408 
409   // We don't do anything if length is not constant.
410   ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size);
411   if (LengthArg) {
412     Len = LengthArg->getZExtValue();
413     // strncat(x, c, 0) -> x
414     if (!Len)
415       return Dst;
416   } else {
417     return nullptr;
418   }
419 
420   // See if we can get the length of the input string.
421   uint64_t SrcLen = GetStringLength(Src);
422   if (SrcLen) {
423     annotateDereferenceableBytes(CI, 1, SrcLen);
424     --SrcLen; // Unbias length.
425   } else {
426     return nullptr;
427   }
428 
429   // strncat(x, "", c) -> x
430   if (SrcLen == 0)
431     return Dst;
432 
433   // We don't optimize this case.
434   if (Len < SrcLen)
435     return nullptr;
436 
437   // strncat(x, s, c) -> strcat(x, s)
438   // s is constant so the strcat can be optimized further.
439   return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
440 }
441 
442 // Helper to transform memchr(S, C, N) == S to N && *S == C and, when
443 // NBytes is null, strchr(S, C) to *S == C.  A precondition of the function
444 // is that either S is dereferenceable or the value of N is nonzero.
445 static Value* memChrToCharCompare(CallInst *CI, Value *NBytes,
446                                   IRBuilderBase &B, const DataLayout &DL)
447 {
448   Value *Src = CI->getArgOperand(0);
449   Value *CharVal = CI->getArgOperand(1);
450 
451   // Fold memchr(A, C, N) == A to N && *A == C.
452   Type *CharTy = B.getInt8Ty();
453   Value *Char0 = B.CreateLoad(CharTy, Src);
454   CharVal = B.CreateTrunc(CharVal, CharTy);
455   Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp");
456 
457   if (NBytes) {
458     Value *Zero = ConstantInt::get(NBytes->getType(), 0);
459     Value *And = B.CreateICmpNE(NBytes, Zero);
460     Cmp = B.CreateLogicalAnd(And, Cmp);
461   }
462 
463   Value *NullPtr = Constant::getNullValue(CI->getType());
464   return B.CreateSelect(Cmp, Src, NullPtr);
465 }
466 
467 Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
468   Value *SrcStr = CI->getArgOperand(0);
469   Value *CharVal = CI->getArgOperand(1);
470   annotateNonNullNoUndefBasedOnAccess(CI, 0);
471 
472   if (isOnlyUsedInEqualityComparison(CI, SrcStr))
473     return memChrToCharCompare(CI, nullptr, B, DL);
474 
475   // If the second operand is non-constant, see if we can compute the length
476   // of the input string and turn this into memchr.
477   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
478   if (!CharC) {
479     uint64_t Len = GetStringLength(SrcStr);
480     if (Len)
481       annotateDereferenceableBytes(CI, 0, Len);
482     else
483       return nullptr;
484 
485     Function *Callee = CI->getCalledFunction();
486     FunctionType *FT = Callee->getFunctionType();
487     unsigned IntBits = TLI->getIntSize();
488     if (!FT->getParamType(1)->isIntegerTy(IntBits)) // memchr needs 'int'.
489       return nullptr;
490 
491     unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
492     Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
493     return copyFlags(*CI,
494                      emitMemChr(SrcStr, CharVal, // include nul.
495                                 ConstantInt::get(SizeTTy, Len), B,
496                                 DL, TLI));
497   }
498 
499   if (CharC->isZero()) {
500     Value *NullPtr = Constant::getNullValue(CI->getType());
501     if (isOnlyUsedInEqualityComparison(CI, NullPtr))
502       // Pre-empt the transformation to strlen below and fold
503       // strchr(A, '\0') == null to false.
504       return B.CreateIntToPtr(B.getTrue(), CI->getType());
505   }
506 
507   // Otherwise, the character is a constant, see if the first argument is
508   // a string literal.  If so, we can constant fold.
509   StringRef Str;
510   if (!getConstantStringInfo(SrcStr, Str)) {
511     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
512       if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI))
513         return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr");
514     return nullptr;
515   }
516 
517   // Compute the offset, make sure to handle the case when we're searching for
518   // zero (a weird way to spell strlen).
519   size_t I = (0xFF & CharC->getSExtValue()) == 0
520                  ? Str.size()
521                  : Str.find(CharC->getSExtValue());
522   if (I == StringRef::npos) // Didn't find the char.  strchr returns null.
523     return Constant::getNullValue(CI->getType());
524 
525   // strchr(s+n,c)  -> gep(s+n+i,c)
526   return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
527 }
528 
529 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
530   Value *SrcStr = CI->getArgOperand(0);
531   Value *CharVal = CI->getArgOperand(1);
532   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
533   annotateNonNullNoUndefBasedOnAccess(CI, 0);
534 
535   StringRef Str;
536   if (!getConstantStringInfo(SrcStr, Str)) {
537     // strrchr(s, 0) -> strchr(s, 0)
538     if (CharC && CharC->isZero())
539       return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
540     return nullptr;
541   }
542 
543   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
544   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
545 
546   // Try to expand strrchr to the memrchr nonstandard extension if it's
547   // available, or simply fail otherwise.
548   uint64_t NBytes = Str.size() + 1;   // Include the terminating nul.
549   Value *Size = ConstantInt::get(SizeTTy, NBytes);
550   return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI));
551 }
552 
553 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
554   Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
555   if (Str1P == Str2P) // strcmp(x,x)  -> 0
556     return ConstantInt::get(CI->getType(), 0);
557 
558   StringRef Str1, Str2;
559   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
560   bool HasStr2 = getConstantStringInfo(Str2P, Str2);
561 
562   // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
563   if (HasStr1 && HasStr2)
564     return ConstantInt::get(CI->getType(),
565                             std::clamp(Str1.compare(Str2), -1, 1));
566 
567   if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
568     return B.CreateNeg(B.CreateZExt(
569         B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
570 
571   if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
572     return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
573                         CI->getType());
574 
575   // strcmp(P, "x") -> memcmp(P, "x", 2)
576   uint64_t Len1 = GetStringLength(Str1P);
577   if (Len1)
578     annotateDereferenceableBytes(CI, 0, Len1);
579   uint64_t Len2 = GetStringLength(Str2P);
580   if (Len2)
581     annotateDereferenceableBytes(CI, 1, Len2);
582 
583   if (Len1 && Len2) {
584     return copyFlags(
585         *CI, emitMemCmp(Str1P, Str2P,
586                         ConstantInt::get(DL.getIntPtrType(CI->getContext()),
587                                          std::min(Len1, Len2)),
588                         B, DL, TLI));
589   }
590 
591   // strcmp to memcmp
592   if (!HasStr1 && HasStr2) {
593     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
594       return copyFlags(
595           *CI,
596           emitMemCmp(Str1P, Str2P,
597                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
598                      B, DL, TLI));
599   } else if (HasStr1 && !HasStr2) {
600     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
601       return copyFlags(
602           *CI,
603           emitMemCmp(Str1P, Str2P,
604                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
605                      B, DL, TLI));
606   }
607 
608   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
609   return nullptr;
610 }
611 
612 // Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
613 // arrays LHS and RHS and nonconstant Size.
614 static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
615                                     Value *Size, bool StrNCmp,
616                                     IRBuilderBase &B, const DataLayout &DL);
617 
618 Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
619   Value *Str1P = CI->getArgOperand(0);
620   Value *Str2P = CI->getArgOperand(1);
621   Value *Size = CI->getArgOperand(2);
622   if (Str1P == Str2P) // strncmp(x,x,n)  -> 0
623     return ConstantInt::get(CI->getType(), 0);
624 
625   if (isKnownNonZero(Size, DL))
626     annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
627   // Get the length argument if it is constant.
628   uint64_t Length;
629   if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
630     Length = LengthArg->getZExtValue();
631   else
632     return optimizeMemCmpVarSize(CI, Str1P, Str2P, Size, true, B, DL);
633 
634   if (Length == 0) // strncmp(x,y,0)   -> 0
635     return ConstantInt::get(CI->getType(), 0);
636 
637   if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
638     return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI));
639 
640   StringRef Str1, Str2;
641   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
642   bool HasStr2 = getConstantStringInfo(Str2P, Str2);
643 
644   // strncmp(x, y)  -> cnst  (if both x and y are constant strings)
645   if (HasStr1 && HasStr2) {
646     // Avoid truncating the 64-bit Length to 32 bits in ILP32.
647     StringRef SubStr1 = substr(Str1, Length);
648     StringRef SubStr2 = substr(Str2, Length);
649     return ConstantInt::get(CI->getType(),
650                             std::clamp(SubStr1.compare(SubStr2), -1, 1));
651   }
652 
653   if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
654     return B.CreateNeg(B.CreateZExt(
655         B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
656 
657   if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
658     return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
659                         CI->getType());
660 
661   uint64_t Len1 = GetStringLength(Str1P);
662   if (Len1)
663     annotateDereferenceableBytes(CI, 0, Len1);
664   uint64_t Len2 = GetStringLength(Str2P);
665   if (Len2)
666     annotateDereferenceableBytes(CI, 1, Len2);
667 
668   // strncmp to memcmp
669   if (!HasStr1 && HasStr2) {
670     Len2 = std::min(Len2, Length);
671     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
672       return copyFlags(
673           *CI,
674           emitMemCmp(Str1P, Str2P,
675                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
676                      B, DL, TLI));
677   } else if (HasStr1 && !HasStr2) {
678     Len1 = std::min(Len1, Length);
679     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
680       return copyFlags(
681           *CI,
682           emitMemCmp(Str1P, Str2P,
683                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
684                      B, DL, TLI));
685   }
686 
687   return nullptr;
688 }
689 
690 Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) {
691   Value *Src = CI->getArgOperand(0);
692   ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
693   uint64_t SrcLen = GetStringLength(Src);
694   if (SrcLen && Size) {
695     annotateDereferenceableBytes(CI, 0, SrcLen);
696     if (SrcLen <= Size->getZExtValue() + 1)
697       return copyFlags(*CI, emitStrDup(Src, B, TLI));
698   }
699 
700   return nullptr;
701 }
702 
703 Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
704   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
705   if (Dst == Src) // strcpy(x,x)  -> x
706     return Src;
707 
708   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
709   // See if we can get the length of the input string.
710   uint64_t Len = GetStringLength(Src);
711   if (Len)
712     annotateDereferenceableBytes(CI, 1, Len);
713   else
714     return nullptr;
715 
716   // We have enough information to now generate the memcpy call to do the
717   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
718   CallInst *NewCI =
719       B.CreateMemCpy(Dst, Align(1), Src, Align(1),
720                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
721   mergeAttributesAndFlags(NewCI, *CI);
722   return Dst;
723 }
724 
725 Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
726   Function *Callee = CI->getCalledFunction();
727   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
728 
729   // stpcpy(d,s) -> strcpy(d,s) if the result is not used.
730   if (CI->use_empty())
731     return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
732 
733   if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
734     Value *StrLen = emitStrLen(Src, B, DL, TLI);
735     return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
736   }
737 
738   // See if we can get the length of the input string.
739   uint64_t Len = GetStringLength(Src);
740   if (Len)
741     annotateDereferenceableBytes(CI, 1, Len);
742   else
743     return nullptr;
744 
745   Type *PT = Callee->getFunctionType()->getParamType(0);
746   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
747   Value *DstEnd = B.CreateInBoundsGEP(
748       B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
749 
750   // We have enough information to now generate the memcpy call to do the
751   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
752   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
753   mergeAttributesAndFlags(NewCI, *CI);
754   return DstEnd;
755 }
756 
757 // Optimize a call to size_t strlcpy(char*, const char*, size_t).
758 
759 Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
760   Value *Size = CI->getArgOperand(2);
761   if (isKnownNonZero(Size, DL))
762     // Like snprintf, the function stores into the destination only when
763     // the size argument is nonzero.
764     annotateNonNullNoUndefBasedOnAccess(CI, 0);
765   // The function reads the source argument regardless of Size (it returns
766   // its length).
767   annotateNonNullNoUndefBasedOnAccess(CI, 1);
768 
769   uint64_t NBytes;
770   if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
771     NBytes = SizeC->getZExtValue();
772   else
773     return nullptr;
774 
775   Value *Dst = CI->getArgOperand(0);
776   Value *Src = CI->getArgOperand(1);
777   if (NBytes <= 1) {
778     if (NBytes == 1)
779       // For a call to strlcpy(D, S, 1) first store a nul in *D.
780       B.CreateStore(B.getInt8(0), Dst);
781 
782     // Transform strlcpy(D, S, 0) to a call to strlen(S).
783     return copyFlags(*CI, emitStrLen(Src, B, DL, TLI));
784   }
785 
786   // Try to determine the length of the source, substituting its size
787   // when it's not nul-terminated (as it's required to be) to avoid
788   // reading past its end.
789   StringRef Str;
790   if (!getConstantStringInfo(Src, Str, /*TrimAtNul=*/false))
791     return nullptr;
792 
793   uint64_t SrcLen = Str.find('\0');
794   // Set if the terminating nul should be copied by the call to memcpy
795   // below.
796   bool NulTerm = SrcLen < NBytes;
797 
798   if (NulTerm)
799     // Overwrite NBytes with the number of bytes to copy, including
800     // the terminating nul.
801     NBytes = SrcLen + 1;
802   else {
803     // Set the length of the source for the function to return to its
804     // size, and cap NBytes at the same.
805     SrcLen = std::min(SrcLen, uint64_t(Str.size()));
806     NBytes = std::min(NBytes - 1, SrcLen);
807   }
808 
809   if (SrcLen == 0) {
810     // Transform strlcpy(D, "", N) to (*D = '\0, 0).
811     B.CreateStore(B.getInt8(0), Dst);
812     return ConstantInt::get(CI->getType(), 0);
813   }
814 
815   Function *Callee = CI->getCalledFunction();
816   Type *PT = Callee->getFunctionType()->getParamType(0);
817   // Transform strlcpy(D, S, N) to memcpy(D, S, N') where N' is the lower
818   // bound on strlen(S) + 1 and N, optionally followed by a nul store to
819   // D[N' - 1] if necessary.
820   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
821                         ConstantInt::get(DL.getIntPtrType(PT), NBytes));
822   mergeAttributesAndFlags(NewCI, *CI);
823 
824   if (!NulTerm) {
825     Value *EndOff = ConstantInt::get(CI->getType(), NBytes);
826     Value *EndPtr = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, EndOff);
827     B.CreateStore(B.getInt8(0), EndPtr);
828   }
829 
830   // Like snprintf, strlcpy returns the number of nonzero bytes that would
831   // have been copied if the bound had been sufficiently big (which in this
832   // case is strlen(Src)).
833   return ConstantInt::get(CI->getType(), SrcLen);
834 }
835 
836 // Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy
837 // otherwise.
838 Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
839                                              IRBuilderBase &B) {
840   Function *Callee = CI->getCalledFunction();
841   Value *Dst = CI->getArgOperand(0);
842   Value *Src = CI->getArgOperand(1);
843   Value *Size = CI->getArgOperand(2);
844 
845   if (isKnownNonZero(Size, DL)) {
846     // Both st{p,r}ncpy(D, S, N) access the source and destination arrays
847     // only when N is nonzero.
848     annotateNonNullNoUndefBasedOnAccess(CI, 0);
849     annotateNonNullNoUndefBasedOnAccess(CI, 1);
850   }
851 
852   // If the "bound" argument is known set N to it.  Otherwise set it to
853   // UINT64_MAX and handle it later.
854   uint64_t N = UINT64_MAX;
855   if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
856     N = SizeC->getZExtValue();
857 
858   if (N == 0)
859     // Fold st{p,r}ncpy(D, S, 0) to D.
860     return Dst;
861 
862   if (N == 1) {
863     Type *CharTy = B.getInt8Ty();
864     Value *CharVal = B.CreateLoad(CharTy, Src, "stxncpy.char0");
865     B.CreateStore(CharVal, Dst);
866     if (!RetEnd)
867       // Transform strncpy(D, S, 1) to return (*D = *S), D.
868       return Dst;
869 
870     // Transform stpncpy(D, S, 1) to return (*D = *S) ? D + 1 : D.
871     Value *ZeroChar = ConstantInt::get(CharTy, 0);
872     Value *Cmp = B.CreateICmpEQ(CharVal, ZeroChar, "stpncpy.char0cmp");
873 
874     Value *Off1 = B.getInt32(1);
875     Value *EndPtr = B.CreateInBoundsGEP(CharTy, Dst, Off1, "stpncpy.end");
876     return B.CreateSelect(Cmp, Dst, EndPtr, "stpncpy.sel");
877   }
878 
879   // If the length of the input string is known set SrcLen to it.
880   uint64_t SrcLen = GetStringLength(Src);
881   if (SrcLen)
882     annotateDereferenceableBytes(CI, 1, SrcLen);
883   else
884     return nullptr;
885 
886   --SrcLen; // Unbias length.
887 
888   if (SrcLen == 0) {
889     // Transform st{p,r}ncpy(D, "", N) to memset(D, '\0', N) for any N.
890     Align MemSetAlign =
891       CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
892     CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
893     AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0));
894     NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
895         CI->getContext(), 0, ArgAttrs));
896     copyFlags(*CI, NewCI);
897     return Dst;
898   }
899 
900   if (N > SrcLen + 1) {
901     if (N > 128)
902       // Bail if N is large or unknown.
903       return nullptr;
904 
905     // st{p,r}ncpy(D, "a", N) -> memcpy(D, "a\0\0\0", N) for N <= 128.
906     StringRef Str;
907     if (!getConstantStringInfo(Src, Str))
908       return nullptr;
909     std::string SrcStr = Str.str();
910     // Create a bigger, nul-padded array with the same length, SrcLen,
911     // as the original string.
912     SrcStr.resize(N, '\0');
913     Src = B.CreateGlobalString(SrcStr, "str", /*AddressSpace=*/0,
914                                /*M=*/nullptr, /*AddNull=*/false);
915   }
916 
917   Type *PT = Callee->getFunctionType()->getParamType(0);
918   // st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both
919   // S and N are constant.
920   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
921                                    ConstantInt::get(DL.getIntPtrType(PT), N));
922   mergeAttributesAndFlags(NewCI, *CI);
923   if (!RetEnd)
924     return Dst;
925 
926   // stpncpy(D, S, N) returns the address of the first null in D if it writes
927   // one, otherwise D + N.
928   Value *Off = B.getInt64(std::min(SrcLen, N));
929   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Off, "endptr");
930 }
931 
932 Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
933                                                unsigned CharSize,
934                                                Value *Bound) {
935   Value *Src = CI->getArgOperand(0);
936   Type *CharTy = B.getIntNTy(CharSize);
937 
938   if (isOnlyUsedInZeroEqualityComparison(CI) &&
939       (!Bound || isKnownNonZero(Bound, DL))) {
940     // Fold strlen:
941     //   strlen(x) != 0 --> *x != 0
942     //   strlen(x) == 0 --> *x == 0
943     // and likewise strnlen with constant N > 0:
944     //   strnlen(x, N) != 0 --> *x != 0
945     //   strnlen(x, N) == 0 --> *x == 0
946     return B.CreateZExt(B.CreateLoad(CharTy, Src, "char0"),
947                         CI->getType());
948   }
949 
950   if (Bound) {
951     if (ConstantInt *BoundCst = dyn_cast<ConstantInt>(Bound)) {
952       if (BoundCst->isZero())
953         // Fold strnlen(s, 0) -> 0 for any s, constant or otherwise.
954         return ConstantInt::get(CI->getType(), 0);
955 
956       if (BoundCst->isOne()) {
957         // Fold strnlen(s, 1) -> *s ? 1 : 0 for any s.
958         Value *CharVal = B.CreateLoad(CharTy, Src, "strnlen.char0");
959         Value *ZeroChar = ConstantInt::get(CharTy, 0);
960         Value *Cmp = B.CreateICmpNE(CharVal, ZeroChar, "strnlen.char0cmp");
961         return B.CreateZExt(Cmp, CI->getType());
962       }
963     }
964   }
965 
966   if (uint64_t Len = GetStringLength(Src, CharSize)) {
967     Value *LenC = ConstantInt::get(CI->getType(), Len - 1);
968     // Fold strlen("xyz") -> 3 and strnlen("xyz", 2) -> 2
969     // and strnlen("xyz", Bound) -> min(3, Bound) for nonconstant Bound.
970     if (Bound)
971       return B.CreateBinaryIntrinsic(Intrinsic::umin, LenC, Bound);
972     return LenC;
973   }
974 
975   if (Bound)
976     // Punt for strnlen for now.
977     return nullptr;
978 
979   // If s is a constant pointer pointing to a string literal, we can fold
980   // strlen(s + x) to strlen(s) - x, when x is known to be in the range
981   // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
982   // We only try to simplify strlen when the pointer s points to an array
983   // of CharSize elements. Otherwise, we would need to scale the offset x before
984   // doing the subtraction. This will make the optimization more complex, and
985   // it's not very useful because calling strlen for a pointer of other types is
986   // very uncommon.
987   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
988     // TODO: Handle subobjects.
989     if (!isGEPBasedOnPointerToString(GEP, CharSize))
990       return nullptr;
991 
992     ConstantDataArraySlice Slice;
993     if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
994       uint64_t NullTermIdx;
995       if (Slice.Array == nullptr) {
996         NullTermIdx = 0;
997       } else {
998         NullTermIdx = ~((uint64_t)0);
999         for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
1000           if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
1001             NullTermIdx = I;
1002             break;
1003           }
1004         }
1005         // If the string does not have '\0', leave it to strlen to compute
1006         // its length.
1007         if (NullTermIdx == ~((uint64_t)0))
1008           return nullptr;
1009       }
1010 
1011       Value *Offset = GEP->getOperand(2);
1012       KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
1013       uint64_t ArrSize =
1014              cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
1015 
1016       // If Offset is not provably in the range [0, NullTermIdx], we can still
1017       // optimize if we can prove that the program has undefined behavior when
1018       // Offset is outside that range. That is the case when GEP->getOperand(0)
1019       // is a pointer to an object whose memory extent is NullTermIdx+1.
1020       if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) ||
1021           (isa<GlobalVariable>(GEP->getOperand(0)) &&
1022            NullTermIdx == ArrSize - 1)) {
1023         Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
1024         return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
1025                            Offset);
1026       }
1027     }
1028   }
1029 
1030   // strlen(x?"foo":"bars") --> x ? 3 : 4
1031   if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
1032     uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
1033     uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
1034     if (LenTrue && LenFalse) {
1035       ORE.emit([&]() {
1036         return OptimizationRemark("instcombine", "simplify-libcalls", CI)
1037                << "folded strlen(select) to select of constants";
1038       });
1039       return B.CreateSelect(SI->getCondition(),
1040                             ConstantInt::get(CI->getType(), LenTrue - 1),
1041                             ConstantInt::get(CI->getType(), LenFalse - 1));
1042     }
1043   }
1044 
1045   return nullptr;
1046 }
1047 
1048 Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) {
1049   if (Value *V = optimizeStringLength(CI, B, 8))
1050     return V;
1051   annotateNonNullNoUndefBasedOnAccess(CI, 0);
1052   return nullptr;
1053 }
1054 
1055 Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) {
1056   Value *Bound = CI->getArgOperand(1);
1057   if (Value *V = optimizeStringLength(CI, B, 8, Bound))
1058     return V;
1059 
1060   if (isKnownNonZero(Bound, DL))
1061     annotateNonNullNoUndefBasedOnAccess(CI, 0);
1062   return nullptr;
1063 }
1064 
1065 Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) {
1066   Module &M = *CI->getModule();
1067   unsigned WCharSize = TLI->getWCharSize(M) * 8;
1068   // We cannot perform this optimization without wchar_size metadata.
1069   if (WCharSize == 0)
1070     return nullptr;
1071 
1072   return optimizeStringLength(CI, B, WCharSize);
1073 }
1074 
1075 Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) {
1076   StringRef S1, S2;
1077   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1078   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1079 
1080   // strpbrk(s, "") -> nullptr
1081   // strpbrk("", s) -> nullptr
1082   if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
1083     return Constant::getNullValue(CI->getType());
1084 
1085   // Constant folding.
1086   if (HasS1 && HasS2) {
1087     size_t I = S1.find_first_of(S2);
1088     if (I == StringRef::npos) // No match.
1089       return Constant::getNullValue(CI->getType());
1090 
1091     return B.CreateInBoundsGEP(B.getInt8Ty(), CI->getArgOperand(0),
1092                                B.getInt64(I), "strpbrk");
1093   }
1094 
1095   // strpbrk(s, "a") -> strchr(s, 'a')
1096   if (HasS2 && S2.size() == 1)
1097     return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI));
1098 
1099   return nullptr;
1100 }
1101 
1102 Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilderBase &B) {
1103   Value *EndPtr = CI->getArgOperand(1);
1104   if (isa<ConstantPointerNull>(EndPtr)) {
1105     // With a null EndPtr, this function won't capture the main argument.
1106     // It would be readonly too, except that it still may write to errno.
1107     CI->addParamAttr(0, Attribute::NoCapture);
1108   }
1109 
1110   return nullptr;
1111 }
1112 
1113 Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilderBase &B) {
1114   StringRef S1, S2;
1115   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1116   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1117 
1118   // strspn(s, "") -> 0
1119   // strspn("", s) -> 0
1120   if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
1121     return Constant::getNullValue(CI->getType());
1122 
1123   // Constant folding.
1124   if (HasS1 && HasS2) {
1125     size_t Pos = S1.find_first_not_of(S2);
1126     if (Pos == StringRef::npos)
1127       Pos = S1.size();
1128     return ConstantInt::get(CI->getType(), Pos);
1129   }
1130 
1131   return nullptr;
1132 }
1133 
1134 Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
1135   StringRef S1, S2;
1136   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1137   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1138 
1139   // strcspn("", s) -> 0
1140   if (HasS1 && S1.empty())
1141     return Constant::getNullValue(CI->getType());
1142 
1143   // Constant folding.
1144   if (HasS1 && HasS2) {
1145     size_t Pos = S1.find_first_of(S2);
1146     if (Pos == StringRef::npos)
1147       Pos = S1.size();
1148     return ConstantInt::get(CI->getType(), Pos);
1149   }
1150 
1151   // strcspn(s, "") -> strlen(s)
1152   if (HasS2 && S2.empty())
1153     return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI));
1154 
1155   return nullptr;
1156 }
1157 
1158 Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
1159   // fold strstr(x, x) -> x.
1160   if (CI->getArgOperand(0) == CI->getArgOperand(1))
1161     return CI->getArgOperand(0);
1162 
1163   // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
1164   if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
1165     Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI);
1166     if (!StrLen)
1167       return nullptr;
1168     Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
1169                                  StrLen, B, DL, TLI);
1170     if (!StrNCmp)
1171       return nullptr;
1172     for (User *U : llvm::make_early_inc_range(CI->users())) {
1173       ICmpInst *Old = cast<ICmpInst>(U);
1174       Value *Cmp =
1175           B.CreateICmp(Old->getPredicate(), StrNCmp,
1176                        ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
1177       replaceAllUsesWith(Old, Cmp);
1178     }
1179     return CI;
1180   }
1181 
1182   // See if either input string is a constant string.
1183   StringRef SearchStr, ToFindStr;
1184   bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
1185   bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
1186 
1187   // fold strstr(x, "") -> x.
1188   if (HasStr2 && ToFindStr.empty())
1189     return CI->getArgOperand(0);
1190 
1191   // If both strings are known, constant fold it.
1192   if (HasStr1 && HasStr2) {
1193     size_t Offset = SearchStr.find(ToFindStr);
1194 
1195     if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
1196       return Constant::getNullValue(CI->getType());
1197 
1198     // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
1199     return B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), CI->getArgOperand(0),
1200                                         Offset, "strstr");
1201   }
1202 
1203   // fold strstr(x, "y") -> strchr(x, 'y').
1204   if (HasStr2 && ToFindStr.size() == 1) {
1205     return emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
1206   }
1207 
1208   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
1209   return nullptr;
1210 }
1211 
1212 Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
1213   Value *SrcStr = CI->getArgOperand(0);
1214   Value *Size = CI->getArgOperand(2);
1215   annotateNonNullAndDereferenceable(CI, 0, Size, DL);
1216   Value *CharVal = CI->getArgOperand(1);
1217   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1218   Value *NullPtr = Constant::getNullValue(CI->getType());
1219 
1220   if (LenC) {
1221     if (LenC->isZero())
1222       // Fold memrchr(x, y, 0) --> null.
1223       return NullPtr;
1224 
1225     if (LenC->isOne()) {
1226       // Fold memrchr(x, y, 1) --> *x == y ? x : null for any x and y,
1227       // constant or otherwise.
1228       Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memrchr.char0");
1229       // Slice off the character's high end bits.
1230       CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
1231       Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memrchr.char0cmp");
1232       return B.CreateSelect(Cmp, SrcStr, NullPtr, "memrchr.sel");
1233     }
1234   }
1235 
1236   StringRef Str;
1237   if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false))
1238     return nullptr;
1239 
1240   if (Str.size() == 0)
1241     // If the array is empty fold memrchr(A, C, N) to null for any value
1242     // of C and N on the basis that the only valid value of N is zero
1243     // (otherwise the call is undefined).
1244     return NullPtr;
1245 
1246   uint64_t EndOff = UINT64_MAX;
1247   if (LenC) {
1248     EndOff = LenC->getZExtValue();
1249     if (Str.size() < EndOff)
1250       // Punt out-of-bounds accesses to sanitizers and/or libc.
1251       return nullptr;
1252   }
1253 
1254   if (ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal)) {
1255     // Fold memrchr(S, C, N) for a constant C.
1256     size_t Pos = Str.rfind(CharC->getZExtValue(), EndOff);
1257     if (Pos == StringRef::npos)
1258       // When the character is not in the source array fold the result
1259       // to null regardless of Size.
1260       return NullPtr;
1261 
1262     if (LenC)
1263       // Fold memrchr(s, c, N) --> s + Pos for constant N > Pos.
1264       return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos));
1265 
1266     if (Str.find(Str[Pos]) == Pos) {
1267       // When there is just a single occurrence of C in S, i.e., the one
1268       // in Str[Pos], fold
1269       //   memrchr(s, c, N) --> N <= Pos ? null : s + Pos
1270       // for nonconstant N.
1271       Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
1272                                    "memrchr.cmp");
1273       Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr,
1274                                            B.getInt64(Pos), "memrchr.ptr_plus");
1275       return B.CreateSelect(Cmp, NullPtr, SrcPlus, "memrchr.sel");
1276     }
1277   }
1278 
1279   // Truncate the string to search at most EndOff characters.
1280   Str = Str.substr(0, EndOff);
1281   if (Str.find_first_not_of(Str[0]) != StringRef::npos)
1282     return nullptr;
1283 
1284   // If the source array consists of all equal characters, then for any
1285   // C and N (whether in bounds or not), fold memrchr(S, C, N) to
1286   //   N != 0 && *S == C ? S + N - 1 : null
1287   Type *SizeTy = Size->getType();
1288   Type *Int8Ty = B.getInt8Ty();
1289   Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
1290   // Slice off the sought character's high end bits.
1291   CharVal = B.CreateTrunc(CharVal, Int8Ty);
1292   Value *CEqS0 = B.CreateICmpEQ(ConstantInt::get(Int8Ty, Str[0]), CharVal);
1293   Value *And = B.CreateLogicalAnd(NNeZ, CEqS0);
1294   Value *SizeM1 = B.CreateSub(Size, ConstantInt::get(SizeTy, 1));
1295   Value *SrcPlus =
1296       B.CreateInBoundsGEP(Int8Ty, SrcStr, SizeM1, "memrchr.ptr_plus");
1297   return B.CreateSelect(And, SrcPlus, NullPtr, "memrchr.sel");
1298 }
1299 
1300 Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
1301   Value *SrcStr = CI->getArgOperand(0);
1302   Value *Size = CI->getArgOperand(2);
1303 
1304   if (isKnownNonZero(Size, DL)) {
1305     annotateNonNullNoUndefBasedOnAccess(CI, 0);
1306     if (isOnlyUsedInEqualityComparison(CI, SrcStr))
1307       return memChrToCharCompare(CI, Size, B, DL);
1308   }
1309 
1310   Value *CharVal = CI->getArgOperand(1);
1311   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
1312   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1313   Value *NullPtr = Constant::getNullValue(CI->getType());
1314 
1315   // memchr(x, y, 0) -> null
1316   if (LenC) {
1317     if (LenC->isZero())
1318       return NullPtr;
1319 
1320     if (LenC->isOne()) {
1321       // Fold memchr(x, y, 1) --> *x == y ? x : null for any x and y,
1322       // constant or otherwise.
1323       Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memchr.char0");
1324       // Slice off the character's high end bits.
1325       CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
1326       Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memchr.char0cmp");
1327       return B.CreateSelect(Cmp, SrcStr, NullPtr, "memchr.sel");
1328     }
1329   }
1330 
1331   StringRef Str;
1332   if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false))
1333     return nullptr;
1334 
1335   if (CharC) {
1336     size_t Pos = Str.find(CharC->getZExtValue());
1337     if (Pos == StringRef::npos)
1338       // When the character is not in the source array fold the result
1339       // to null regardless of Size.
1340       return NullPtr;
1341 
1342     // Fold memchr(s, c, n) -> n <= Pos ? null : s + Pos
1343     // When the constant Size is less than or equal to the character
1344     // position also fold the result to null.
1345     Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
1346                                  "memchr.cmp");
1347     Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos),
1348                                          "memchr.ptr");
1349     return B.CreateSelect(Cmp, NullPtr, SrcPlus);
1350   }
1351 
1352   if (Str.size() == 0)
1353     // If the array is empty fold memchr(A, C, N) to null for any value
1354     // of C and N on the basis that the only valid value of N is zero
1355     // (otherwise the call is undefined).
1356     return NullPtr;
1357 
1358   if (LenC)
1359     Str = substr(Str, LenC->getZExtValue());
1360 
1361   size_t Pos = Str.find_first_not_of(Str[0]);
1362   if (Pos == StringRef::npos
1363       || Str.find_first_not_of(Str[Pos], Pos) == StringRef::npos) {
1364     // If the source array consists of at most two consecutive sequences
1365     // of the same characters, then for any C and N (whether in bounds or
1366     // not), fold memchr(S, C, N) to
1367     //   N != 0 && *S == C ? S : null
1368     // or for the two sequences to:
1369     //   N != 0 && *S == C ? S : (N > Pos && S[Pos] == C ? S + Pos : null)
1370     //   ^Sel2                   ^Sel1 are denoted above.
1371     // The latter makes it also possible to fold strchr() calls with strings
1372     // of the same characters.
1373     Type *SizeTy = Size->getType();
1374     Type *Int8Ty = B.getInt8Ty();
1375 
1376     // Slice off the sought character's high end bits.
1377     CharVal = B.CreateTrunc(CharVal, Int8Ty);
1378 
1379     Value *Sel1 = NullPtr;
1380     if (Pos != StringRef::npos) {
1381       // Handle two consecutive sequences of the same characters.
1382       Value *PosVal = ConstantInt::get(SizeTy, Pos);
1383       Value *StrPos = ConstantInt::get(Int8Ty, Str[Pos]);
1384       Value *CEqSPos = B.CreateICmpEQ(CharVal, StrPos);
1385       Value *NGtPos = B.CreateICmp(ICmpInst::ICMP_UGT, Size, PosVal);
1386       Value *And = B.CreateAnd(CEqSPos, NGtPos);
1387       Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, PosVal);
1388       Sel1 = B.CreateSelect(And, SrcPlus, NullPtr, "memchr.sel1");
1389     }
1390 
1391     Value *Str0 = ConstantInt::get(Int8Ty, Str[0]);
1392     Value *CEqS0 = B.CreateICmpEQ(Str0, CharVal);
1393     Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
1394     Value *And = B.CreateAnd(NNeZ, CEqS0);
1395     return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
1396   }
1397 
1398   if (!LenC) {
1399     if (isOnlyUsedInEqualityComparison(CI, SrcStr))
1400       // S is dereferenceable so it's safe to load from it and fold
1401       //   memchr(S, C, N) == S to N && *S == C for any C and N.
1402       // TODO: This is safe even for nonconstant S.
1403       return memChrToCharCompare(CI, Size, B, DL);
1404 
1405     // From now on we need a constant length and constant array.
1406     return nullptr;
1407   }
1408 
1409   bool OptForSize = CI->getFunction()->hasOptSize() ||
1410                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
1411                                                 PGSOQueryType::IRPass);
1412 
1413   // If the char is variable but the input str and length are not we can turn
1414   // this memchr call into a simple bit field test. Of course this only works
1415   // when the return value is only checked against null.
1416   //
1417   // It would be really nice to reuse switch lowering here but we can't change
1418   // the CFG at this point.
1419   //
1420   // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
1421   // != 0
1422   //   after bounds check.
1423   if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
1424     return nullptr;
1425 
1426   unsigned char Max =
1427       *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
1428                         reinterpret_cast<const unsigned char *>(Str.end()));
1429 
1430   // Make sure the bit field we're about to create fits in a register on the
1431   // target.
1432   // FIXME: On a 64 bit architecture this prevents us from using the
1433   // interesting range of alpha ascii chars. We could do better by emitting
1434   // two bitfields or shifting the range by 64 if no lower chars are used.
1435   if (!DL.fitsInLegalInteger(Max + 1)) {
1436     // Build chain of ORs
1437     // Transform:
1438     //    memchr("abcd", C, 4) != nullptr
1439     // to:
1440     //    (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0
1441     std::string SortedStr = Str.str();
1442     llvm::sort(SortedStr);
1443     // Compute the number of of non-contiguous ranges.
1444     unsigned NonContRanges = 1;
1445     for (size_t i = 1; i < SortedStr.size(); ++i) {
1446       if (SortedStr[i] > SortedStr[i - 1] + 1) {
1447         NonContRanges++;
1448       }
1449     }
1450 
1451     // Restrict this optimization to profitable cases with one or two range
1452     // checks.
1453     if (NonContRanges > 2)
1454       return nullptr;
1455 
1456     SmallVector<Value *> CharCompares;
1457     for (unsigned char C : SortedStr)
1458       CharCompares.push_back(
1459           B.CreateICmpEQ(CharVal, ConstantInt::get(CharVal->getType(), C)));
1460 
1461     return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType());
1462   }
1463 
1464   // For the bit field use a power-of-2 type with at least 8 bits to avoid
1465   // creating unnecessary illegal types.
1466   unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
1467 
1468   // Now build the bit field.
1469   APInt Bitfield(Width, 0);
1470   for (char C : Str)
1471     Bitfield.setBit((unsigned char)C);
1472   Value *BitfieldC = B.getInt(Bitfield);
1473 
1474   // Adjust width of "C" to the bitfield width, then mask off the high bits.
1475   Value *C = B.CreateZExtOrTrunc(CharVal, BitfieldC->getType());
1476   C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
1477 
1478   // First check that the bit field access is within bounds.
1479   Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
1480                                "memchr.bounds");
1481 
1482   // Create code that checks if the given bit is set in the field.
1483   Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
1484   Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
1485 
1486   // Finally merge both checks and cast to pointer type. The inttoptr
1487   // implicitly zexts the i1 to intptr type.
1488   return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"),
1489                           CI->getType());
1490 }
1491 
1492 // Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
1493 // arrays LHS and RHS and nonconstant Size.
1494 static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
1495                                     Value *Size, bool StrNCmp,
1496                                     IRBuilderBase &B, const DataLayout &DL) {
1497   if (LHS == RHS) // memcmp(s,s,x) -> 0
1498     return Constant::getNullValue(CI->getType());
1499 
1500   StringRef LStr, RStr;
1501   if (!getConstantStringInfo(LHS, LStr, /*TrimAtNul=*/false) ||
1502       !getConstantStringInfo(RHS, RStr, /*TrimAtNul=*/false))
1503     return nullptr;
1504 
1505   // If the contents of both constant arrays are known, fold a call to
1506   // memcmp(A, B, N) to
1507   //   N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0)
1508   // where Pos is the first mismatch between A and B, determined below.
1509 
1510   uint64_t Pos = 0;
1511   Value *Zero = ConstantInt::get(CI->getType(), 0);
1512   for (uint64_t MinSize = std::min(LStr.size(), RStr.size()); ; ++Pos) {
1513     if (Pos == MinSize ||
1514         (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) {
1515       // One array is a leading part of the other of equal or greater
1516       // size, or for strncmp, the arrays are equal strings.
1517       // Fold the result to zero.  Size is assumed to be in bounds, since
1518       // otherwise the call would be undefined.
1519       return Zero;
1520     }
1521 
1522     if (LStr[Pos] != RStr[Pos])
1523       break;
1524   }
1525 
1526   // Normalize the result.
1527   typedef unsigned char UChar;
1528   int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1;
1529   Value *MaxSize = ConstantInt::get(Size->getType(), Pos);
1530   Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize);
1531   Value *Res = ConstantInt::get(CI->getType(), IRes);
1532   return B.CreateSelect(Cmp, Zero, Res);
1533 }
1534 
1535 // Optimize a memcmp call CI with constant size Len.
1536 static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
1537                                          uint64_t Len, IRBuilderBase &B,
1538                                          const DataLayout &DL) {
1539   if (Len == 0) // memcmp(s1,s2,0) -> 0
1540     return Constant::getNullValue(CI->getType());
1541 
1542   // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
1543   if (Len == 1) {
1544     Value *LHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), LHS, "lhsc"),
1545                                CI->getType(), "lhsv");
1546     Value *RHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), RHS, "rhsc"),
1547                                CI->getType(), "rhsv");
1548     return B.CreateSub(LHSV, RHSV, "chardiff");
1549   }
1550 
1551   // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
1552   // TODO: The case where both inputs are constants does not need to be limited
1553   // to legal integers or equality comparison. See block below this.
1554   if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
1555     IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
1556     Align PrefAlignment = DL.getPrefTypeAlign(IntType);
1557 
1558     // First, see if we can fold either argument to a constant.
1559     Value *LHSV = nullptr;
1560     if (auto *LHSC = dyn_cast<Constant>(LHS))
1561       LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
1562 
1563     Value *RHSV = nullptr;
1564     if (auto *RHSC = dyn_cast<Constant>(RHS))
1565       RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
1566 
1567     // Don't generate unaligned loads. If either source is constant data,
1568     // alignment doesn't matter for that source because there is no load.
1569     if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
1570         (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
1571       if (!LHSV)
1572         LHSV = B.CreateLoad(IntType, LHS, "lhsv");
1573       if (!RHSV)
1574         RHSV = B.CreateLoad(IntType, RHS, "rhsv");
1575       return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
1576     }
1577   }
1578 
1579   return nullptr;
1580 }
1581 
1582 // Most simplifications for memcmp also apply to bcmp.
1583 Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
1584                                                    IRBuilderBase &B) {
1585   Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
1586   Value *Size = CI->getArgOperand(2);
1587 
1588   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1589 
1590   if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, false, B, DL))
1591     return Res;
1592 
1593   // Handle constant Size.
1594   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1595   if (!LenC)
1596     return nullptr;
1597 
1598   return optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL);
1599 }
1600 
1601 Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
1602   Module *M = CI->getModule();
1603   if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
1604     return V;
1605 
1606   // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
1607   // bcmp can be more efficient than memcmp because it only has to know that
1608   // there is a difference, not how different one is to the other.
1609   if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) &&
1610       isOnlyUsedInZeroEqualityComparison(CI)) {
1611     Value *LHS = CI->getArgOperand(0);
1612     Value *RHS = CI->getArgOperand(1);
1613     Value *Size = CI->getArgOperand(2);
1614     return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI));
1615   }
1616 
1617   return nullptr;
1618 }
1619 
1620 Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilderBase &B) {
1621   return optimizeMemCmpBCmpCommon(CI, B);
1622 }
1623 
1624 Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
1625   Value *Size = CI->getArgOperand(2);
1626   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1627   if (isa<IntrinsicInst>(CI))
1628     return nullptr;
1629 
1630   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
1631   CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
1632                                    CI->getArgOperand(1), Align(1), Size);
1633   mergeAttributesAndFlags(NewCI, *CI);
1634   return CI->getArgOperand(0);
1635 }
1636 
1637 Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
1638   Value *Dst = CI->getArgOperand(0);
1639   Value *Src = CI->getArgOperand(1);
1640   ConstantInt *StopChar = dyn_cast<ConstantInt>(CI->getArgOperand(2));
1641   ConstantInt *N = dyn_cast<ConstantInt>(CI->getArgOperand(3));
1642   StringRef SrcStr;
1643   if (CI->use_empty() && Dst == Src)
1644     return Dst;
1645   // memccpy(d, s, c, 0) -> nullptr
1646   if (N) {
1647     if (N->isNullValue())
1648       return Constant::getNullValue(CI->getType());
1649     if (!getConstantStringInfo(Src, SrcStr, /*TrimAtNul=*/false) ||
1650         // TODO: Handle zeroinitializer.
1651         !StopChar)
1652       return nullptr;
1653   } else {
1654     return nullptr;
1655   }
1656 
1657   // Wrap arg 'c' of type int to char
1658   size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
1659   if (Pos == StringRef::npos) {
1660     if (N->getZExtValue() <= SrcStr.size()) {
1661       copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1),
1662                                     CI->getArgOperand(3)));
1663       return Constant::getNullValue(CI->getType());
1664     }
1665     return nullptr;
1666   }
1667 
1668   Value *NewN =
1669       ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
1670   // memccpy -> llvm.memcpy
1671   copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN));
1672   return Pos + 1 <= N->getZExtValue()
1673              ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
1674              : Constant::getNullValue(CI->getType());
1675 }
1676 
1677 Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
1678   Value *Dst = CI->getArgOperand(0);
1679   Value *N = CI->getArgOperand(2);
1680   // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
1681   CallInst *NewCI =
1682       B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N);
1683   // Propagate attributes, but memcpy has no return value, so make sure that
1684   // any return attributes are compliant.
1685   // TODO: Attach return value attributes to the 1st operand to preserve them?
1686   mergeAttributesAndFlags(NewCI, *CI);
1687   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
1688 }
1689 
1690 Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
1691   Value *Size = CI->getArgOperand(2);
1692   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1693   if (isa<IntrinsicInst>(CI))
1694     return nullptr;
1695 
1696   // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
1697   CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
1698                                     CI->getArgOperand(1), Align(1), Size);
1699   mergeAttributesAndFlags(NewCI, *CI);
1700   return CI->getArgOperand(0);
1701 }
1702 
1703 Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
1704   Value *Size = CI->getArgOperand(2);
1705   annotateNonNullAndDereferenceable(CI, 0, Size, DL);
1706   if (isa<IntrinsicInst>(CI))
1707     return nullptr;
1708 
1709   // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
1710   Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
1711   CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
1712   mergeAttributesAndFlags(NewCI, *CI);
1713   return CI->getArgOperand(0);
1714 }
1715 
1716 Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
1717   if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
1718     return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI));
1719 
1720   return nullptr;
1721 }
1722 
1723 // When enabled, replace operator new() calls marked with a hot or cold memprof
1724 // attribute with an operator new() call that takes a __hot_cold_t parameter.
1725 // Currently this is supported by the open source version of tcmalloc, see:
1726 // https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h
1727 Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
1728                                       LibFunc &Func) {
1729   if (!OptimizeHotColdNew)
1730     return nullptr;
1731 
1732   uint8_t HotCold;
1733   if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "cold")
1734     HotCold = ColdNewHintValue;
1735   else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() ==
1736            "notcold")
1737     HotCold = NotColdNewHintValue;
1738   else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "hot")
1739     HotCold = HotNewHintValue;
1740   else
1741     return nullptr;
1742 
1743   // For calls that already pass a hot/cold hint, only update the hint if
1744   // directed by OptimizeExistingHotColdNew. For other calls to new, add a hint
1745   // if cold or hot, and leave as-is for default handling if "notcold" aka warm.
1746   // Note that in cases where we decide it is "notcold", it might be slightly
1747   // better to replace the hinted call with a non hinted call, to avoid the
1748   // extra paramter and the if condition check of the hint value in the
1749   // allocator. This can be considered in the future.
1750   switch (Func) {
1751   case LibFunc_Znwm12__hot_cold_t:
1752     if (OptimizeExistingHotColdNew)
1753       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1754                             LibFunc_Znwm12__hot_cold_t, HotCold);
1755     break;
1756   case LibFunc_Znwm:
1757     if (HotCold != NotColdNewHintValue)
1758       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1759                             LibFunc_Znwm12__hot_cold_t, HotCold);
1760     break;
1761   case LibFunc_Znam12__hot_cold_t:
1762     if (OptimizeExistingHotColdNew)
1763       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1764                             LibFunc_Znam12__hot_cold_t, HotCold);
1765     break;
1766   case LibFunc_Znam:
1767     if (HotCold != NotColdNewHintValue)
1768       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1769                             LibFunc_Znam12__hot_cold_t, HotCold);
1770     break;
1771   case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
1772     if (OptimizeExistingHotColdNew)
1773       return emitHotColdNewNoThrow(
1774           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1775           LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
1776     break;
1777   case LibFunc_ZnwmRKSt9nothrow_t:
1778     if (HotCold != NotColdNewHintValue)
1779       return emitHotColdNewNoThrow(
1780           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1781           LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
1782     break;
1783   case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
1784     if (OptimizeExistingHotColdNew)
1785       return emitHotColdNewNoThrow(
1786           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1787           LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
1788     break;
1789   case LibFunc_ZnamRKSt9nothrow_t:
1790     if (HotCold != NotColdNewHintValue)
1791       return emitHotColdNewNoThrow(
1792           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1793           LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
1794     break;
1795   case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
1796     if (OptimizeExistingHotColdNew)
1797       return emitHotColdNewAligned(
1798           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1799           LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
1800     break;
1801   case LibFunc_ZnwmSt11align_val_t:
1802     if (HotCold != NotColdNewHintValue)
1803       return emitHotColdNewAligned(
1804           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1805           LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
1806     break;
1807   case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
1808     if (OptimizeExistingHotColdNew)
1809       return emitHotColdNewAligned(
1810           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1811           LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
1812     break;
1813   case LibFunc_ZnamSt11align_val_t:
1814     if (HotCold != NotColdNewHintValue)
1815       return emitHotColdNewAligned(
1816           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1817           LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
1818     break;
1819   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1820     if (OptimizeExistingHotColdNew)
1821       return emitHotColdNewAlignedNoThrow(
1822           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1823           TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1824           HotCold);
1825     break;
1826   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
1827     if (HotCold != NotColdNewHintValue)
1828       return emitHotColdNewAlignedNoThrow(
1829           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1830           TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1831           HotCold);
1832     break;
1833   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1834     if (OptimizeExistingHotColdNew)
1835       return emitHotColdNewAlignedNoThrow(
1836           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1837           TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1838           HotCold);
1839     break;
1840   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
1841     if (HotCold != NotColdNewHintValue)
1842       return emitHotColdNewAlignedNoThrow(
1843           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1844           TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1845           HotCold);
1846     break;
1847   default:
1848     return nullptr;
1849   }
1850   return nullptr;
1851 }
1852 
1853 //===----------------------------------------------------------------------===//
1854 // Math Library Optimizations
1855 //===----------------------------------------------------------------------===//
1856 
1857 // Replace a libcall \p CI with a call to intrinsic \p IID
1858 static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B,
1859                                Intrinsic::ID IID) {
1860   CallInst *NewCall = B.CreateUnaryIntrinsic(IID, CI->getArgOperand(0), CI);
1861   NewCall->takeName(CI);
1862   return copyFlags(*CI, NewCall);
1863 }
1864 
1865 /// Return a variant of Val with float type.
1866 /// Currently this works in two cases: If Val is an FPExtension of a float
1867 /// value to something bigger, simply return the operand.
1868 /// If Val is a ConstantFP but can be converted to a float ConstantFP without
1869 /// loss of precision do so.
1870 static Value *valueHasFloatPrecision(Value *Val) {
1871   if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
1872     Value *Op = Cast->getOperand(0);
1873     if (Op->getType()->isFloatTy())
1874       return Op;
1875   }
1876   if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
1877     APFloat F = Const->getValueAPF();
1878     bool losesInfo;
1879     (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1880                     &losesInfo);
1881     if (!losesInfo)
1882       return ConstantFP::get(Const->getContext(), F);
1883   }
1884   return nullptr;
1885 }
1886 
1887 /// Shrink double -> float functions.
1888 static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
1889                                bool isBinary, const TargetLibraryInfo *TLI,
1890                                bool isPrecise = false) {
1891   Function *CalleeFn = CI->getCalledFunction();
1892   if (!CI->getType()->isDoubleTy() || !CalleeFn)
1893     return nullptr;
1894 
1895   // If not all the uses of the function are converted to float, then bail out.
1896   // This matters if the precision of the result is more important than the
1897   // precision of the arguments.
1898   if (isPrecise)
1899     for (User *U : CI->users()) {
1900       FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
1901       if (!Cast || !Cast->getType()->isFloatTy())
1902         return nullptr;
1903     }
1904 
1905   // If this is something like 'g((double) float)', convert to 'gf(float)'.
1906   Value *V[2];
1907   V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
1908   V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
1909   if (!V[0] || (isBinary && !V[1]))
1910     return nullptr;
1911 
1912   // If call isn't an intrinsic, check that it isn't within a function with the
1913   // same name as the float version of this call, otherwise the result is an
1914   // infinite loop.  For example, from MinGW-w64:
1915   //
1916   // float expf(float val) { return (float) exp((double) val); }
1917   StringRef CalleeName = CalleeFn->getName();
1918   bool IsIntrinsic = CalleeFn->isIntrinsic();
1919   if (!IsIntrinsic) {
1920     StringRef CallerName = CI->getFunction()->getName();
1921     if (!CallerName.empty() && CallerName.back() == 'f' &&
1922         CallerName.size() == (CalleeName.size() + 1) &&
1923         CallerName.starts_with(CalleeName))
1924       return nullptr;
1925   }
1926 
1927   // Propagate the math semantics from the current function to the new function.
1928   IRBuilderBase::FastMathFlagGuard Guard(B);
1929   B.setFastMathFlags(CI->getFastMathFlags());
1930 
1931   // g((double) float) -> (double) gf(float)
1932   Value *R;
1933   if (IsIntrinsic) {
1934     Module *M = CI->getModule();
1935     Intrinsic::ID IID = CalleeFn->getIntrinsicID();
1936     Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
1937     R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
1938   } else {
1939     AttributeList CalleeAttrs = CalleeFn->getAttributes();
1940     R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], TLI, CalleeName, B,
1941                                          CalleeAttrs)
1942                  : emitUnaryFloatFnCall(V[0], TLI, CalleeName, B, CalleeAttrs);
1943   }
1944   return B.CreateFPExt(R, B.getDoubleTy());
1945 }
1946 
1947 /// Shrink double -> float for unary functions.
1948 static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B,
1949                                     const TargetLibraryInfo *TLI,
1950                                     bool isPrecise = false) {
1951   return optimizeDoubleFP(CI, B, false, TLI, isPrecise);
1952 }
1953 
1954 /// Shrink double -> float for binary functions.
1955 static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B,
1956                                      const TargetLibraryInfo *TLI,
1957                                      bool isPrecise = false) {
1958   return optimizeDoubleFP(CI, B, true, TLI, isPrecise);
1959 }
1960 
1961 // cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
1962 Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
1963   Value *Real, *Imag;
1964 
1965   if (CI->arg_size() == 1) {
1966 
1967     if (!CI->isFast())
1968       return nullptr;
1969 
1970     Value *Op = CI->getArgOperand(0);
1971     assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
1972 
1973     Real = B.CreateExtractValue(Op, 0, "real");
1974     Imag = B.CreateExtractValue(Op, 1, "imag");
1975 
1976   } else {
1977     assert(CI->arg_size() == 2 && "Unexpected signature for cabs!");
1978 
1979     Real = CI->getArgOperand(0);
1980     Imag = CI->getArgOperand(1);
1981 
1982     // if real or imaginary part is zero, simplify to abs(cimag(z))
1983     // or abs(creal(z))
1984     Value *AbsOp = nullptr;
1985     if (ConstantFP *ConstReal = dyn_cast<ConstantFP>(Real)) {
1986       if (ConstReal->isZero())
1987         AbsOp = Imag;
1988 
1989     } else if (ConstantFP *ConstImag = dyn_cast<ConstantFP>(Imag)) {
1990       if (ConstImag->isZero())
1991         AbsOp = Real;
1992     }
1993 
1994     if (AbsOp) {
1995       IRBuilderBase::FastMathFlagGuard Guard(B);
1996       B.setFastMathFlags(CI->getFastMathFlags());
1997 
1998       return copyFlags(
1999           *CI, B.CreateUnaryIntrinsic(Intrinsic::fabs, AbsOp, nullptr, "cabs"));
2000     }
2001 
2002     if (!CI->isFast())
2003       return nullptr;
2004   }
2005 
2006   // Propagate fast-math flags from the existing call to new instructions.
2007   IRBuilderBase::FastMathFlagGuard Guard(B);
2008   B.setFastMathFlags(CI->getFastMathFlags());
2009 
2010   Value *RealReal = B.CreateFMul(Real, Real);
2011   Value *ImagImag = B.CreateFMul(Imag, Imag);
2012 
2013   return copyFlags(*CI, B.CreateUnaryIntrinsic(Intrinsic::sqrt,
2014                                                B.CreateFAdd(RealReal, ImagImag),
2015                                                nullptr, "cabs"));
2016 }
2017 
2018 // Return a properly extended integer (DstWidth bits wide) if the operation is
2019 // an itofp.
2020 static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
2021   if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
2022     Value *Op = cast<Instruction>(I2F)->getOperand(0);
2023     // Make sure that the exponent fits inside an "int" of size DstWidth,
2024     // thus avoiding any range issues that FP has not.
2025     unsigned BitWidth = Op->getType()->getScalarSizeInBits();
2026     if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
2027       Type *IntTy = Op->getType()->getWithNewBitWidth(DstWidth);
2028       return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)
2029                                   : B.CreateZExt(Op, IntTy);
2030     }
2031   }
2032 
2033   return nullptr;
2034 }
2035 
2036 /// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
2037 /// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
2038 /// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
2039 Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
2040   Module *M = Pow->getModule();
2041   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
2042   Type *Ty = Pow->getType();
2043   bool Ignored;
2044 
2045   // Evaluate special cases related to a nested function as the base.
2046 
2047   // pow(exp(x), y) -> exp(x * y)
2048   // pow(exp2(x), y) -> exp2(x * y)
2049   // If exp{,2}() is used only once, it is better to fold two transcendental
2050   // math functions into one.  If used again, exp{,2}() would still have to be
2051   // called with the original argument, then keep both original transcendental
2052   // functions.  However, this transformation is only safe with fully relaxed
2053   // math semantics, since, besides rounding differences, it changes overflow
2054   // and underflow behavior quite dramatically.  For example:
2055   //   pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
2056   // Whereas:
2057   //   exp(1000 * 0.001) = exp(1)
2058   // TODO: Loosen the requirement for fully relaxed math semantics.
2059   // TODO: Handle exp10() when more targets have it available.
2060   CallInst *BaseFn = dyn_cast<CallInst>(Base);
2061   if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
2062     LibFunc LibFn;
2063 
2064     Function *CalleeFn = BaseFn->getCalledFunction();
2065     if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
2066         isLibFuncEmittable(M, TLI, LibFn)) {
2067       StringRef ExpName;
2068       Intrinsic::ID ID;
2069       Value *ExpFn;
2070       LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble;
2071 
2072       switch (LibFn) {
2073       default:
2074         return nullptr;
2075       case LibFunc_expf:
2076       case LibFunc_exp:
2077       case LibFunc_expl:
2078         ExpName = TLI->getName(LibFunc_exp);
2079         ID = Intrinsic::exp;
2080         LibFnFloat = LibFunc_expf;
2081         LibFnDouble = LibFunc_exp;
2082         LibFnLongDouble = LibFunc_expl;
2083         break;
2084       case LibFunc_exp2f:
2085       case LibFunc_exp2:
2086       case LibFunc_exp2l:
2087         ExpName = TLI->getName(LibFunc_exp2);
2088         ID = Intrinsic::exp2;
2089         LibFnFloat = LibFunc_exp2f;
2090         LibFnDouble = LibFunc_exp2;
2091         LibFnLongDouble = LibFunc_exp2l;
2092         break;
2093       }
2094 
2095       // Create new exp{,2}() with the product as its argument.
2096       Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
2097       ExpFn = BaseFn->doesNotAccessMemory()
2098                   ? B.CreateUnaryIntrinsic(ID, FMul, nullptr, ExpName)
2099                   : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
2100                                          LibFnLongDouble, B,
2101                                          BaseFn->getAttributes());
2102 
2103       // Since the new exp{,2}() is different from the original one, dead code
2104       // elimination cannot be trusted to remove it, since it may have side
2105       // effects (e.g., errno).  When the only consumer for the original
2106       // exp{,2}() is pow(), then it has to be explicitly erased.
2107       substituteInParent(BaseFn, ExpFn);
2108       return ExpFn;
2109     }
2110   }
2111 
2112   // Evaluate special cases related to a constant base.
2113 
2114   const APFloat *BaseF;
2115   if (!match(Base, m_APFloat(BaseF)))
2116     return nullptr;
2117 
2118   AttributeList NoAttrs; // Attributes are only meaningful on the original call
2119 
2120   const bool UseIntrinsic = Pow->doesNotAccessMemory();
2121 
2122   // pow(2.0, itofp(x)) -> ldexp(1.0, x)
2123   if ((UseIntrinsic || !Ty->isVectorTy()) && BaseF->isExactlyValue(2.0) &&
2124       (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
2125       (UseIntrinsic ||
2126        hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl))) {
2127 
2128     // TODO: Shouldn't really need to depend on getIntToFPVal for intrinsic. Can
2129     // just directly use the original integer type.
2130     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) {
2131       Constant *One = ConstantFP::get(Ty, 1.0);
2132 
2133       if (UseIntrinsic) {
2134         return copyFlags(*Pow, B.CreateIntrinsic(Intrinsic::ldexp,
2135                                                  {Ty, ExpoI->getType()},
2136                                                  {One, ExpoI}, Pow, "exp2"));
2137       }
2138 
2139       return copyFlags(*Pow, emitBinaryFloatFnCall(
2140                                  One, ExpoI, TLI, LibFunc_ldexp, LibFunc_ldexpf,
2141                                  LibFunc_ldexpl, B, NoAttrs));
2142     }
2143   }
2144 
2145   // pow(2.0 ** n, x) -> exp2(n * x)
2146   if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
2147     APFloat BaseR = APFloat(1.0);
2148     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
2149     BaseR = BaseR / *BaseF;
2150     bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
2151     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
2152     APSInt NI(64, false);
2153     if ((IsInteger || IsReciprocal) &&
2154         NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
2155             APFloat::opOK &&
2156         NI > 1 && NI.isPowerOf2()) {
2157       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
2158       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
2159       if (Pow->doesNotAccessMemory())
2160         return copyFlags(*Pow, B.CreateUnaryIntrinsic(Intrinsic::exp2, FMul,
2161                                                       nullptr, "exp2"));
2162       else
2163         return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
2164                                                     LibFunc_exp2f,
2165                                                     LibFunc_exp2l, B, NoAttrs));
2166     }
2167   }
2168 
2169   // pow(10.0, x) -> exp10(x)
2170   if (BaseF->isExactlyValue(10.0) &&
2171       hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) {
2172 
2173     if (Pow->doesNotAccessMemory()) {
2174       CallInst *NewExp10 =
2175           B.CreateIntrinsic(Intrinsic::exp10, {Ty}, {Expo}, Pow, "exp10");
2176       return copyFlags(*Pow, NewExp10);
2177     }
2178 
2179     return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10,
2180                                                 LibFunc_exp10f, LibFunc_exp10l,
2181                                                 B, NoAttrs));
2182   }
2183 
2184   // pow(x, y) -> exp2(log2(x) * y)
2185   if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() &&
2186       !BaseF->isNegative()) {
2187     // pow(1, inf) is defined to be 1 but exp2(log2(1) * inf) evaluates to NaN.
2188     // Luckily optimizePow has already handled the x == 1 case.
2189     assert(!match(Base, m_FPOne()) &&
2190            "pow(1.0, y) should have been simplified earlier!");
2191 
2192     Value *Log = nullptr;
2193     if (Ty->isFloatTy())
2194       Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
2195     else if (Ty->isDoubleTy())
2196       Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble()));
2197 
2198     if (Log) {
2199       Value *FMul = B.CreateFMul(Log, Expo, "mul");
2200       if (Pow->doesNotAccessMemory())
2201         return copyFlags(*Pow, B.CreateUnaryIntrinsic(Intrinsic::exp2, FMul,
2202                                                       nullptr, "exp2"));
2203       else if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
2204                           LibFunc_exp2l))
2205         return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
2206                                                     LibFunc_exp2f,
2207                                                     LibFunc_exp2l, B, NoAttrs));
2208     }
2209   }
2210 
2211   return nullptr;
2212 }
2213 
2214 static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
2215                           Module *M, IRBuilderBase &B,
2216                           const TargetLibraryInfo *TLI) {
2217   // If errno is never set, then use the intrinsic for sqrt().
2218   if (NoErrno)
2219     return B.CreateUnaryIntrinsic(Intrinsic::sqrt, V, nullptr, "sqrt");
2220 
2221   // Otherwise, use the libcall for sqrt().
2222   if (hasFloatFn(M, TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
2223                  LibFunc_sqrtl))
2224     // TODO: We also should check that the target can in fact lower the sqrt()
2225     // libcall. We currently have no way to ask this question, so we ask if
2226     // the target has a sqrt() libcall, which is not exactly the same.
2227     return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
2228                                 LibFunc_sqrtl, B, Attrs);
2229 
2230   return nullptr;
2231 }
2232 
2233 /// Use square root in place of pow(x, +/-0.5).
2234 Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
2235   Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
2236   Module *Mod = Pow->getModule();
2237   Type *Ty = Pow->getType();
2238 
2239   const APFloat *ExpoF;
2240   if (!match(Expo, m_APFloat(ExpoF)) ||
2241       (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
2242     return nullptr;
2243 
2244   // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step,
2245   // so that requires fast-math-flags (afn or reassoc).
2246   if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
2247     return nullptr;
2248 
2249   // If we have a pow() library call (accesses memory) and we can't guarantee
2250   // that the base is not an infinity, give up:
2251   // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting
2252   // errno), but sqrt(-Inf) is required by various standards to set errno.
2253   if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
2254       !isKnownNeverInfinity(Base, 0,
2255                             SimplifyQuery(DL, TLI, /*DT=*/nullptr, AC, Pow)))
2256     return nullptr;
2257 
2258   Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B,
2259                      TLI);
2260   if (!Sqrt)
2261     return nullptr;
2262 
2263   // Handle signed zero base by expanding to fabs(sqrt(x)).
2264   if (!Pow->hasNoSignedZeros())
2265     Sqrt = B.CreateUnaryIntrinsic(Intrinsic::fabs, Sqrt, nullptr, "abs");
2266 
2267   Sqrt = copyFlags(*Pow, Sqrt);
2268 
2269   // Handle non finite base by expanding to
2270   // (x == -infinity ? +infinity : sqrt(x)).
2271   if (!Pow->hasNoInfs()) {
2272     Value *PosInf = ConstantFP::getInfinity(Ty),
2273           *NegInf = ConstantFP::getInfinity(Ty, true);
2274     Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
2275     Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
2276   }
2277 
2278   // If the exponent is negative, then get the reciprocal.
2279   if (ExpoF->isNegative())
2280     Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
2281 
2282   return Sqrt;
2283 }
2284 
2285 static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
2286                                            IRBuilderBase &B) {
2287   Value *Args[] = {Base, Expo};
2288   Type *Types[] = {Base->getType(), Expo->getType()};
2289   return B.CreateIntrinsic(Intrinsic::powi, Types, Args);
2290 }
2291 
2292 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
2293   Value *Base = Pow->getArgOperand(0);
2294   Value *Expo = Pow->getArgOperand(1);
2295   Function *Callee = Pow->getCalledFunction();
2296   StringRef Name = Callee->getName();
2297   Type *Ty = Pow->getType();
2298   Module *M = Pow->getModule();
2299   bool AllowApprox = Pow->hasApproxFunc();
2300   bool Ignored;
2301 
2302   // Propagate the math semantics from the call to any created instructions.
2303   IRBuilderBase::FastMathFlagGuard Guard(B);
2304   B.setFastMathFlags(Pow->getFastMathFlags());
2305   // Evaluate special cases related to the base.
2306 
2307   // pow(1.0, x) -> 1.0
2308   if (match(Base, m_FPOne()))
2309     return Base;
2310 
2311   if (Value *Exp = replacePowWithExp(Pow, B))
2312     return Exp;
2313 
2314   // Evaluate special cases related to the exponent.
2315 
2316   // pow(x, -1.0) -> 1.0 / x
2317   if (match(Expo, m_SpecificFP(-1.0)))
2318     return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
2319 
2320   // pow(x, +/-0.0) -> 1.0
2321   if (match(Expo, m_AnyZeroFP()))
2322     return ConstantFP::get(Ty, 1.0);
2323 
2324   // pow(x, 1.0) -> x
2325   if (match(Expo, m_FPOne()))
2326     return Base;
2327 
2328   // pow(x, 2.0) -> x * x
2329   if (match(Expo, m_SpecificFP(2.0)))
2330     return B.CreateFMul(Base, Base, "square");
2331 
2332   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
2333     return Sqrt;
2334 
2335   // If we can approximate pow:
2336   // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
2337   // pow(x, n) -> powi(x, n) if n is a constant signed integer value
2338   const APFloat *ExpoF;
2339   if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
2340       !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
2341     APFloat ExpoA(abs(*ExpoF));
2342     APFloat ExpoI(*ExpoF);
2343     Value *Sqrt = nullptr;
2344     if (!ExpoA.isInteger()) {
2345       APFloat Expo2 = ExpoA;
2346       // To check if ExpoA is an integer + 0.5, we add it to itself. If there
2347       // is no floating point exception and the result is an integer, then
2348       // ExpoA == integer + 0.5
2349       if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
2350         return nullptr;
2351 
2352       if (!Expo2.isInteger())
2353         return nullptr;
2354 
2355       if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) !=
2356           APFloat::opInexact)
2357         return nullptr;
2358       if (!ExpoI.isInteger())
2359         return nullptr;
2360       ExpoF = &ExpoI;
2361 
2362       Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), M,
2363                          B, TLI);
2364       if (!Sqrt)
2365         return nullptr;
2366     }
2367 
2368     // 0.5 fraction is now optionally handled.
2369     // Do pow -> powi for remaining integer exponent
2370     APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
2371     if (ExpoF->isInteger() &&
2372         ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
2373             APFloat::opOK) {
2374       Value *PowI = copyFlags(
2375           *Pow,
2376           createPowWithIntegerExponent(
2377               Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
2378               M, B));
2379 
2380       if (PowI && Sqrt)
2381         return B.CreateFMul(PowI, Sqrt);
2382 
2383       return PowI;
2384     }
2385   }
2386 
2387   // powf(x, itofp(y)) -> powi(x, y)
2388   if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
2389     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
2390       return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B));
2391   }
2392 
2393   // Shrink pow() to powf() if the arguments are single precision,
2394   // unless the result is expected to be double precision.
2395   if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
2396       hasFloatVersion(M, Name)) {
2397     if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, TLI, true))
2398       return Shrunk;
2399   }
2400 
2401   return nullptr;
2402 }
2403 
2404 Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
2405   Module *M = CI->getModule();
2406   Function *Callee = CI->getCalledFunction();
2407   StringRef Name = Callee->getName();
2408   Value *Ret = nullptr;
2409   if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
2410       hasFloatVersion(M, Name))
2411     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2412 
2413   // If we have an llvm.exp2 intrinsic, emit the llvm.ldexp intrinsic. If we
2414   // have the libcall, emit the libcall.
2415   //
2416   // TODO: In principle we should be able to just always use the intrinsic for
2417   // any doesNotAccessMemory callsite.
2418 
2419   const bool UseIntrinsic = Callee->isIntrinsic();
2420   // Bail out for vectors because the code below only expects scalars.
2421   Type *Ty = CI->getType();
2422   if (!UseIntrinsic && Ty->isVectorTy())
2423     return Ret;
2424 
2425   // exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= IntSize
2426   // exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < IntSize
2427   Value *Op = CI->getArgOperand(0);
2428   if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
2429       (UseIntrinsic ||
2430        hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl))) {
2431     if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) {
2432       Constant *One = ConstantFP::get(Ty, 1.0);
2433 
2434       if (UseIntrinsic) {
2435         return copyFlags(*CI, B.CreateIntrinsic(Intrinsic::ldexp,
2436                                                 {Ty, Exp->getType()},
2437                                                 {One, Exp}, CI));
2438       }
2439 
2440       IRBuilderBase::FastMathFlagGuard Guard(B);
2441       B.setFastMathFlags(CI->getFastMathFlags());
2442       return copyFlags(*CI, emitBinaryFloatFnCall(
2443                                 One, Exp, TLI, LibFunc_ldexp, LibFunc_ldexpf,
2444                                 LibFunc_ldexpl, B, AttributeList()));
2445     }
2446   }
2447 
2448   return Ret;
2449 }
2450 
2451 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
2452   Module *M = CI->getModule();
2453 
2454   // If we can shrink the call to a float function rather than a double
2455   // function, do that first.
2456   Function *Callee = CI->getCalledFunction();
2457   StringRef Name = Callee->getName();
2458   if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(M, Name))
2459     if (Value *Ret = optimizeBinaryDoubleFP(CI, B, TLI))
2460       return Ret;
2461 
2462   // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
2463   // the intrinsics for improved optimization (for example, vectorization).
2464   // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
2465   // From the C standard draft WG14/N1256:
2466   // "Ideally, fmax would be sensitive to the sign of zero, for example
2467   // fmax(-0.0, +0.0) would return +0; however, implementation in software
2468   // might be impractical."
2469   IRBuilderBase::FastMathFlagGuard Guard(B);
2470   FastMathFlags FMF = CI->getFastMathFlags();
2471   FMF.setNoSignedZeros();
2472   B.setFastMathFlags(FMF);
2473 
2474   Intrinsic::ID IID = Callee->getName().starts_with("fmin") ? Intrinsic::minnum
2475                                                             : Intrinsic::maxnum;
2476   return copyFlags(*CI, B.CreateBinaryIntrinsic(IID, CI->getArgOperand(0),
2477                                                 CI->getArgOperand(1)));
2478 }
2479 
2480 Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
2481   Function *LogFn = Log->getCalledFunction();
2482   StringRef LogNm = LogFn->getName();
2483   Intrinsic::ID LogID = LogFn->getIntrinsicID();
2484   Module *Mod = Log->getModule();
2485   Type *Ty = Log->getType();
2486   Value *Ret = nullptr;
2487 
2488   if (UnsafeFPShrink && hasFloatVersion(Mod, LogNm))
2489     Ret = optimizeUnaryDoubleFP(Log, B, TLI, true);
2490 
2491   // The earlier call must also be 'fast' in order to do these transforms.
2492   CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
2493   if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse())
2494     return Ret;
2495 
2496   LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb;
2497 
2498   // This is only applicable to log(), log2(), log10().
2499   if (TLI->getLibFunc(LogNm, LogLb))
2500     switch (LogLb) {
2501     case LibFunc_logf:
2502       LogID = Intrinsic::log;
2503       ExpLb = LibFunc_expf;
2504       Exp2Lb = LibFunc_exp2f;
2505       Exp10Lb = LibFunc_exp10f;
2506       PowLb = LibFunc_powf;
2507       break;
2508     case LibFunc_log:
2509       LogID = Intrinsic::log;
2510       ExpLb = LibFunc_exp;
2511       Exp2Lb = LibFunc_exp2;
2512       Exp10Lb = LibFunc_exp10;
2513       PowLb = LibFunc_pow;
2514       break;
2515     case LibFunc_logl:
2516       LogID = Intrinsic::log;
2517       ExpLb = LibFunc_expl;
2518       Exp2Lb = LibFunc_exp2l;
2519       Exp10Lb = LibFunc_exp10l;
2520       PowLb = LibFunc_powl;
2521       break;
2522     case LibFunc_log2f:
2523       LogID = Intrinsic::log2;
2524       ExpLb = LibFunc_expf;
2525       Exp2Lb = LibFunc_exp2f;
2526       Exp10Lb = LibFunc_exp10f;
2527       PowLb = LibFunc_powf;
2528       break;
2529     case LibFunc_log2:
2530       LogID = Intrinsic::log2;
2531       ExpLb = LibFunc_exp;
2532       Exp2Lb = LibFunc_exp2;
2533       Exp10Lb = LibFunc_exp10;
2534       PowLb = LibFunc_pow;
2535       break;
2536     case LibFunc_log2l:
2537       LogID = Intrinsic::log2;
2538       ExpLb = LibFunc_expl;
2539       Exp2Lb = LibFunc_exp2l;
2540       Exp10Lb = LibFunc_exp10l;
2541       PowLb = LibFunc_powl;
2542       break;
2543     case LibFunc_log10f:
2544       LogID = Intrinsic::log10;
2545       ExpLb = LibFunc_expf;
2546       Exp2Lb = LibFunc_exp2f;
2547       Exp10Lb = LibFunc_exp10f;
2548       PowLb = LibFunc_powf;
2549       break;
2550     case LibFunc_log10:
2551       LogID = Intrinsic::log10;
2552       ExpLb = LibFunc_exp;
2553       Exp2Lb = LibFunc_exp2;
2554       Exp10Lb = LibFunc_exp10;
2555       PowLb = LibFunc_pow;
2556       break;
2557     case LibFunc_log10l:
2558       LogID = Intrinsic::log10;
2559       ExpLb = LibFunc_expl;
2560       Exp2Lb = LibFunc_exp2l;
2561       Exp10Lb = LibFunc_exp10l;
2562       PowLb = LibFunc_powl;
2563       break;
2564     default:
2565       return Ret;
2566     }
2567   else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
2568            LogID == Intrinsic::log10) {
2569     if (Ty->getScalarType()->isFloatTy()) {
2570       ExpLb = LibFunc_expf;
2571       Exp2Lb = LibFunc_exp2f;
2572       Exp10Lb = LibFunc_exp10f;
2573       PowLb = LibFunc_powf;
2574     } else if (Ty->getScalarType()->isDoubleTy()) {
2575       ExpLb = LibFunc_exp;
2576       Exp2Lb = LibFunc_exp2;
2577       Exp10Lb = LibFunc_exp10;
2578       PowLb = LibFunc_pow;
2579     } else
2580       return Ret;
2581   } else
2582     return Ret;
2583 
2584   IRBuilderBase::FastMathFlagGuard Guard(B);
2585   B.setFastMathFlags(FastMathFlags::getFast());
2586 
2587   Intrinsic::ID ArgID = Arg->getIntrinsicID();
2588   LibFunc ArgLb = NotLibFunc;
2589   TLI->getLibFunc(*Arg, ArgLb);
2590 
2591   // log(pow(x,y)) -> y*log(x)
2592   AttributeList NoAttrs;
2593   if (ArgLb == PowLb || ArgID == Intrinsic::pow || ArgID == Intrinsic::powi) {
2594     Value *LogX =
2595         Log->doesNotAccessMemory()
2596             ? B.CreateUnaryIntrinsic(LogID, Arg->getOperand(0), nullptr, "log")
2597             : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, NoAttrs);
2598     Value *Y = Arg->getArgOperand(1);
2599     // Cast exponent to FP if integer.
2600     if (ArgID == Intrinsic::powi)
2601       Y = B.CreateSIToFP(Y, Ty, "cast");
2602     Value *MulY = B.CreateFMul(Y, LogX, "mul");
2603     // Since pow() may have side effects, e.g. errno,
2604     // dead code elimination may not be trusted to remove it.
2605     substituteInParent(Arg, MulY);
2606     return MulY;
2607   }
2608 
2609   // log(exp{,2,10}(y)) -> y*log({e,2,10})
2610   // TODO: There is no exp10() intrinsic yet.
2611   if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb ||
2612            ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) {
2613     Constant *Eul;
2614     if (ArgLb == ExpLb || ArgID == Intrinsic::exp)
2615       // FIXME: Add more precise value of e for long double.
2616       Eul = ConstantFP::get(Log->getType(), numbers::e);
2617     else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2)
2618       Eul = ConstantFP::get(Log->getType(), 2.0);
2619     else
2620       Eul = ConstantFP::get(Log->getType(), 10.0);
2621     Value *LogE = Log->doesNotAccessMemory()
2622                       ? B.CreateUnaryIntrinsic(LogID, Eul, nullptr, "log")
2623                       : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, NoAttrs);
2624     Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
2625     // Since exp() may have side effects, e.g. errno,
2626     // dead code elimination may not be trusted to remove it.
2627     substituteInParent(Arg, MulY);
2628     return MulY;
2629   }
2630 
2631   return Ret;
2632 }
2633 
2634 // sqrt(exp(X)) -> exp(X * 0.5)
2635 Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) {
2636   if (!CI->hasAllowReassoc())
2637     return nullptr;
2638 
2639   Function *SqrtFn = CI->getCalledFunction();
2640   CallInst *Arg = dyn_cast<CallInst>(CI->getArgOperand(0));
2641   if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse())
2642     return nullptr;
2643   Intrinsic::ID ArgID = Arg->getIntrinsicID();
2644   LibFunc ArgLb = NotLibFunc;
2645   TLI->getLibFunc(*Arg, ArgLb);
2646 
2647   LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb;
2648 
2649   if (TLI->getLibFunc(SqrtFn->getName(), SqrtLb))
2650     switch (SqrtLb) {
2651     case LibFunc_sqrtf:
2652       ExpLb = LibFunc_expf;
2653       Exp2Lb = LibFunc_exp2f;
2654       Exp10Lb = LibFunc_exp10f;
2655       break;
2656     case LibFunc_sqrt:
2657       ExpLb = LibFunc_exp;
2658       Exp2Lb = LibFunc_exp2;
2659       Exp10Lb = LibFunc_exp10;
2660       break;
2661     case LibFunc_sqrtl:
2662       ExpLb = LibFunc_expl;
2663       Exp2Lb = LibFunc_exp2l;
2664       Exp10Lb = LibFunc_exp10l;
2665       break;
2666     default:
2667       return nullptr;
2668     }
2669   else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) {
2670     if (CI->getType()->getScalarType()->isFloatTy()) {
2671       ExpLb = LibFunc_expf;
2672       Exp2Lb = LibFunc_exp2f;
2673       Exp10Lb = LibFunc_exp10f;
2674     } else if (CI->getType()->getScalarType()->isDoubleTy()) {
2675       ExpLb = LibFunc_exp;
2676       Exp2Lb = LibFunc_exp2;
2677       Exp10Lb = LibFunc_exp10;
2678     } else
2679       return nullptr;
2680   } else
2681     return nullptr;
2682 
2683   if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb &&
2684       ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2)
2685     return nullptr;
2686 
2687   IRBuilderBase::InsertPointGuard Guard(B);
2688   B.SetInsertPoint(Arg);
2689   auto *ExpOperand = Arg->getOperand(0);
2690   auto *FMul =
2691       B.CreateFMulFMF(ExpOperand, ConstantFP::get(ExpOperand->getType(), 0.5),
2692                       CI, "merged.sqrt");
2693 
2694   Arg->setOperand(0, FMul);
2695   return Arg;
2696 }
2697 
2698 Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
2699   Module *M = CI->getModule();
2700   Function *Callee = CI->getCalledFunction();
2701   Value *Ret = nullptr;
2702   // TODO: Once we have a way (other than checking for the existince of the
2703   // libcall) to tell whether our target can lower @llvm.sqrt, relax the
2704   // condition below.
2705   if (isLibFuncEmittable(M, TLI, LibFunc_sqrtf) &&
2706       (Callee->getName() == "sqrt" ||
2707        Callee->getIntrinsicID() == Intrinsic::sqrt))
2708     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2709 
2710   if (Value *Opt = mergeSqrtToExp(CI, B))
2711     return Opt;
2712 
2713   if (!CI->isFast())
2714     return Ret;
2715 
2716   Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
2717   if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
2718     return Ret;
2719 
2720   // We're looking for a repeated factor in a multiplication tree,
2721   // so we can do this fold: sqrt(x * x) -> fabs(x);
2722   // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
2723   Value *Op0 = I->getOperand(0);
2724   Value *Op1 = I->getOperand(1);
2725   Value *RepeatOp = nullptr;
2726   Value *OtherOp = nullptr;
2727   if (Op0 == Op1) {
2728     // Simple match: the operands of the multiply are identical.
2729     RepeatOp = Op0;
2730   } else {
2731     // Look for a more complicated pattern: one of the operands is itself
2732     // a multiply, so search for a common factor in that multiply.
2733     // Note: We don't bother looking any deeper than this first level or for
2734     // variations of this pattern because instcombine's visitFMUL and/or the
2735     // reassociation pass should give us this form.
2736     Value *OtherMul0, *OtherMul1;
2737     if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
2738       // Pattern: sqrt((x * y) * z)
2739       if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) {
2740         // Matched: sqrt((x * x) * z)
2741         RepeatOp = OtherMul0;
2742         OtherOp = Op1;
2743       }
2744     }
2745   }
2746   if (!RepeatOp)
2747     return Ret;
2748 
2749   // Fast math flags for any created instructions should match the sqrt
2750   // and multiply.
2751   IRBuilderBase::FastMathFlagGuard Guard(B);
2752   B.setFastMathFlags(I->getFastMathFlags());
2753 
2754   // If we found a repeated factor, hoist it out of the square root and
2755   // replace it with the fabs of that factor.
2756   Value *FabsCall =
2757       B.CreateUnaryIntrinsic(Intrinsic::fabs, RepeatOp, nullptr, "fabs");
2758   if (OtherOp) {
2759     // If we found a non-repeated factor, we still need to get its square
2760     // root. We then multiply that by the value that was simplified out
2761     // of the square root calculation.
2762     Value *SqrtCall =
2763         B.CreateUnaryIntrinsic(Intrinsic::sqrt, OtherOp, nullptr, "sqrt");
2764     return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall));
2765   }
2766   return copyFlags(*CI, FabsCall);
2767 }
2768 
2769 Value *LibCallSimplifier::optimizeTrigInversionPairs(CallInst *CI,
2770                                                      IRBuilderBase &B) {
2771   Module *M = CI->getModule();
2772   Function *Callee = CI->getCalledFunction();
2773   Value *Ret = nullptr;
2774   StringRef Name = Callee->getName();
2775   if (UnsafeFPShrink &&
2776       (Name == "tan" || Name == "atanh" || Name == "sinh" || Name == "cosh" ||
2777        Name == "asinh") &&
2778       hasFloatVersion(M, Name))
2779     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2780 
2781   Value *Op1 = CI->getArgOperand(0);
2782   auto *OpC = dyn_cast<CallInst>(Op1);
2783   if (!OpC)
2784     return Ret;
2785 
2786   // Both calls must be 'fast' in order to remove them.
2787   if (!CI->isFast() || !OpC->isFast())
2788     return Ret;
2789 
2790   // tan(atan(x)) -> x
2791   // atanh(tanh(x)) -> x
2792   // sinh(asinh(x)) -> x
2793   // asinh(sinh(x)) -> x
2794   // cosh(acosh(x)) -> x
2795   LibFunc Func;
2796   Function *F = OpC->getCalledFunction();
2797   if (F && TLI->getLibFunc(F->getName(), Func) &&
2798       isLibFuncEmittable(M, TLI, Func)) {
2799     LibFunc inverseFunc = llvm::StringSwitch<LibFunc>(Callee->getName())
2800                               .Case("tan", LibFunc_atan)
2801                               .Case("atanh", LibFunc_tanh)
2802                               .Case("sinh", LibFunc_asinh)
2803                               .Case("cosh", LibFunc_acosh)
2804                               .Case("tanf", LibFunc_atanf)
2805                               .Case("atanhf", LibFunc_tanhf)
2806                               .Case("sinhf", LibFunc_asinhf)
2807                               .Case("coshf", LibFunc_acoshf)
2808                               .Case("tanl", LibFunc_atanl)
2809                               .Case("atanhl", LibFunc_tanhl)
2810                               .Case("sinhl", LibFunc_asinhl)
2811                               .Case("coshl", LibFunc_acoshl)
2812                               .Case("asinh", LibFunc_sinh)
2813                               .Case("asinhf", LibFunc_sinhf)
2814                               .Case("asinhl", LibFunc_sinhl)
2815                               .Default(NumLibFuncs); // Used as error value
2816     if (Func == inverseFunc)
2817       Ret = OpC->getArgOperand(0);
2818   }
2819   return Ret;
2820 }
2821 
2822 static bool isTrigLibCall(CallInst *CI) {
2823   // We can only hope to do anything useful if we can ignore things like errno
2824   // and floating-point exceptions.
2825   // We already checked the prototype.
2826   return CI->doesNotThrow() && CI->doesNotAccessMemory();
2827 }
2828 
2829 static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
2830                              bool UseFloat, Value *&Sin, Value *&Cos,
2831                              Value *&SinCos, const TargetLibraryInfo *TLI) {
2832   Module *M = OrigCallee->getParent();
2833   Type *ArgTy = Arg->getType();
2834   Type *ResTy;
2835   StringRef Name;
2836 
2837   Triple T(OrigCallee->getParent()->getTargetTriple());
2838   if (UseFloat) {
2839     Name = "__sincospif_stret";
2840 
2841     assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
2842     // x86_64 can't use {float, float} since that would be returned in both
2843     // xmm0 and xmm1, which isn't what a real struct would do.
2844     ResTy = T.getArch() == Triple::x86_64
2845                 ? static_cast<Type *>(FixedVectorType::get(ArgTy, 2))
2846                 : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
2847   } else {
2848     Name = "__sincospi_stret";
2849     ResTy = StructType::get(ArgTy, ArgTy);
2850   }
2851 
2852   if (!isLibFuncEmittable(M, TLI, Name))
2853     return false;
2854   LibFunc TheLibFunc;
2855   TLI->getLibFunc(Name, TheLibFunc);
2856   FunctionCallee Callee = getOrInsertLibFunc(
2857       M, *TLI, TheLibFunc, OrigCallee->getAttributes(), ResTy, ArgTy);
2858 
2859   if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
2860     // If the argument is an instruction, it must dominate all uses so put our
2861     // sincos call there.
2862     B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
2863   } else {
2864     // Otherwise (e.g. for a constant) the beginning of the function is as
2865     // good a place as any.
2866     BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
2867     B.SetInsertPoint(&EntryBB, EntryBB.begin());
2868   }
2869 
2870   SinCos = B.CreateCall(Callee, Arg, "sincospi");
2871 
2872   if (SinCos->getType()->isStructTy()) {
2873     Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
2874     Cos = B.CreateExtractValue(SinCos, 1, "cospi");
2875   } else {
2876     Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
2877                                  "sinpi");
2878     Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
2879                                  "cospi");
2880   }
2881 
2882   return true;
2883 }
2884 
2885 static Value *optimizeSymmetricCall(CallInst *CI, bool IsEven,
2886                                     IRBuilderBase &B) {
2887   Value *X;
2888   Value *Src = CI->getArgOperand(0);
2889 
2890   if (match(Src, m_OneUse(m_FNeg(m_Value(X))))) {
2891     IRBuilderBase::FastMathFlagGuard Guard(B);
2892     B.setFastMathFlags(CI->getFastMathFlags());
2893 
2894     auto *CallInst = copyFlags(*CI, B.CreateCall(CI->getCalledFunction(), {X}));
2895     if (IsEven) {
2896       // Even function: f(-x) = f(x)
2897       return CallInst;
2898     }
2899     // Odd function: f(-x) = -f(x)
2900     return B.CreateFNeg(CallInst);
2901   }
2902 
2903   // Even function: f(abs(x)) = f(x), f(copysign(x, y)) = f(x)
2904   if (IsEven && (match(Src, m_FAbs(m_Value(X))) ||
2905                  match(Src, m_CopySign(m_Value(X), m_Value())))) {
2906     IRBuilderBase::FastMathFlagGuard Guard(B);
2907     B.setFastMathFlags(CI->getFastMathFlags());
2908 
2909     auto *CallInst = copyFlags(*CI, B.CreateCall(CI->getCalledFunction(), {X}));
2910     return CallInst;
2911   }
2912 
2913   return nullptr;
2914 }
2915 
2916 Value *LibCallSimplifier::optimizeSymmetric(CallInst *CI, LibFunc Func,
2917                                             IRBuilderBase &B) {
2918   switch (Func) {
2919   case LibFunc_cos:
2920   case LibFunc_cosf:
2921   case LibFunc_cosl:
2922     return optimizeSymmetricCall(CI, /*IsEven*/ true, B);
2923 
2924   case LibFunc_sin:
2925   case LibFunc_sinf:
2926   case LibFunc_sinl:
2927 
2928   case LibFunc_tan:
2929   case LibFunc_tanf:
2930   case LibFunc_tanl:
2931 
2932   case LibFunc_erf:
2933   case LibFunc_erff:
2934   case LibFunc_erfl:
2935     return optimizeSymmetricCall(CI, /*IsEven*/ false, B);
2936 
2937   default:
2938     return nullptr;
2939   }
2940 }
2941 
2942 Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B) {
2943   // Make sure the prototype is as expected, otherwise the rest of the
2944   // function is probably invalid and likely to abort.
2945   if (!isTrigLibCall(CI))
2946     return nullptr;
2947 
2948   Value *Arg = CI->getArgOperand(0);
2949   SmallVector<CallInst *, 1> SinCalls;
2950   SmallVector<CallInst *, 1> CosCalls;
2951   SmallVector<CallInst *, 1> SinCosCalls;
2952 
2953   bool IsFloat = Arg->getType()->isFloatTy();
2954 
2955   // Look for all compatible sinpi, cospi and sincospi calls with the same
2956   // argument. If there are enough (in some sense) we can make the
2957   // substitution.
2958   Function *F = CI->getFunction();
2959   for (User *U : Arg->users())
2960     classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
2961 
2962   // It's only worthwhile if both sinpi and cospi are actually used.
2963   if (SinCalls.empty() || CosCalls.empty())
2964     return nullptr;
2965 
2966   Value *Sin, *Cos, *SinCos;
2967   if (!insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
2968                         SinCos, TLI))
2969     return nullptr;
2970 
2971   auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
2972                                  Value *Res) {
2973     for (CallInst *C : Calls)
2974       replaceAllUsesWith(C, Res);
2975   };
2976 
2977   replaceTrigInsts(SinCalls, Sin);
2978   replaceTrigInsts(CosCalls, Cos);
2979   replaceTrigInsts(SinCosCalls, SinCos);
2980 
2981   return IsSin ? Sin : Cos;
2982 }
2983 
2984 void LibCallSimplifier::classifyArgUse(
2985     Value *Val, Function *F, bool IsFloat,
2986     SmallVectorImpl<CallInst *> &SinCalls,
2987     SmallVectorImpl<CallInst *> &CosCalls,
2988     SmallVectorImpl<CallInst *> &SinCosCalls) {
2989   auto *CI = dyn_cast<CallInst>(Val);
2990   if (!CI || CI->use_empty())
2991     return;
2992 
2993   // Don't consider calls in other functions.
2994   if (CI->getFunction() != F)
2995     return;
2996 
2997   Module *M = CI->getModule();
2998   Function *Callee = CI->getCalledFunction();
2999   LibFunc Func;
3000   if (!Callee || !TLI->getLibFunc(*Callee, Func) ||
3001       !isLibFuncEmittable(M, TLI, Func) ||
3002       !isTrigLibCall(CI))
3003     return;
3004 
3005   if (IsFloat) {
3006     if (Func == LibFunc_sinpif)
3007       SinCalls.push_back(CI);
3008     else if (Func == LibFunc_cospif)
3009       CosCalls.push_back(CI);
3010     else if (Func == LibFunc_sincospif_stret)
3011       SinCosCalls.push_back(CI);
3012   } else {
3013     if (Func == LibFunc_sinpi)
3014       SinCalls.push_back(CI);
3015     else if (Func == LibFunc_cospi)
3016       CosCalls.push_back(CI);
3017     else if (Func == LibFunc_sincospi_stret)
3018       SinCosCalls.push_back(CI);
3019   }
3020 }
3021 
3022 /// Constant folds remquo
3023 Value *LibCallSimplifier::optimizeRemquo(CallInst *CI, IRBuilderBase &B) {
3024   const APFloat *X, *Y;
3025   if (!match(CI->getArgOperand(0), m_APFloat(X)) ||
3026       !match(CI->getArgOperand(1), m_APFloat(Y)))
3027     return nullptr;
3028 
3029   APFloat::opStatus Status;
3030   APFloat Quot = *X;
3031   Status = Quot.divide(*Y, APFloat::rmNearestTiesToEven);
3032   if (Status != APFloat::opOK && Status != APFloat::opInexact)
3033     return nullptr;
3034   APFloat Rem = *X;
3035   if (Rem.remainder(*Y) != APFloat::opOK)
3036     return nullptr;
3037 
3038   // TODO: We can only keep at least the three of the last bits of x/y
3039   unsigned IntBW = TLI->getIntSize();
3040   APSInt QuotInt(IntBW, /*isUnsigned=*/false);
3041   bool IsExact;
3042   Status =
3043       Quot.convertToInteger(QuotInt, APFloat::rmNearestTiesToEven, &IsExact);
3044   if (Status != APFloat::opOK && Status != APFloat::opInexact)
3045     return nullptr;
3046 
3047   B.CreateAlignedStore(
3048       ConstantInt::get(B.getIntNTy(IntBW), QuotInt.getExtValue()),
3049       CI->getArgOperand(2), CI->getParamAlign(2));
3050   return ConstantFP::get(CI->getType(), Rem);
3051 }
3052 
3053 //===----------------------------------------------------------------------===//
3054 // Integer Library Call Optimizations
3055 //===----------------------------------------------------------------------===//
3056 
3057 Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilderBase &B) {
3058   // All variants of ffs return int which need not be 32 bits wide.
3059   // ffs{,l,ll}(x) -> x != 0 ? (int)llvm.cttz(x)+1 : 0
3060   Type *RetType = CI->getType();
3061   Value *Op = CI->getArgOperand(0);
3062   Type *ArgType = Op->getType();
3063   Value *V = B.CreateIntrinsic(Intrinsic::cttz, {ArgType}, {Op, B.getTrue()},
3064                                nullptr, "cttz");
3065   V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
3066   V = B.CreateIntCast(V, RetType, false);
3067 
3068   Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
3069   return B.CreateSelect(Cond, V, ConstantInt::get(RetType, 0));
3070 }
3071 
3072 Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilderBase &B) {
3073   // All variants of fls return int which need not be 32 bits wide.
3074   // fls{,l,ll}(x) -> (int)(sizeInBits(x) - llvm.ctlz(x, false))
3075   Value *Op = CI->getArgOperand(0);
3076   Type *ArgType = Op->getType();
3077   Value *V = B.CreateIntrinsic(Intrinsic::ctlz, {ArgType}, {Op, B.getFalse()},
3078                                nullptr, "ctlz");
3079   V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
3080                   V);
3081   return B.CreateIntCast(V, CI->getType(), false);
3082 }
3083 
3084 Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) {
3085   // abs(x) -> x <s 0 ? -x : x
3086   // The negation has 'nsw' because abs of INT_MIN is undefined.
3087   Value *X = CI->getArgOperand(0);
3088   Value *IsNeg = B.CreateIsNeg(X);
3089   Value *NegX = B.CreateNSWNeg(X, "neg");
3090   return B.CreateSelect(IsNeg, NegX, X);
3091 }
3092 
3093 Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilderBase &B) {
3094   // isdigit(c) -> (c-'0') <u 10
3095   Value *Op = CI->getArgOperand(0);
3096   Type *ArgType = Op->getType();
3097   Op = B.CreateSub(Op, ConstantInt::get(ArgType, '0'), "isdigittmp");
3098   Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 10), "isdigit");
3099   return B.CreateZExt(Op, CI->getType());
3100 }
3101 
3102 Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilderBase &B) {
3103   // isascii(c) -> c <u 128
3104   Value *Op = CI->getArgOperand(0);
3105   Type *ArgType = Op->getType();
3106   Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 128), "isascii");
3107   return B.CreateZExt(Op, CI->getType());
3108 }
3109 
3110 Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilderBase &B) {
3111   // toascii(c) -> c & 0x7f
3112   return B.CreateAnd(CI->getArgOperand(0),
3113                      ConstantInt::get(CI->getType(), 0x7F));
3114 }
3115 
3116 // Fold calls to atoi, atol, and atoll.
3117 Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
3118   CI->addParamAttr(0, Attribute::NoCapture);
3119 
3120   StringRef Str;
3121   if (!getConstantStringInfo(CI->getArgOperand(0), Str))
3122     return nullptr;
3123 
3124   return convertStrToInt(CI, Str, nullptr, 10, /*AsSigned=*/true, B);
3125 }
3126 
3127 // Fold calls to strtol, strtoll, strtoul, and strtoull.
3128 Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B,
3129                                            bool AsSigned) {
3130   Value *EndPtr = CI->getArgOperand(1);
3131   if (isa<ConstantPointerNull>(EndPtr)) {
3132     // With a null EndPtr, this function won't capture the main argument.
3133     // It would be readonly too, except that it still may write to errno.
3134     CI->addParamAttr(0, Attribute::NoCapture);
3135     EndPtr = nullptr;
3136   } else if (!isKnownNonZero(EndPtr, DL))
3137     return nullptr;
3138 
3139   StringRef Str;
3140   if (!getConstantStringInfo(CI->getArgOperand(0), Str))
3141     return nullptr;
3142 
3143   if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
3144     return convertStrToInt(CI, Str, EndPtr, CInt->getSExtValue(), AsSigned, B);
3145   }
3146 
3147   return nullptr;
3148 }
3149 
3150 //===----------------------------------------------------------------------===//
3151 // Formatting and IO Library Call Optimizations
3152 //===----------------------------------------------------------------------===//
3153 
3154 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
3155 
3156 Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B,
3157                                                  int StreamArg) {
3158   Function *Callee = CI->getCalledFunction();
3159   // Error reporting calls should be cold, mark them as such.
3160   // This applies even to non-builtin calls: it is only a hint and applies to
3161   // functions that the frontend might not understand as builtins.
3162 
3163   // This heuristic was suggested in:
3164   // Improving Static Branch Prediction in a Compiler
3165   // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
3166   // Proceedings of PACT'98, Oct. 1998, IEEE
3167   if (!CI->hasFnAttr(Attribute::Cold) &&
3168       isReportingError(Callee, CI, StreamArg)) {
3169     CI->addFnAttr(Attribute::Cold);
3170   }
3171 
3172   return nullptr;
3173 }
3174 
3175 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
3176   if (!Callee || !Callee->isDeclaration())
3177     return false;
3178 
3179   if (StreamArg < 0)
3180     return true;
3181 
3182   // These functions might be considered cold, but only if their stream
3183   // argument is stderr.
3184 
3185   if (StreamArg >= (int)CI->arg_size())
3186     return false;
3187   LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
3188   if (!LI)
3189     return false;
3190   GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
3191   if (!GV || !GV->isDeclaration())
3192     return false;
3193   return GV->getName() == "stderr";
3194 }
3195 
3196 Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
3197   // Check for a fixed format string.
3198   StringRef FormatStr;
3199   if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
3200     return nullptr;
3201 
3202   // Empty format string -> noop.
3203   if (FormatStr.empty()) // Tolerate printf's declared void.
3204     return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0);
3205 
3206   // Do not do any of the following transformations if the printf return value
3207   // is used, in general the printf return value is not compatible with either
3208   // putchar() or puts().
3209   if (!CI->use_empty())
3210     return nullptr;
3211 
3212   Type *IntTy = CI->getType();
3213   // printf("x") -> putchar('x'), even for "%" and "%%".
3214   if (FormatStr.size() == 1 || FormatStr == "%%") {
3215     // Convert the character to unsigned char before passing it to putchar
3216     // to avoid host-specific sign extension in the IR.  Putchar converts
3217     // it to unsigned char regardless.
3218     Value *IntChar = ConstantInt::get(IntTy, (unsigned char)FormatStr[0]);
3219     return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3220   }
3221 
3222   // Try to remove call or emit putchar/puts.
3223   if (FormatStr == "%s" && CI->arg_size() > 1) {
3224     StringRef OperandStr;
3225     if (!getConstantStringInfo(CI->getOperand(1), OperandStr))
3226       return nullptr;
3227     // printf("%s", "") --> NOP
3228     if (OperandStr.empty())
3229       return (Value *)CI;
3230     // printf("%s", "a") --> putchar('a')
3231     if (OperandStr.size() == 1) {
3232       // Convert the character to unsigned char before passing it to putchar
3233       // to avoid host-specific sign extension in the IR.  Putchar converts
3234       // it to unsigned char regardless.
3235       Value *IntChar = ConstantInt::get(IntTy, (unsigned char)OperandStr[0]);
3236       return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3237     }
3238     // printf("%s", str"\n") --> puts(str)
3239     if (OperandStr.back() == '\n') {
3240       OperandStr = OperandStr.drop_back();
3241       Value *GV = B.CreateGlobalString(OperandStr, "str");
3242       return copyFlags(*CI, emitPutS(GV, B, TLI));
3243     }
3244     return nullptr;
3245   }
3246 
3247   // printf("foo\n") --> puts("foo")
3248   if (FormatStr.back() == '\n' &&
3249       !FormatStr.contains('%')) { // No format characters.
3250     // Create a string literal with no \n on it.  We expect the constant merge
3251     // pass to be run after this pass, to merge duplicate strings.
3252     FormatStr = FormatStr.drop_back();
3253     Value *GV = B.CreateGlobalString(FormatStr, "str");
3254     return copyFlags(*CI, emitPutS(GV, B, TLI));
3255   }
3256 
3257   // Optimize specific format strings.
3258   // printf("%c", chr) --> putchar(chr)
3259   if (FormatStr == "%c" && CI->arg_size() > 1 &&
3260       CI->getArgOperand(1)->getType()->isIntegerTy()) {
3261     // Convert the argument to the type expected by putchar, i.e., int, which
3262     // need not be 32 bits wide but which is the same as printf's return type.
3263     Value *IntChar = B.CreateIntCast(CI->getArgOperand(1), IntTy, false);
3264     return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3265   }
3266 
3267   // printf("%s\n", str) --> puts(str)
3268   if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
3269       CI->getArgOperand(1)->getType()->isPointerTy())
3270     return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI));
3271   return nullptr;
3272 }
3273 
3274 Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
3275 
3276   Module *M = CI->getModule();
3277   Function *Callee = CI->getCalledFunction();
3278   FunctionType *FT = Callee->getFunctionType();
3279   if (Value *V = optimizePrintFString(CI, B)) {
3280     return V;
3281   }
3282 
3283   annotateNonNullNoUndefBasedOnAccess(CI, 0);
3284 
3285   // printf(format, ...) -> iprintf(format, ...) if no floating point
3286   // arguments.
3287   if (isLibFuncEmittable(M, TLI, LibFunc_iprintf) &&
3288       !callHasFloatingPointArgument(CI)) {
3289     FunctionCallee IPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_iprintf, FT,
3290                                                   Callee->getAttributes());
3291     CallInst *New = cast<CallInst>(CI->clone());
3292     New->setCalledFunction(IPrintFFn);
3293     B.Insert(New);
3294     return New;
3295   }
3296 
3297   // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
3298   // arguments.
3299   if (isLibFuncEmittable(M, TLI, LibFunc_small_printf) &&
3300       !callHasFP128Argument(CI)) {
3301     auto SmallPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_printf, FT,
3302                                             Callee->getAttributes());
3303     CallInst *New = cast<CallInst>(CI->clone());
3304     New->setCalledFunction(SmallPrintFFn);
3305     B.Insert(New);
3306     return New;
3307   }
3308 
3309   return nullptr;
3310 }
3311 
3312 Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
3313                                                 IRBuilderBase &B) {
3314   // Check for a fixed format string.
3315   StringRef FormatStr;
3316   if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
3317     return nullptr;
3318 
3319   // If we just have a format string (nothing else crazy) transform it.
3320   Value *Dest = CI->getArgOperand(0);
3321   if (CI->arg_size() == 2) {
3322     // Make sure there's no % in the constant array.  We could try to handle
3323     // %% -> % in the future if we cared.
3324     if (FormatStr.contains('%'))
3325       return nullptr; // we found a format specifier, bail out.
3326 
3327     // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
3328     B.CreateMemCpy(
3329         Dest, Align(1), CI->getArgOperand(1), Align(1),
3330         ConstantInt::get(DL.getIntPtrType(CI->getContext()),
3331                          FormatStr.size() + 1)); // Copy the null byte.
3332     return ConstantInt::get(CI->getType(), FormatStr.size());
3333   }
3334 
3335   // The remaining optimizations require the format string to be "%s" or "%c"
3336   // and have an extra operand.
3337   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
3338     return nullptr;
3339 
3340   // Decode the second character of the format string.
3341   if (FormatStr[1] == 'c') {
3342     // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
3343     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
3344       return nullptr;
3345     Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
3346     Value *Ptr = Dest;
3347     B.CreateStore(V, Ptr);
3348     Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
3349     B.CreateStore(B.getInt8(0), Ptr);
3350 
3351     return ConstantInt::get(CI->getType(), 1);
3352   }
3353 
3354   if (FormatStr[1] == 's') {
3355     // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
3356     // strlen(str)+1)
3357     if (!CI->getArgOperand(2)->getType()->isPointerTy())
3358       return nullptr;
3359 
3360     if (CI->use_empty())
3361       // sprintf(dest, "%s", str) -> strcpy(dest, str)
3362       return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI));
3363 
3364     uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
3365     if (SrcLen) {
3366       B.CreateMemCpy(
3367           Dest, Align(1), CI->getArgOperand(2), Align(1),
3368           ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen));
3369       // Returns total number of characters written without null-character.
3370       return ConstantInt::get(CI->getType(), SrcLen - 1);
3371     } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
3372       // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
3373       Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
3374       return B.CreateIntCast(PtrDiff, CI->getType(), false);
3375     }
3376 
3377     bool OptForSize = CI->getFunction()->hasOptSize() ||
3378                       llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
3379                                                   PGSOQueryType::IRPass);
3380     if (OptForSize)
3381       return nullptr;
3382 
3383     Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
3384     if (!Len)
3385       return nullptr;
3386     Value *IncLen =
3387         B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
3388     B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(2), Align(1), IncLen);
3389 
3390     // The sprintf result is the unincremented number of bytes in the string.
3391     return B.CreateIntCast(Len, CI->getType(), false);
3392   }
3393   return nullptr;
3394 }
3395 
3396 Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
3397   Module *M = CI->getModule();
3398   Function *Callee = CI->getCalledFunction();
3399   FunctionType *FT = Callee->getFunctionType();
3400   if (Value *V = optimizeSPrintFString(CI, B)) {
3401     return V;
3402   }
3403 
3404   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
3405 
3406   // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
3407   // point arguments.
3408   if (isLibFuncEmittable(M, TLI, LibFunc_siprintf) &&
3409       !callHasFloatingPointArgument(CI)) {
3410     FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_siprintf,
3411                                                    FT, Callee->getAttributes());
3412     CallInst *New = cast<CallInst>(CI->clone());
3413     New->setCalledFunction(SIPrintFFn);
3414     B.Insert(New);
3415     return New;
3416   }
3417 
3418   // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
3419   // floating point arguments.
3420   if (isLibFuncEmittable(M, TLI, LibFunc_small_sprintf) &&
3421       !callHasFP128Argument(CI)) {
3422     auto SmallSPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_sprintf, FT,
3423                                              Callee->getAttributes());
3424     CallInst *New = cast<CallInst>(CI->clone());
3425     New->setCalledFunction(SmallSPrintFFn);
3426     B.Insert(New);
3427     return New;
3428   }
3429 
3430   return nullptr;
3431 }
3432 
3433 // Transform an snprintf call CI with the bound N to format the string Str
3434 // either to a call to memcpy, or to single character a store, or to nothing,
3435 // and fold the result to a constant.  A nonnull StrArg refers to the string
3436 // argument being formatted.  Otherwise the call is one with N < 2 and
3437 // the "%c" directive to format a single character.
3438 Value *LibCallSimplifier::emitSnPrintfMemCpy(CallInst *CI, Value *StrArg,
3439                                              StringRef Str, uint64_t N,
3440                                              IRBuilderBase &B) {
3441   assert(StrArg || (N < 2 && Str.size() == 1));
3442 
3443   unsigned IntBits = TLI->getIntSize();
3444   uint64_t IntMax = maxIntN(IntBits);
3445   if (Str.size() > IntMax)
3446     // Bail if the string is longer than INT_MAX.  POSIX requires
3447     // implementations to set errno to EOVERFLOW in this case, in
3448     // addition to when N is larger than that (checked by the caller).
3449     return nullptr;
3450 
3451   Value *StrLen = ConstantInt::get(CI->getType(), Str.size());
3452   if (N == 0)
3453     return StrLen;
3454 
3455   // Set to the number of bytes to copy fron StrArg which is also
3456   // the offset of the terinating nul.
3457   uint64_t NCopy;
3458   if (N > Str.size())
3459     // Copy the full string, including the terminating nul (which must
3460     // be present regardless of the bound).
3461     NCopy = Str.size() + 1;
3462   else
3463     NCopy = N - 1;
3464 
3465   Value *DstArg = CI->getArgOperand(0);
3466   if (NCopy && StrArg)
3467     // Transform the call to lvm.memcpy(dst, fmt, N).
3468     copyFlags(
3469          *CI,
3470           B.CreateMemCpy(
3471                          DstArg, Align(1), StrArg, Align(1),
3472               ConstantInt::get(DL.getIntPtrType(CI->getContext()), NCopy)));
3473 
3474   if (N > Str.size())
3475     // Return early when the whole format string, including the final nul,
3476     // has been copied.
3477     return StrLen;
3478 
3479   // Otherwise, when truncating the string append a terminating nul.
3480   Type *Int8Ty = B.getInt8Ty();
3481   Value *NulOff = B.getIntN(IntBits, NCopy);
3482   Value *DstEnd = B.CreateInBoundsGEP(Int8Ty, DstArg, NulOff, "endptr");
3483   B.CreateStore(ConstantInt::get(Int8Ty, 0), DstEnd);
3484   return StrLen;
3485 }
3486 
3487 Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
3488                                                  IRBuilderBase &B) {
3489   // Check for size
3490   ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
3491   if (!Size)
3492     return nullptr;
3493 
3494   uint64_t N = Size->getZExtValue();
3495   uint64_t IntMax = maxIntN(TLI->getIntSize());
3496   if (N > IntMax)
3497     // Bail if the bound exceeds INT_MAX.  POSIX requires implementations
3498     // to set errno to EOVERFLOW in this case.
3499     return nullptr;
3500 
3501   Value *DstArg = CI->getArgOperand(0);
3502   Value *FmtArg = CI->getArgOperand(2);
3503 
3504   // Check for a fixed format string.
3505   StringRef FormatStr;
3506   if (!getConstantStringInfo(FmtArg, FormatStr))
3507     return nullptr;
3508 
3509   // If we just have a format string (nothing else crazy) transform it.
3510   if (CI->arg_size() == 3) {
3511     if (FormatStr.contains('%'))
3512       // Bail if the format string contains a directive and there are
3513       // no arguments.  We could handle "%%" in the future.
3514       return nullptr;
3515 
3516     return emitSnPrintfMemCpy(CI, FmtArg, FormatStr, N, B);
3517   }
3518 
3519   // The remaining optimizations require the format string to be "%s" or "%c"
3520   // and have an extra operand.
3521   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() != 4)
3522     return nullptr;
3523 
3524   // Decode the second character of the format string.
3525   if (FormatStr[1] == 'c') {
3526     if (N <= 1) {
3527       // Use an arbitary string of length 1 to transform the call into
3528       // either a nul store (N == 1) or a no-op (N == 0) and fold it
3529       // to one.
3530       StringRef CharStr("*");
3531       return emitSnPrintfMemCpy(CI, nullptr, CharStr, N, B);
3532     }
3533 
3534     // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
3535     if (!CI->getArgOperand(3)->getType()->isIntegerTy())
3536       return nullptr;
3537     Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
3538     Value *Ptr = DstArg;
3539     B.CreateStore(V, Ptr);
3540     Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
3541     B.CreateStore(B.getInt8(0), Ptr);
3542     return ConstantInt::get(CI->getType(), 1);
3543   }
3544 
3545   if (FormatStr[1] != 's')
3546     return nullptr;
3547 
3548   Value *StrArg = CI->getArgOperand(3);
3549   // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1)
3550   StringRef Str;
3551   if (!getConstantStringInfo(StrArg, Str))
3552     return nullptr;
3553 
3554   return emitSnPrintfMemCpy(CI, StrArg, Str, N, B);
3555 }
3556 
3557 Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) {
3558   if (Value *V = optimizeSnPrintFString(CI, B)) {
3559     return V;
3560   }
3561 
3562   if (isKnownNonZero(CI->getOperand(1), DL))
3563     annotateNonNullNoUndefBasedOnAccess(CI, 0);
3564   return nullptr;
3565 }
3566 
3567 Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
3568                                                 IRBuilderBase &B) {
3569   optimizeErrorReporting(CI, B, 0);
3570 
3571   // All the optimizations depend on the format string.
3572   StringRef FormatStr;
3573   if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
3574     return nullptr;
3575 
3576   // Do not do any of the following transformations if the fprintf return
3577   // value is used, in general the fprintf return value is not compatible
3578   // with fwrite(), fputc() or fputs().
3579   if (!CI->use_empty())
3580     return nullptr;
3581 
3582   // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
3583   if (CI->arg_size() == 2) {
3584     // Could handle %% -> % if we cared.
3585     if (FormatStr.contains('%'))
3586       return nullptr; // We found a format specifier.
3587 
3588     unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
3589     Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
3590     return copyFlags(
3591         *CI, emitFWrite(CI->getArgOperand(1),
3592                         ConstantInt::get(SizeTTy, FormatStr.size()),
3593                         CI->getArgOperand(0), B, DL, TLI));
3594   }
3595 
3596   // The remaining optimizations require the format string to be "%s" or "%c"
3597   // and have an extra operand.
3598   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
3599     return nullptr;
3600 
3601   // Decode the second character of the format string.
3602   if (FormatStr[1] == 'c') {
3603     // fprintf(F, "%c", chr) --> fputc((int)chr, F)
3604     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
3605       return nullptr;
3606     Type *IntTy = B.getIntNTy(TLI->getIntSize());
3607     Value *V = B.CreateIntCast(CI->getArgOperand(2), IntTy, /*isSigned*/ true,
3608                                "chari");
3609     return copyFlags(*CI, emitFPutC(V, CI->getArgOperand(0), B, TLI));
3610   }
3611 
3612   if (FormatStr[1] == 's') {
3613     // fprintf(F, "%s", str) --> fputs(str, F)
3614     if (!CI->getArgOperand(2)->getType()->isPointerTy())
3615       return nullptr;
3616     return copyFlags(
3617         *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI));
3618   }
3619   return nullptr;
3620 }
3621 
3622 Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
3623   Module *M = CI->getModule();
3624   Function *Callee = CI->getCalledFunction();
3625   FunctionType *FT = Callee->getFunctionType();
3626   if (Value *V = optimizeFPrintFString(CI, B)) {
3627     return V;
3628   }
3629 
3630   // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
3631   // floating point arguments.
3632   if (isLibFuncEmittable(M, TLI, LibFunc_fiprintf) &&
3633       !callHasFloatingPointArgument(CI)) {
3634     FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_fiprintf,
3635                                                    FT, Callee->getAttributes());
3636     CallInst *New = cast<CallInst>(CI->clone());
3637     New->setCalledFunction(FIPrintFFn);
3638     B.Insert(New);
3639     return New;
3640   }
3641 
3642   // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
3643   // 128-bit floating point arguments.
3644   if (isLibFuncEmittable(M, TLI, LibFunc_small_fprintf) &&
3645       !callHasFP128Argument(CI)) {
3646     auto SmallFPrintFFn =
3647         getOrInsertLibFunc(M, *TLI, LibFunc_small_fprintf, FT,
3648                            Callee->getAttributes());
3649     CallInst *New = cast<CallInst>(CI->clone());
3650     New->setCalledFunction(SmallFPrintFFn);
3651     B.Insert(New);
3652     return New;
3653   }
3654 
3655   return nullptr;
3656 }
3657 
3658 Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
3659   optimizeErrorReporting(CI, B, 3);
3660 
3661   // Get the element size and count.
3662   ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
3663   ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
3664   if (SizeC && CountC) {
3665     uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
3666 
3667     // If this is writing zero records, remove the call (it's a noop).
3668     if (Bytes == 0)
3669       return ConstantInt::get(CI->getType(), 0);
3670 
3671     // If this is writing one byte, turn it into fputc.
3672     // This optimisation is only valid, if the return value is unused.
3673     if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
3674       Value *Char = B.CreateLoad(B.getInt8Ty(), CI->getArgOperand(0), "char");
3675       Type *IntTy = B.getIntNTy(TLI->getIntSize());
3676       Value *Cast = B.CreateIntCast(Char, IntTy, /*isSigned*/ true, "chari");
3677       Value *NewCI = emitFPutC(Cast, CI->getArgOperand(3), B, TLI);
3678       return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
3679     }
3680   }
3681 
3682   return nullptr;
3683 }
3684 
3685 Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
3686   optimizeErrorReporting(CI, B, 1);
3687 
3688   // Don't rewrite fputs to fwrite when optimising for size because fwrite
3689   // requires more arguments and thus extra MOVs are required.
3690   bool OptForSize = CI->getFunction()->hasOptSize() ||
3691                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
3692                                                 PGSOQueryType::IRPass);
3693   if (OptForSize)
3694     return nullptr;
3695 
3696   // We can't optimize if return value is used.
3697   if (!CI->use_empty())
3698     return nullptr;
3699 
3700   // fputs(s,F) --> fwrite(s,strlen(s),1,F)
3701   uint64_t Len = GetStringLength(CI->getArgOperand(0));
3702   if (!Len)
3703     return nullptr;
3704 
3705   // Known to have no uses (see above).
3706   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
3707   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
3708   return copyFlags(
3709       *CI,
3710       emitFWrite(CI->getArgOperand(0),
3711                  ConstantInt::get(SizeTTy, Len - 1),
3712                  CI->getArgOperand(1), B, DL, TLI));
3713 }
3714 
3715 Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
3716   annotateNonNullNoUndefBasedOnAccess(CI, 0);
3717   if (!CI->use_empty())
3718     return nullptr;
3719 
3720   // Check for a constant string.
3721   // puts("") -> putchar('\n')
3722   StringRef Str;
3723   if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) {
3724     // putchar takes an argument of the same type as puts returns, i.e.,
3725     // int, which need not be 32 bits wide.
3726     Type *IntTy = CI->getType();
3727     return copyFlags(*CI, emitPutChar(ConstantInt::get(IntTy, '\n'), B, TLI));
3728   }
3729 
3730   return nullptr;
3731 }
3732 
3733 Value *LibCallSimplifier::optimizeExit(CallInst *CI) {
3734 
3735   // Mark 'exit' as cold if its not exit(0) (success).
3736   const APInt *C;
3737   if (!CI->hasFnAttr(Attribute::Cold) &&
3738       match(CI->getArgOperand(0), m_APInt(C)) && !C->isZero()) {
3739     CI->addFnAttr(Attribute::Cold);
3740   }
3741   return nullptr;
3742 }
3743 
3744 Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
3745   // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
3746   return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1),
3747                                         CI->getArgOperand(0), Align(1),
3748                                         CI->getArgOperand(2)));
3749 }
3750 
3751 bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) {
3752   SmallString<20> FloatFuncName = FuncName;
3753   FloatFuncName += 'f';
3754   return isLibFuncEmittable(M, TLI, FloatFuncName);
3755 }
3756 
3757 Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
3758                                                       IRBuilderBase &Builder) {
3759   Module *M = CI->getModule();
3760   LibFunc Func;
3761   Function *Callee = CI->getCalledFunction();
3762   // Check for string/memory library functions.
3763   if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
3764     // Make sure we never change the calling convention.
3765     assert(
3766         (ignoreCallingConv(Func) ||
3767          TargetLibraryInfoImpl::isCallingConvCCompatible(CI)) &&
3768         "Optimizing string/memory libcall would change the calling convention");
3769     switch (Func) {
3770     case LibFunc_strcat:
3771       return optimizeStrCat(CI, Builder);
3772     case LibFunc_strncat:
3773       return optimizeStrNCat(CI, Builder);
3774     case LibFunc_strchr:
3775       return optimizeStrChr(CI, Builder);
3776     case LibFunc_strrchr:
3777       return optimizeStrRChr(CI, Builder);
3778     case LibFunc_strcmp:
3779       return optimizeStrCmp(CI, Builder);
3780     case LibFunc_strncmp:
3781       return optimizeStrNCmp(CI, Builder);
3782     case LibFunc_strcpy:
3783       return optimizeStrCpy(CI, Builder);
3784     case LibFunc_stpcpy:
3785       return optimizeStpCpy(CI, Builder);
3786     case LibFunc_strlcpy:
3787       return optimizeStrLCpy(CI, Builder);
3788     case LibFunc_stpncpy:
3789       return optimizeStringNCpy(CI, /*RetEnd=*/true, Builder);
3790     case LibFunc_strncpy:
3791       return optimizeStringNCpy(CI, /*RetEnd=*/false, Builder);
3792     case LibFunc_strlen:
3793       return optimizeStrLen(CI, Builder);
3794     case LibFunc_strnlen:
3795       return optimizeStrNLen(CI, Builder);
3796     case LibFunc_strpbrk:
3797       return optimizeStrPBrk(CI, Builder);
3798     case LibFunc_strndup:
3799       return optimizeStrNDup(CI, Builder);
3800     case LibFunc_strtol:
3801     case LibFunc_strtod:
3802     case LibFunc_strtof:
3803     case LibFunc_strtoul:
3804     case LibFunc_strtoll:
3805     case LibFunc_strtold:
3806     case LibFunc_strtoull:
3807       return optimizeStrTo(CI, Builder);
3808     case LibFunc_strspn:
3809       return optimizeStrSpn(CI, Builder);
3810     case LibFunc_strcspn:
3811       return optimizeStrCSpn(CI, Builder);
3812     case LibFunc_strstr:
3813       return optimizeStrStr(CI, Builder);
3814     case LibFunc_memchr:
3815       return optimizeMemChr(CI, Builder);
3816     case LibFunc_memrchr:
3817       return optimizeMemRChr(CI, Builder);
3818     case LibFunc_bcmp:
3819       return optimizeBCmp(CI, Builder);
3820     case LibFunc_memcmp:
3821       return optimizeMemCmp(CI, Builder);
3822     case LibFunc_memcpy:
3823       return optimizeMemCpy(CI, Builder);
3824     case LibFunc_memccpy:
3825       return optimizeMemCCpy(CI, Builder);
3826     case LibFunc_mempcpy:
3827       return optimizeMemPCpy(CI, Builder);
3828     case LibFunc_memmove:
3829       return optimizeMemMove(CI, Builder);
3830     case LibFunc_memset:
3831       return optimizeMemSet(CI, Builder);
3832     case LibFunc_realloc:
3833       return optimizeRealloc(CI, Builder);
3834     case LibFunc_wcslen:
3835       return optimizeWcslen(CI, Builder);
3836     case LibFunc_bcopy:
3837       return optimizeBCopy(CI, Builder);
3838     case LibFunc_Znwm:
3839     case LibFunc_ZnwmRKSt9nothrow_t:
3840     case LibFunc_ZnwmSt11align_val_t:
3841     case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
3842     case LibFunc_Znam:
3843     case LibFunc_ZnamRKSt9nothrow_t:
3844     case LibFunc_ZnamSt11align_val_t:
3845     case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
3846     case LibFunc_Znwm12__hot_cold_t:
3847     case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
3848     case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
3849     case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
3850     case LibFunc_Znam12__hot_cold_t:
3851     case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
3852     case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
3853     case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
3854       return optimizeNew(CI, Builder, Func);
3855     default:
3856       break;
3857     }
3858   }
3859   return nullptr;
3860 }
3861 
3862 /// Constant folding nan/nanf/nanl.
3863 static Value *optimizeNaN(CallInst *CI) {
3864   StringRef CharSeq;
3865   if (!getConstantStringInfo(CI->getArgOperand(0), CharSeq))
3866     return nullptr;
3867 
3868   APInt Fill;
3869   // Treat empty strings as if they were zero.
3870   if (CharSeq.empty())
3871     Fill = APInt(32, 0);
3872   else if (CharSeq.getAsInteger(0, Fill))
3873     return nullptr;
3874 
3875   return ConstantFP::getQNaN(CI->getType(), /*Negative=*/false, &Fill);
3876 }
3877 
3878 Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
3879                                                        LibFunc Func,
3880                                                        IRBuilderBase &Builder) {
3881   const Module *M = CI->getModule();
3882 
3883   // Don't optimize calls that require strict floating point semantics.
3884   if (CI->isStrictFP())
3885     return nullptr;
3886 
3887   if (Value *V = optimizeSymmetric(CI, Func, Builder))
3888     return V;
3889 
3890   switch (Func) {
3891   case LibFunc_sinpif:
3892   case LibFunc_sinpi:
3893     return optimizeSinCosPi(CI, /*IsSin*/true, Builder);
3894   case LibFunc_cospif:
3895   case LibFunc_cospi:
3896     return optimizeSinCosPi(CI, /*IsSin*/false, Builder);
3897   case LibFunc_powf:
3898   case LibFunc_pow:
3899   case LibFunc_powl:
3900     return optimizePow(CI, Builder);
3901   case LibFunc_exp2l:
3902   case LibFunc_exp2:
3903   case LibFunc_exp2f:
3904     return optimizeExp2(CI, Builder);
3905   case LibFunc_fabsf:
3906   case LibFunc_fabs:
3907   case LibFunc_fabsl:
3908     return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
3909   case LibFunc_sqrtf:
3910   case LibFunc_sqrt:
3911   case LibFunc_sqrtl:
3912     return optimizeSqrt(CI, Builder);
3913   case LibFunc_logf:
3914   case LibFunc_log:
3915   case LibFunc_logl:
3916   case LibFunc_log10f:
3917   case LibFunc_log10:
3918   case LibFunc_log10l:
3919   case LibFunc_log1pf:
3920   case LibFunc_log1p:
3921   case LibFunc_log1pl:
3922   case LibFunc_log2f:
3923   case LibFunc_log2:
3924   case LibFunc_log2l:
3925   case LibFunc_logbf:
3926   case LibFunc_logb:
3927   case LibFunc_logbl:
3928     return optimizeLog(CI, Builder);
3929   case LibFunc_tan:
3930   case LibFunc_tanf:
3931   case LibFunc_tanl:
3932   case LibFunc_sinh:
3933   case LibFunc_sinhf:
3934   case LibFunc_sinhl:
3935   case LibFunc_asinh:
3936   case LibFunc_asinhf:
3937   case LibFunc_asinhl:
3938   case LibFunc_cosh:
3939   case LibFunc_coshf:
3940   case LibFunc_coshl:
3941   case LibFunc_atanh:
3942   case LibFunc_atanhf:
3943   case LibFunc_atanhl:
3944     return optimizeTrigInversionPairs(CI, Builder);
3945   case LibFunc_ceil:
3946     return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
3947   case LibFunc_floor:
3948     return replaceUnaryCall(CI, Builder, Intrinsic::floor);
3949   case LibFunc_round:
3950     return replaceUnaryCall(CI, Builder, Intrinsic::round);
3951   case LibFunc_roundeven:
3952     return replaceUnaryCall(CI, Builder, Intrinsic::roundeven);
3953   case LibFunc_nearbyint:
3954     return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
3955   case LibFunc_rint:
3956     return replaceUnaryCall(CI, Builder, Intrinsic::rint);
3957   case LibFunc_trunc:
3958     return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
3959   case LibFunc_acos:
3960   case LibFunc_acosh:
3961   case LibFunc_asin:
3962   case LibFunc_atan:
3963   case LibFunc_cbrt:
3964   case LibFunc_exp:
3965   case LibFunc_exp10:
3966   case LibFunc_expm1:
3967   case LibFunc_cos:
3968   case LibFunc_sin:
3969   case LibFunc_tanh:
3970     if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName()))
3971       return optimizeUnaryDoubleFP(CI, Builder, TLI, true);
3972     return nullptr;
3973   case LibFunc_copysign:
3974     if (hasFloatVersion(M, CI->getCalledFunction()->getName()))
3975       return optimizeBinaryDoubleFP(CI, Builder, TLI);
3976     return nullptr;
3977   case LibFunc_fminf:
3978   case LibFunc_fmin:
3979   case LibFunc_fminl:
3980   case LibFunc_fmaxf:
3981   case LibFunc_fmax:
3982   case LibFunc_fmaxl:
3983     return optimizeFMinFMax(CI, Builder);
3984   case LibFunc_cabs:
3985   case LibFunc_cabsf:
3986   case LibFunc_cabsl:
3987     return optimizeCAbs(CI, Builder);
3988   case LibFunc_remquo:
3989   case LibFunc_remquof:
3990   case LibFunc_remquol:
3991     return optimizeRemquo(CI, Builder);
3992   case LibFunc_nan:
3993   case LibFunc_nanf:
3994   case LibFunc_nanl:
3995     return optimizeNaN(CI);
3996   default:
3997     return nullptr;
3998   }
3999 }
4000 
4001 Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
4002   Module *M = CI->getModule();
4003   assert(!CI->isMustTailCall() && "These transforms aren't musttail safe.");
4004 
4005   // TODO: Split out the code below that operates on FP calls so that
4006   //       we can all non-FP calls with the StrictFP attribute to be
4007   //       optimized.
4008   if (CI->isNoBuiltin())
4009     return nullptr;
4010 
4011   LibFunc Func;
4012   Function *Callee = CI->getCalledFunction();
4013   bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
4014 
4015   SmallVector<OperandBundleDef, 2> OpBundles;
4016   CI->getOperandBundlesAsDefs(OpBundles);
4017 
4018   IRBuilderBase::OperandBundlesGuard Guard(Builder);
4019   Builder.setDefaultOperandBundles(OpBundles);
4020 
4021   // Command-line parameter overrides instruction attribute.
4022   // This can't be moved to optimizeFloatingPointLibCall() because it may be
4023   // used by the intrinsic optimizations.
4024   if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
4025     UnsafeFPShrink = EnableUnsafeFPShrink;
4026   else if (isa<FPMathOperator>(CI) && CI->isFast())
4027     UnsafeFPShrink = true;
4028 
4029   // First, check for intrinsics.
4030   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
4031     if (!IsCallingConvC)
4032       return nullptr;
4033     // The FP intrinsics have corresponding constrained versions so we don't
4034     // need to check for the StrictFP attribute here.
4035     switch (II->getIntrinsicID()) {
4036     case Intrinsic::pow:
4037       return optimizePow(CI, Builder);
4038     case Intrinsic::exp2:
4039       return optimizeExp2(CI, Builder);
4040     case Intrinsic::log:
4041     case Intrinsic::log2:
4042     case Intrinsic::log10:
4043       return optimizeLog(CI, Builder);
4044     case Intrinsic::sqrt:
4045       return optimizeSqrt(CI, Builder);
4046     case Intrinsic::memset:
4047       return optimizeMemSet(CI, Builder);
4048     case Intrinsic::memcpy:
4049       return optimizeMemCpy(CI, Builder);
4050     case Intrinsic::memmove:
4051       return optimizeMemMove(CI, Builder);
4052     default:
4053       return nullptr;
4054     }
4055   }
4056 
4057   // Also try to simplify calls to fortified library functions.
4058   if (Value *SimplifiedFortifiedCI =
4059           FortifiedSimplifier.optimizeCall(CI, Builder))
4060     return SimplifiedFortifiedCI;
4061 
4062   // Then check for known library functions.
4063   if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
4064     // We never change the calling convention.
4065     if (!ignoreCallingConv(Func) && !IsCallingConvC)
4066       return nullptr;
4067     if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
4068       return V;
4069     if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder))
4070       return V;
4071     switch (Func) {
4072     case LibFunc_ffs:
4073     case LibFunc_ffsl:
4074     case LibFunc_ffsll:
4075       return optimizeFFS(CI, Builder);
4076     case LibFunc_fls:
4077     case LibFunc_flsl:
4078     case LibFunc_flsll:
4079       return optimizeFls(CI, Builder);
4080     case LibFunc_abs:
4081     case LibFunc_labs:
4082     case LibFunc_llabs:
4083       return optimizeAbs(CI, Builder);
4084     case LibFunc_isdigit:
4085       return optimizeIsDigit(CI, Builder);
4086     case LibFunc_isascii:
4087       return optimizeIsAscii(CI, Builder);
4088     case LibFunc_toascii:
4089       return optimizeToAscii(CI, Builder);
4090     case LibFunc_atoi:
4091     case LibFunc_atol:
4092     case LibFunc_atoll:
4093       return optimizeAtoi(CI, Builder);
4094     case LibFunc_strtol:
4095     case LibFunc_strtoll:
4096       return optimizeStrToInt(CI, Builder, /*AsSigned=*/true);
4097     case LibFunc_strtoul:
4098     case LibFunc_strtoull:
4099       return optimizeStrToInt(CI, Builder, /*AsSigned=*/false);
4100     case LibFunc_printf:
4101       return optimizePrintF(CI, Builder);
4102     case LibFunc_sprintf:
4103       return optimizeSPrintF(CI, Builder);
4104     case LibFunc_snprintf:
4105       return optimizeSnPrintF(CI, Builder);
4106     case LibFunc_fprintf:
4107       return optimizeFPrintF(CI, Builder);
4108     case LibFunc_fwrite:
4109       return optimizeFWrite(CI, Builder);
4110     case LibFunc_fputs:
4111       return optimizeFPuts(CI, Builder);
4112     case LibFunc_puts:
4113       return optimizePuts(CI, Builder);
4114     case LibFunc_perror:
4115       return optimizeErrorReporting(CI, Builder);
4116     case LibFunc_vfprintf:
4117     case LibFunc_fiprintf:
4118       return optimizeErrorReporting(CI, Builder, 0);
4119     case LibFunc_exit:
4120     case LibFunc_Exit:
4121       return optimizeExit(CI);
4122     default:
4123       return nullptr;
4124     }
4125   }
4126   return nullptr;
4127 }
4128 
4129 LibCallSimplifier::LibCallSimplifier(
4130     const DataLayout &DL, const TargetLibraryInfo *TLI, AssumptionCache *AC,
4131     OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
4132     ProfileSummaryInfo *PSI,
4133     function_ref<void(Instruction *, Value *)> Replacer,
4134     function_ref<void(Instruction *)> Eraser)
4135     : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), AC(AC), ORE(ORE), BFI(BFI),
4136       PSI(PSI), Replacer(Replacer), Eraser(Eraser) {}
4137 
4138 void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
4139   // Indirect through the replacer used in this instance.
4140   Replacer(I, With);
4141 }
4142 
4143 void LibCallSimplifier::eraseFromParent(Instruction *I) {
4144   Eraser(I);
4145 }
4146 
4147 // TODO:
4148 //   Additional cases that we need to add to this file:
4149 //
4150 // cbrt:
4151 //   * cbrt(expN(X))  -> expN(x/3)
4152 //   * cbrt(sqrt(x))  -> pow(x,1/6)
4153 //   * cbrt(cbrt(x))  -> pow(x,1/9)
4154 //
4155 // exp, expf, expl:
4156 //   * exp(log(x))  -> x
4157 //
4158 // log, logf, logl:
4159 //   * log(exp(x))   -> x
4160 //   * log(exp(y))   -> y*log(e)
4161 //   * log(exp10(y)) -> y*log(10)
4162 //   * log(sqrt(x))  -> 0.5*log(x)
4163 //
4164 // pow, powf, powl:
4165 //   * pow(sqrt(x),y) -> pow(x,y*0.5)
4166 //   * pow(pow(x,y),z)-> pow(x,y*z)
4167 //
4168 // signbit:
4169 //   * signbit(cnst) -> cnst'
4170 //   * signbit(nncst) -> 0 (if pstv is a non-negative constant)
4171 //
4172 // sqrt, sqrtf, sqrtl:
4173 //   * sqrt(expN(x))  -> expN(x*0.5)
4174 //   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
4175 //   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
4176 //
4177 
4178 //===----------------------------------------------------------------------===//
4179 // Fortified Library Call Optimizations
4180 //===----------------------------------------------------------------------===//
4181 
4182 bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(
4183     CallInst *CI, unsigned ObjSizeOp, std::optional<unsigned> SizeOp,
4184     std::optional<unsigned> StrOp, std::optional<unsigned> FlagOp) {
4185   // If this function takes a flag argument, the implementation may use it to
4186   // perform extra checks. Don't fold into the non-checking variant.
4187   if (FlagOp) {
4188     ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
4189     if (!Flag || !Flag->isZero())
4190       return false;
4191   }
4192 
4193   if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
4194     return true;
4195 
4196   if (ConstantInt *ObjSizeCI =
4197           dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
4198     if (ObjSizeCI->isMinusOne())
4199       return true;
4200     // If the object size wasn't -1 (unknown), bail out if we were asked to.
4201     if (OnlyLowerUnknownSize)
4202       return false;
4203     if (StrOp) {
4204       uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
4205       // If the length is 0 we don't know how long it is and so we can't
4206       // remove the check.
4207       if (Len)
4208         annotateDereferenceableBytes(CI, *StrOp, Len);
4209       else
4210         return false;
4211       return ObjSizeCI->getZExtValue() >= Len;
4212     }
4213 
4214     if (SizeOp) {
4215       if (ConstantInt *SizeCI =
4216               dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
4217         return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
4218     }
4219   }
4220   return false;
4221 }
4222 
4223 Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
4224                                                      IRBuilderBase &B) {
4225   if (isFortifiedCallFoldable(CI, 3, 2)) {
4226     CallInst *NewCI =
4227         B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
4228                        Align(1), CI->getArgOperand(2));
4229     mergeAttributesAndFlags(NewCI, *CI);
4230     return CI->getArgOperand(0);
4231   }
4232   return nullptr;
4233 }
4234 
4235 Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
4236                                                       IRBuilderBase &B) {
4237   if (isFortifiedCallFoldable(CI, 3, 2)) {
4238     CallInst *NewCI =
4239         B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
4240                         Align(1), CI->getArgOperand(2));
4241     mergeAttributesAndFlags(NewCI, *CI);
4242     return CI->getArgOperand(0);
4243   }
4244   return nullptr;
4245 }
4246 
4247 Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
4248                                                      IRBuilderBase &B) {
4249   if (isFortifiedCallFoldable(CI, 3, 2)) {
4250     Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
4251     CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
4252                                      CI->getArgOperand(2), Align(1));
4253     mergeAttributesAndFlags(NewCI, *CI);
4254     return CI->getArgOperand(0);
4255   }
4256   return nullptr;
4257 }
4258 
4259 Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
4260                                                       IRBuilderBase &B) {
4261   const DataLayout &DL = CI->getDataLayout();
4262   if (isFortifiedCallFoldable(CI, 3, 2))
4263     if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4264                                   CI->getArgOperand(2), B, DL, TLI)) {
4265       return mergeAttributesAndFlags(cast<CallInst>(Call), *CI);
4266     }
4267   return nullptr;
4268 }
4269 
4270 Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
4271                                                       IRBuilderBase &B,
4272                                                       LibFunc Func) {
4273   const DataLayout &DL = CI->getDataLayout();
4274   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
4275         *ObjSize = CI->getArgOperand(2);
4276 
4277   // __stpcpy_chk(x,x,...)  -> x+strlen(x)
4278   if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
4279     Value *StrLen = emitStrLen(Src, B, DL, TLI);
4280     return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
4281   }
4282 
4283   // If a) we don't have any length information, or b) we know this will
4284   // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
4285   // st[rp]cpy_chk call which may fail at runtime if the size is too long.
4286   // TODO: It might be nice to get a maximum length out of the possible
4287   // string lengths for varying.
4288   if (isFortifiedCallFoldable(CI, 2, std::nullopt, 1)) {
4289     if (Func == LibFunc_strcpy_chk)
4290       return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
4291     else
4292       return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI));
4293   }
4294 
4295   if (OnlyLowerUnknownSize)
4296     return nullptr;
4297 
4298   // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
4299   uint64_t Len = GetStringLength(Src);
4300   if (Len)
4301     annotateDereferenceableBytes(CI, 1, Len);
4302   else
4303     return nullptr;
4304 
4305   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
4306   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
4307   Value *LenV = ConstantInt::get(SizeTTy, Len);
4308   Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
4309   // If the function was an __stpcpy_chk, and we were able to fold it into
4310   // a __memcpy_chk, we still need to return the correct end pointer.
4311   if (Ret && Func == LibFunc_stpcpy_chk)
4312     return B.CreateInBoundsGEP(B.getInt8Ty(), Dst,
4313                                ConstantInt::get(SizeTTy, Len - 1));
4314   return copyFlags(*CI, cast<CallInst>(Ret));
4315 }
4316 
4317 Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI,
4318                                                      IRBuilderBase &B) {
4319   if (isFortifiedCallFoldable(CI, 1, std::nullopt, 0))
4320     return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B,
4321                                      CI->getDataLayout(), TLI));
4322   return nullptr;
4323 }
4324 
4325 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
4326                                                        IRBuilderBase &B,
4327                                                        LibFunc Func) {
4328   if (isFortifiedCallFoldable(CI, 3, 2)) {
4329     if (Func == LibFunc_strncpy_chk)
4330       return copyFlags(*CI,
4331                        emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4332                                    CI->getArgOperand(2), B, TLI));
4333     else
4334       return copyFlags(*CI,
4335                        emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4336                                    CI->getArgOperand(2), B, TLI));
4337   }
4338 
4339   return nullptr;
4340 }
4341 
4342 Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
4343                                                       IRBuilderBase &B) {
4344   if (isFortifiedCallFoldable(CI, 4, 3))
4345     return copyFlags(
4346         *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4347                          CI->getArgOperand(2), CI->getArgOperand(3), B, TLI));
4348 
4349   return nullptr;
4350 }
4351 
4352 Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
4353                                                        IRBuilderBase &B) {
4354   if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2)) {
4355     SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5));
4356     return copyFlags(*CI,
4357                      emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
4358                                   CI->getArgOperand(4), VariadicArgs, B, TLI));
4359   }
4360 
4361   return nullptr;
4362 }
4363 
4364 Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
4365                                                       IRBuilderBase &B) {
4366   if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1)) {
4367     SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4));
4368     return copyFlags(*CI,
4369                      emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
4370                                  VariadicArgs, B, TLI));
4371   }
4372 
4373   return nullptr;
4374 }
4375 
4376 Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
4377                                                      IRBuilderBase &B) {
4378   if (isFortifiedCallFoldable(CI, 2))
4379     return copyFlags(
4380         *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI));
4381 
4382   return nullptr;
4383 }
4384 
4385 Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
4386                                                    IRBuilderBase &B) {
4387   if (isFortifiedCallFoldable(CI, 3))
4388     return copyFlags(*CI,
4389                      emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
4390                                  CI->getArgOperand(2), B, TLI));
4391 
4392   return nullptr;
4393 }
4394 
4395 Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
4396                                                       IRBuilderBase &B) {
4397   if (isFortifiedCallFoldable(CI, 3))
4398     return copyFlags(*CI,
4399                      emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
4400                                  CI->getArgOperand(2), B, TLI));
4401 
4402   return nullptr;
4403 }
4404 
4405 Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
4406                                                       IRBuilderBase &B) {
4407   if (isFortifiedCallFoldable(CI, 3))
4408     return copyFlags(*CI,
4409                      emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4410                                  CI->getArgOperand(2), B, TLI));
4411 
4412   return nullptr;
4413 }
4414 
4415 Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
4416                                                         IRBuilderBase &B) {
4417   if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2))
4418     return copyFlags(
4419         *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
4420                            CI->getArgOperand(4), CI->getArgOperand(5), B, TLI));
4421 
4422   return nullptr;
4423 }
4424 
4425 Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
4426                                                        IRBuilderBase &B) {
4427   if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1))
4428     return copyFlags(*CI,
4429                      emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
4430                                   CI->getArgOperand(4), B, TLI));
4431 
4432   return nullptr;
4433 }
4434 
4435 Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
4436                                                 IRBuilderBase &Builder) {
4437   // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
4438   // Some clang users checked for _chk libcall availability using:
4439   //   __has_builtin(__builtin___memcpy_chk)
4440   // When compiling with -fno-builtin, this is always true.
4441   // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
4442   // end up with fortified libcalls, which isn't acceptable in a freestanding
4443   // environment which only provides their non-fortified counterparts.
4444   //
4445   // Until we change clang and/or teach external users to check for availability
4446   // differently, disregard the "nobuiltin" attribute and TLI::has.
4447   //
4448   // PR23093.
4449 
4450   LibFunc Func;
4451   Function *Callee = CI->getCalledFunction();
4452   bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
4453 
4454   SmallVector<OperandBundleDef, 2> OpBundles;
4455   CI->getOperandBundlesAsDefs(OpBundles);
4456 
4457   IRBuilderBase::OperandBundlesGuard Guard(Builder);
4458   Builder.setDefaultOperandBundles(OpBundles);
4459 
4460   // First, check that this is a known library functions and that the prototype
4461   // is correct.
4462   if (!TLI->getLibFunc(*Callee, Func))
4463     return nullptr;
4464 
4465   // We never change the calling convention.
4466   if (!ignoreCallingConv(Func) && !IsCallingConvC)
4467     return nullptr;
4468 
4469   switch (Func) {
4470   case LibFunc_memcpy_chk:
4471     return optimizeMemCpyChk(CI, Builder);
4472   case LibFunc_mempcpy_chk:
4473     return optimizeMemPCpyChk(CI, Builder);
4474   case LibFunc_memmove_chk:
4475     return optimizeMemMoveChk(CI, Builder);
4476   case LibFunc_memset_chk:
4477     return optimizeMemSetChk(CI, Builder);
4478   case LibFunc_stpcpy_chk:
4479   case LibFunc_strcpy_chk:
4480     return optimizeStrpCpyChk(CI, Builder, Func);
4481   case LibFunc_strlen_chk:
4482     return optimizeStrLenChk(CI, Builder);
4483   case LibFunc_stpncpy_chk:
4484   case LibFunc_strncpy_chk:
4485     return optimizeStrpNCpyChk(CI, Builder, Func);
4486   case LibFunc_memccpy_chk:
4487     return optimizeMemCCpyChk(CI, Builder);
4488   case LibFunc_snprintf_chk:
4489     return optimizeSNPrintfChk(CI, Builder);
4490   case LibFunc_sprintf_chk:
4491     return optimizeSPrintfChk(CI, Builder);
4492   case LibFunc_strcat_chk:
4493     return optimizeStrCatChk(CI, Builder);
4494   case LibFunc_strlcat_chk:
4495     return optimizeStrLCat(CI, Builder);
4496   case LibFunc_strncat_chk:
4497     return optimizeStrNCatChk(CI, Builder);
4498   case LibFunc_strlcpy_chk:
4499     return optimizeStrLCpyChk(CI, Builder);
4500   case LibFunc_vsnprintf_chk:
4501     return optimizeVSNPrintfChk(CI, Builder);
4502   case LibFunc_vsprintf_chk:
4503     return optimizeVSPrintfChk(CI, Builder);
4504   default:
4505     break;
4506   }
4507   return nullptr;
4508 }
4509 
4510 FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
4511     const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
4512     : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
4513