xref: /llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp (revision fa789dffb1e12c2aece0187aeacc48dfb1768340)
1 //===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the library calls simplifier. It does not implement
10 // any pass, but can be used by other passes to do simplifications.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Analysis/ConstantFolding.h"
19 #include "llvm/Analysis/Loads.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/IR/AttributeMask.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/PatternMatch.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/TargetParser/Triple.h"
36 #include "llvm/Transforms/Utils/BuildLibCalls.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/SizeOpts.h"
39 
40 #include <cmath>
41 
42 using namespace llvm;
43 using namespace PatternMatch;
44 
45 static cl::opt<bool>
46     EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
47                          cl::init(false),
48                          cl::desc("Enable unsafe double to float "
49                                   "shrinking for math lib calls"));
50 
51 // Enable conversion of operator new calls with a MemProf hot or cold hint
52 // to an operator new call that takes a hot/cold hint. Off by default since
53 // not all allocators currently support this extension.
54 static cl::opt<bool>
55     OptimizeHotColdNew("optimize-hot-cold-new", cl::Hidden, cl::init(false),
56                        cl::desc("Enable hot/cold operator new library calls"));
57 static cl::opt<bool> OptimizeExistingHotColdNew(
58     "optimize-existing-hot-cold-new", cl::Hidden, cl::init(false),
59     cl::desc(
60         "Enable optimization of existing hot/cold operator new library calls"));
61 
62 namespace {
63 
64 // Specialized parser to ensure the hint is an 8 bit value (we can't specify
65 // uint8_t to opt<> as that is interpreted to mean that we are passing a char
66 // option with a specific set of values.
67 struct HotColdHintParser : public cl::parser<unsigned> {
68   HotColdHintParser(cl::Option &O) : cl::parser<unsigned>(O) {}
69 
70   bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {
71     if (Arg.getAsInteger(0, Value))
72       return O.error("'" + Arg + "' value invalid for uint argument!");
73 
74     if (Value > 255)
75       return O.error("'" + Arg + "' value must be in the range [0, 255]!");
76 
77     return false;
78   }
79 };
80 
81 } // end anonymous namespace
82 
83 // Hot/cold operator new takes an 8 bit hotness hint, where 0 is the coldest
84 // and 255 is the hottest. Default to 1 value away from the coldest and hottest
85 // hints, so that the compiler hinted allocations are slightly less strong than
86 // manually inserted hints at the two extremes.
87 static cl::opt<unsigned, false, HotColdHintParser> ColdNewHintValue(
88     "cold-new-hint-value", cl::Hidden, cl::init(1),
89     cl::desc("Value to pass to hot/cold operator new for cold allocation"));
90 static cl::opt<unsigned, false, HotColdHintParser>
91     NotColdNewHintValue("notcold-new-hint-value", cl::Hidden, cl::init(128),
92                         cl::desc("Value to pass to hot/cold operator new for "
93                                  "notcold (warm) allocation"));
94 static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue(
95     "hot-new-hint-value", cl::Hidden, cl::init(254),
96     cl::desc("Value to pass to hot/cold operator new for hot allocation"));
97 
98 //===----------------------------------------------------------------------===//
99 // Helper Functions
100 //===----------------------------------------------------------------------===//
101 
102 static bool ignoreCallingConv(LibFunc Func) {
103   return Func == LibFunc_abs || Func == LibFunc_labs ||
104          Func == LibFunc_llabs || Func == LibFunc_strlen;
105 }
106 
107 /// Return true if it is only used in equality comparisons with With.
108 static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
109   for (User *U : V->users()) {
110     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
111       if (IC->isEquality() && IC->getOperand(1) == With)
112         continue;
113     // Unknown instruction.
114     return false;
115   }
116   return true;
117 }
118 
119 static bool callHasFloatingPointArgument(const CallInst *CI) {
120   return any_of(CI->operands(), [](const Use &OI) {
121     return OI->getType()->isFloatingPointTy();
122   });
123 }
124 
125 static bool callHasFP128Argument(const CallInst *CI) {
126   return any_of(CI->operands(), [](const Use &OI) {
127     return OI->getType()->isFP128Ty();
128   });
129 }
130 
131 // Convert the entire string Str representing an integer in Base, up to
132 // the terminating nul if present, to a constant according to the rules
133 // of strtoul[l] or, when AsSigned is set, of strtol[l].  On success
134 // return the result, otherwise null.
135 // The function assumes the string is encoded in ASCII and carefully
136 // avoids converting sequences (including "") that the corresponding
137 // library call might fail and set errno for.
138 static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
139                               uint64_t Base, bool AsSigned, IRBuilderBase &B) {
140   if (Base < 2 || Base > 36)
141     if (Base != 0)
142       // Fail for an invalid base (required by POSIX).
143       return nullptr;
144 
145   // Current offset into the original string to reflect in EndPtr.
146   size_t Offset = 0;
147   // Strip leading whitespace.
148   for ( ; Offset != Str.size(); ++Offset)
149     if (!isSpace((unsigned char)Str[Offset])) {
150       Str = Str.substr(Offset);
151       break;
152     }
153 
154   if (Str.empty())
155     // Fail for empty subject sequences (POSIX allows but doesn't require
156     // strtol[l]/strtoul[l] to fail with EINVAL).
157     return nullptr;
158 
159   // Strip but remember the sign.
160   bool Negate = Str[0] == '-';
161   if (Str[0] == '-' || Str[0] == '+') {
162     Str = Str.drop_front();
163     if (Str.empty())
164       // Fail for a sign with nothing after it.
165       return nullptr;
166     ++Offset;
167   }
168 
169   // Set Max to the absolute value of the minimum (for signed), or
170   // to the maximum (for unsigned) value representable in the type.
171   Type *RetTy = CI->getType();
172   unsigned NBits = RetTy->getPrimitiveSizeInBits();
173   uint64_t Max = AsSigned && Negate ? 1 : 0;
174   Max += AsSigned ? maxIntN(NBits) : maxUIntN(NBits);
175 
176   // Autodetect Base if it's zero and consume the "0x" prefix.
177   if (Str.size() > 1) {
178     if (Str[0] == '0') {
179       if (toUpper((unsigned char)Str[1]) == 'X') {
180         if (Str.size() == 2 || (Base && Base != 16))
181           // Fail if Base doesn't allow the "0x" prefix or for the prefix
182           // alone that implementations like BSD set errno to EINVAL for.
183           return nullptr;
184 
185         Str = Str.drop_front(2);
186         Offset += 2;
187         Base = 16;
188       }
189       else if (Base == 0)
190         Base = 8;
191     } else if (Base == 0)
192       Base = 10;
193   }
194   else if (Base == 0)
195     Base = 10;
196 
197   // Convert the rest of the subject sequence, not including the sign,
198   // to its uint64_t representation (this assumes the source character
199   // set is ASCII).
200   uint64_t Result = 0;
201   for (unsigned i = 0; i != Str.size(); ++i) {
202     unsigned char DigVal = Str[i];
203     if (isDigit(DigVal))
204       DigVal = DigVal - '0';
205     else {
206       DigVal = toUpper(DigVal);
207       if (isAlpha(DigVal))
208         DigVal = DigVal - 'A' + 10;
209       else
210         return nullptr;
211     }
212 
213     if (DigVal >= Base)
214       // Fail if the digit is not valid in the Base.
215       return nullptr;
216 
217     // Add the digit and fail if the result is not representable in
218     // the (unsigned form of the) destination type.
219     bool VFlow;
220     Result = SaturatingMultiplyAdd(Result, Base, (uint64_t)DigVal, &VFlow);
221     if (VFlow || Result > Max)
222       return nullptr;
223   }
224 
225   if (EndPtr) {
226     // Store the pointer to the end.
227     Value *Off = B.getInt64(Offset + Str.size());
228     Value *StrBeg = CI->getArgOperand(0);
229     Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr");
230     B.CreateStore(StrEnd, EndPtr);
231   }
232 
233   if (Negate)
234     // Unsigned negation doesn't overflow.
235     Result = -Result;
236 
237   return ConstantInt::get(RetTy, Result);
238 }
239 
240 static bool isOnlyUsedInComparisonWithZero(Value *V) {
241   for (User *U : V->users()) {
242     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
243       if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
244         if (C->isNullValue())
245           continue;
246     // Unknown instruction.
247     return false;
248   }
249   return true;
250 }
251 
252 static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
253                                  const DataLayout &DL) {
254   if (!isOnlyUsedInComparisonWithZero(CI))
255     return false;
256 
257   if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
258     return false;
259 
260   if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
261     return false;
262 
263   return true;
264 }
265 
266 static void annotateDereferenceableBytes(CallInst *CI,
267                                          ArrayRef<unsigned> ArgNos,
268                                          uint64_t DereferenceableBytes) {
269   const Function *F = CI->getCaller();
270   if (!F)
271     return;
272   for (unsigned ArgNo : ArgNos) {
273     uint64_t DerefBytes = DereferenceableBytes;
274     unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
275     if (!llvm::NullPointerIsDefined(F, AS) ||
276         CI->paramHasAttr(ArgNo, Attribute::NonNull))
277       DerefBytes = std::max(CI->getParamDereferenceableOrNullBytes(ArgNo),
278                             DereferenceableBytes);
279 
280     if (CI->getParamDereferenceableBytes(ArgNo) < DerefBytes) {
281       CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
282       if (!llvm::NullPointerIsDefined(F, AS) ||
283           CI->paramHasAttr(ArgNo, Attribute::NonNull))
284         CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull);
285       CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
286                                   CI->getContext(), DerefBytes));
287     }
288   }
289 }
290 
291 static void annotateNonNullNoUndefBasedOnAccess(CallInst *CI,
292                                          ArrayRef<unsigned> ArgNos) {
293   Function *F = CI->getCaller();
294   if (!F)
295     return;
296 
297   for (unsigned ArgNo : ArgNos) {
298     if (!CI->paramHasAttr(ArgNo, Attribute::NoUndef))
299       CI->addParamAttr(ArgNo, Attribute::NoUndef);
300 
301     if (!CI->paramHasAttr(ArgNo, Attribute::NonNull)) {
302       unsigned AS =
303           CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
304       if (llvm::NullPointerIsDefined(F, AS))
305         continue;
306       CI->addParamAttr(ArgNo, Attribute::NonNull);
307     }
308 
309     annotateDereferenceableBytes(CI, ArgNo, 1);
310   }
311 }
312 
313 static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos,
314                                Value *Size, const DataLayout &DL) {
315   if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
316     annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
317     annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
318   } else if (isKnownNonZero(Size, DL)) {
319     annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
320     const APInt *X, *Y;
321     uint64_t DerefMin = 1;
322     if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) {
323       DerefMin = std::min(X->getZExtValue(), Y->getZExtValue());
324       annotateDereferenceableBytes(CI, ArgNos, DerefMin);
325     }
326   }
327 }
328 
329 // Copy CallInst "flags" like musttail, notail, and tail. Return New param for
330 // easier chaining. Calls to emit* and B.createCall should probably be wrapped
331 // in this function when New is created to replace Old. Callers should take
332 // care to check Old.isMustTailCall() if they aren't replacing Old directly
333 // with New.
334 static Value *copyFlags(const CallInst &Old, Value *New) {
335   assert(!Old.isMustTailCall() && "do not copy musttail call flags");
336   assert(!Old.isNoTailCall() && "do not copy notail call flags");
337   if (auto *NewCI = dyn_cast_or_null<CallInst>(New))
338     NewCI->setTailCallKind(Old.getTailCallKind());
339   return New;
340 }
341 
342 static Value *mergeAttributesAndFlags(CallInst *NewCI, const CallInst &Old) {
343   NewCI->setAttributes(AttributeList::get(
344       NewCI->getContext(), {NewCI->getAttributes(), Old.getAttributes()}));
345   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
346   return copyFlags(Old, NewCI);
347 }
348 
349 // Helper to avoid truncating the length if size_t is 32-bits.
350 static StringRef substr(StringRef Str, uint64_t Len) {
351   return Len >= Str.size() ? Str : Str.substr(0, Len);
352 }
353 
354 //===----------------------------------------------------------------------===//
355 // String and Memory Library Call Optimizations
356 //===----------------------------------------------------------------------===//
357 
358 Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
359   // Extract some information from the instruction
360   Value *Dst = CI->getArgOperand(0);
361   Value *Src = CI->getArgOperand(1);
362   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
363 
364   // See if we can get the length of the input string.
365   uint64_t Len = GetStringLength(Src);
366   if (Len)
367     annotateDereferenceableBytes(CI, 1, Len);
368   else
369     return nullptr;
370   --Len; // Unbias length.
371 
372   // Handle the simple, do-nothing case: strcat(x, "") -> x
373   if (Len == 0)
374     return Dst;
375 
376   return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B));
377 }
378 
379 Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
380                                            IRBuilderBase &B) {
381   // We need to find the end of the destination string.  That's where the
382   // memory is to be moved to. We just generate a call to strlen.
383   Value *DstLen = emitStrLen(Dst, B, DL, TLI);
384   if (!DstLen)
385     return nullptr;
386 
387   // Now that we have the destination's length, we must index into the
388   // destination's pointer to get the actual memcpy destination (end of
389   // the string .. we're concatenating).
390   Value *CpyDst = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
391 
392   // We have enough information to now generate the memcpy call to do the
393   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
394   B.CreateMemCpy(
395       CpyDst, Align(1), Src, Align(1),
396       ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
397   return Dst;
398 }
399 
400 Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
401   // Extract some information from the instruction.
402   Value *Dst = CI->getArgOperand(0);
403   Value *Src = CI->getArgOperand(1);
404   Value *Size = CI->getArgOperand(2);
405   uint64_t Len;
406   annotateNonNullNoUndefBasedOnAccess(CI, 0);
407   if (isKnownNonZero(Size, DL))
408     annotateNonNullNoUndefBasedOnAccess(CI, 1);
409 
410   // We don't do anything if length is not constant.
411   ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size);
412   if (LengthArg) {
413     Len = LengthArg->getZExtValue();
414     // strncat(x, c, 0) -> x
415     if (!Len)
416       return Dst;
417   } else {
418     return nullptr;
419   }
420 
421   // See if we can get the length of the input string.
422   uint64_t SrcLen = GetStringLength(Src);
423   if (SrcLen) {
424     annotateDereferenceableBytes(CI, 1, SrcLen);
425     --SrcLen; // Unbias length.
426   } else {
427     return nullptr;
428   }
429 
430   // strncat(x, "", c) -> x
431   if (SrcLen == 0)
432     return Dst;
433 
434   // We don't optimize this case.
435   if (Len < SrcLen)
436     return nullptr;
437 
438   // strncat(x, s, c) -> strcat(x, s)
439   // s is constant so the strcat can be optimized further.
440   return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
441 }
442 
443 // Helper to transform memchr(S, C, N) == S to N && *S == C and, when
444 // NBytes is null, strchr(S, C) to *S == C.  A precondition of the function
445 // is that either S is dereferenceable or the value of N is nonzero.
446 static Value* memChrToCharCompare(CallInst *CI, Value *NBytes,
447                                   IRBuilderBase &B, const DataLayout &DL)
448 {
449   Value *Src = CI->getArgOperand(0);
450   Value *CharVal = CI->getArgOperand(1);
451 
452   // Fold memchr(A, C, N) == A to N && *A == C.
453   Type *CharTy = B.getInt8Ty();
454   Value *Char0 = B.CreateLoad(CharTy, Src);
455   CharVal = B.CreateTrunc(CharVal, CharTy);
456   Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp");
457 
458   if (NBytes) {
459     Value *Zero = ConstantInt::get(NBytes->getType(), 0);
460     Value *And = B.CreateICmpNE(NBytes, Zero);
461     Cmp = B.CreateLogicalAnd(And, Cmp);
462   }
463 
464   Value *NullPtr = Constant::getNullValue(CI->getType());
465   return B.CreateSelect(Cmp, Src, NullPtr);
466 }
467 
468 Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
469   Value *SrcStr = CI->getArgOperand(0);
470   Value *CharVal = CI->getArgOperand(1);
471   annotateNonNullNoUndefBasedOnAccess(CI, 0);
472 
473   if (isOnlyUsedInEqualityComparison(CI, SrcStr))
474     return memChrToCharCompare(CI, nullptr, B, DL);
475 
476   // If the second operand is non-constant, see if we can compute the length
477   // of the input string and turn this into memchr.
478   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
479   if (!CharC) {
480     uint64_t Len = GetStringLength(SrcStr);
481     if (Len)
482       annotateDereferenceableBytes(CI, 0, Len);
483     else
484       return nullptr;
485 
486     Function *Callee = CI->getCalledFunction();
487     FunctionType *FT = Callee->getFunctionType();
488     unsigned IntBits = TLI->getIntSize();
489     if (!FT->getParamType(1)->isIntegerTy(IntBits)) // memchr needs 'int'.
490       return nullptr;
491 
492     unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
493     Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
494     return copyFlags(*CI,
495                      emitMemChr(SrcStr, CharVal, // include nul.
496                                 ConstantInt::get(SizeTTy, Len), B,
497                                 DL, TLI));
498   }
499 
500   if (CharC->isZero()) {
501     Value *NullPtr = Constant::getNullValue(CI->getType());
502     if (isOnlyUsedInEqualityComparison(CI, NullPtr))
503       // Pre-empt the transformation to strlen below and fold
504       // strchr(A, '\0') == null to false.
505       return B.CreateIntToPtr(B.getTrue(), CI->getType());
506   }
507 
508   // Otherwise, the character is a constant, see if the first argument is
509   // a string literal.  If so, we can constant fold.
510   StringRef Str;
511   if (!getConstantStringInfo(SrcStr, Str)) {
512     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
513       if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI))
514         return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr");
515     return nullptr;
516   }
517 
518   // Compute the offset, make sure to handle the case when we're searching for
519   // zero (a weird way to spell strlen).
520   size_t I = (0xFF & CharC->getSExtValue()) == 0
521                  ? Str.size()
522                  : Str.find(CharC->getSExtValue());
523   if (I == StringRef::npos) // Didn't find the char.  strchr returns null.
524     return Constant::getNullValue(CI->getType());
525 
526   // strchr(s+n,c)  -> gep(s+n+i,c)
527   return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
528 }
529 
530 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
531   Value *SrcStr = CI->getArgOperand(0);
532   Value *CharVal = CI->getArgOperand(1);
533   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
534   annotateNonNullNoUndefBasedOnAccess(CI, 0);
535 
536   StringRef Str;
537   if (!getConstantStringInfo(SrcStr, Str)) {
538     // strrchr(s, 0) -> strchr(s, 0)
539     if (CharC && CharC->isZero())
540       return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
541     return nullptr;
542   }
543 
544   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
545   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
546 
547   // Try to expand strrchr to the memrchr nonstandard extension if it's
548   // available, or simply fail otherwise.
549   uint64_t NBytes = Str.size() + 1;   // Include the terminating nul.
550   Value *Size = ConstantInt::get(SizeTTy, NBytes);
551   return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI));
552 }
553 
554 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
555   Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
556   if (Str1P == Str2P) // strcmp(x,x)  -> 0
557     return ConstantInt::get(CI->getType(), 0);
558 
559   StringRef Str1, Str2;
560   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
561   bool HasStr2 = getConstantStringInfo(Str2P, Str2);
562 
563   // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
564   if (HasStr1 && HasStr2)
565     return ConstantInt::get(CI->getType(),
566                             std::clamp(Str1.compare(Str2), -1, 1));
567 
568   if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
569     return B.CreateNeg(B.CreateZExt(
570         B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
571 
572   if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
573     return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
574                         CI->getType());
575 
576   // strcmp(P, "x") -> memcmp(P, "x", 2)
577   uint64_t Len1 = GetStringLength(Str1P);
578   if (Len1)
579     annotateDereferenceableBytes(CI, 0, Len1);
580   uint64_t Len2 = GetStringLength(Str2P);
581   if (Len2)
582     annotateDereferenceableBytes(CI, 1, Len2);
583 
584   if (Len1 && Len2) {
585     return copyFlags(
586         *CI, emitMemCmp(Str1P, Str2P,
587                         ConstantInt::get(DL.getIntPtrType(CI->getContext()),
588                                          std::min(Len1, Len2)),
589                         B, DL, TLI));
590   }
591 
592   // strcmp to memcmp
593   if (!HasStr1 && HasStr2) {
594     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
595       return copyFlags(
596           *CI,
597           emitMemCmp(Str1P, Str2P,
598                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
599                      B, DL, TLI));
600   } else if (HasStr1 && !HasStr2) {
601     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
602       return copyFlags(
603           *CI,
604           emitMemCmp(Str1P, Str2P,
605                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
606                      B, DL, TLI));
607   }
608 
609   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
610   return nullptr;
611 }
612 
613 // Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
614 // arrays LHS and RHS and nonconstant Size.
615 static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
616                                     Value *Size, bool StrNCmp,
617                                     IRBuilderBase &B, const DataLayout &DL);
618 
619 Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
620   Value *Str1P = CI->getArgOperand(0);
621   Value *Str2P = CI->getArgOperand(1);
622   Value *Size = CI->getArgOperand(2);
623   if (Str1P == Str2P) // strncmp(x,x,n)  -> 0
624     return ConstantInt::get(CI->getType(), 0);
625 
626   if (isKnownNonZero(Size, DL))
627     annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
628   // Get the length argument if it is constant.
629   uint64_t Length;
630   if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
631     Length = LengthArg->getZExtValue();
632   else
633     return optimizeMemCmpVarSize(CI, Str1P, Str2P, Size, true, B, DL);
634 
635   if (Length == 0) // strncmp(x,y,0)   -> 0
636     return ConstantInt::get(CI->getType(), 0);
637 
638   if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
639     return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI));
640 
641   StringRef Str1, Str2;
642   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
643   bool HasStr2 = getConstantStringInfo(Str2P, Str2);
644 
645   // strncmp(x, y)  -> cnst  (if both x and y are constant strings)
646   if (HasStr1 && HasStr2) {
647     // Avoid truncating the 64-bit Length to 32 bits in ILP32.
648     StringRef SubStr1 = substr(Str1, Length);
649     StringRef SubStr2 = substr(Str2, Length);
650     return ConstantInt::get(CI->getType(),
651                             std::clamp(SubStr1.compare(SubStr2), -1, 1));
652   }
653 
654   if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
655     return B.CreateNeg(B.CreateZExt(
656         B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
657 
658   if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
659     return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
660                         CI->getType());
661 
662   uint64_t Len1 = GetStringLength(Str1P);
663   if (Len1)
664     annotateDereferenceableBytes(CI, 0, Len1);
665   uint64_t Len2 = GetStringLength(Str2P);
666   if (Len2)
667     annotateDereferenceableBytes(CI, 1, Len2);
668 
669   // strncmp to memcmp
670   if (!HasStr1 && HasStr2) {
671     Len2 = std::min(Len2, Length);
672     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
673       return copyFlags(
674           *CI,
675           emitMemCmp(Str1P, Str2P,
676                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
677                      B, DL, TLI));
678   } else if (HasStr1 && !HasStr2) {
679     Len1 = std::min(Len1, Length);
680     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
681       return copyFlags(
682           *CI,
683           emitMemCmp(Str1P, Str2P,
684                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
685                      B, DL, TLI));
686   }
687 
688   return nullptr;
689 }
690 
691 Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) {
692   Value *Src = CI->getArgOperand(0);
693   ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
694   uint64_t SrcLen = GetStringLength(Src);
695   if (SrcLen && Size) {
696     annotateDereferenceableBytes(CI, 0, SrcLen);
697     if (SrcLen <= Size->getZExtValue() + 1)
698       return copyFlags(*CI, emitStrDup(Src, B, TLI));
699   }
700 
701   return nullptr;
702 }
703 
704 Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
705   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
706   if (Dst == Src) // strcpy(x,x)  -> x
707     return Src;
708 
709   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
710   // See if we can get the length of the input string.
711   uint64_t Len = GetStringLength(Src);
712   if (Len)
713     annotateDereferenceableBytes(CI, 1, Len);
714   else
715     return nullptr;
716 
717   // We have enough information to now generate the memcpy call to do the
718   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
719   CallInst *NewCI =
720       B.CreateMemCpy(Dst, Align(1), Src, Align(1),
721                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
722   mergeAttributesAndFlags(NewCI, *CI);
723   return Dst;
724 }
725 
726 Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
727   Function *Callee = CI->getCalledFunction();
728   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
729 
730   // stpcpy(d,s) -> strcpy(d,s) if the result is not used.
731   if (CI->use_empty())
732     return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
733 
734   if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
735     Value *StrLen = emitStrLen(Src, B, DL, TLI);
736     return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
737   }
738 
739   // See if we can get the length of the input string.
740   uint64_t Len = GetStringLength(Src);
741   if (Len)
742     annotateDereferenceableBytes(CI, 1, Len);
743   else
744     return nullptr;
745 
746   Type *PT = Callee->getFunctionType()->getParamType(0);
747   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
748   Value *DstEnd = B.CreateInBoundsGEP(
749       B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
750 
751   // We have enough information to now generate the memcpy call to do the
752   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
753   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
754   mergeAttributesAndFlags(NewCI, *CI);
755   return DstEnd;
756 }
757 
758 // Optimize a call to size_t strlcpy(char*, const char*, size_t).
759 
760 Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
761   Value *Size = CI->getArgOperand(2);
762   if (isKnownNonZero(Size, DL))
763     // Like snprintf, the function stores into the destination only when
764     // the size argument is nonzero.
765     annotateNonNullNoUndefBasedOnAccess(CI, 0);
766   // The function reads the source argument regardless of Size (it returns
767   // its length).
768   annotateNonNullNoUndefBasedOnAccess(CI, 1);
769 
770   uint64_t NBytes;
771   if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
772     NBytes = SizeC->getZExtValue();
773   else
774     return nullptr;
775 
776   Value *Dst = CI->getArgOperand(0);
777   Value *Src = CI->getArgOperand(1);
778   if (NBytes <= 1) {
779     if (NBytes == 1)
780       // For a call to strlcpy(D, S, 1) first store a nul in *D.
781       B.CreateStore(B.getInt8(0), Dst);
782 
783     // Transform strlcpy(D, S, 0) to a call to strlen(S).
784     return copyFlags(*CI, emitStrLen(Src, B, DL, TLI));
785   }
786 
787   // Try to determine the length of the source, substituting its size
788   // when it's not nul-terminated (as it's required to be) to avoid
789   // reading past its end.
790   StringRef Str;
791   if (!getConstantStringInfo(Src, Str, /*TrimAtNul=*/false))
792     return nullptr;
793 
794   uint64_t SrcLen = Str.find('\0');
795   // Set if the terminating nul should be copied by the call to memcpy
796   // below.
797   bool NulTerm = SrcLen < NBytes;
798 
799   if (NulTerm)
800     // Overwrite NBytes with the number of bytes to copy, including
801     // the terminating nul.
802     NBytes = SrcLen + 1;
803   else {
804     // Set the length of the source for the function to return to its
805     // size, and cap NBytes at the same.
806     SrcLen = std::min(SrcLen, uint64_t(Str.size()));
807     NBytes = std::min(NBytes - 1, SrcLen);
808   }
809 
810   if (SrcLen == 0) {
811     // Transform strlcpy(D, "", N) to (*D = '\0, 0).
812     B.CreateStore(B.getInt8(0), Dst);
813     return ConstantInt::get(CI->getType(), 0);
814   }
815 
816   Function *Callee = CI->getCalledFunction();
817   Type *PT = Callee->getFunctionType()->getParamType(0);
818   // Transform strlcpy(D, S, N) to memcpy(D, S, N') where N' is the lower
819   // bound on strlen(S) + 1 and N, optionally followed by a nul store to
820   // D[N' - 1] if necessary.
821   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
822                         ConstantInt::get(DL.getIntPtrType(PT), NBytes));
823   mergeAttributesAndFlags(NewCI, *CI);
824 
825   if (!NulTerm) {
826     Value *EndOff = ConstantInt::get(CI->getType(), NBytes);
827     Value *EndPtr = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, EndOff);
828     B.CreateStore(B.getInt8(0), EndPtr);
829   }
830 
831   // Like snprintf, strlcpy returns the number of nonzero bytes that would
832   // have been copied if the bound had been sufficiently big (which in this
833   // case is strlen(Src)).
834   return ConstantInt::get(CI->getType(), SrcLen);
835 }
836 
837 // Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy
838 // otherwise.
839 Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
840                                              IRBuilderBase &B) {
841   Function *Callee = CI->getCalledFunction();
842   Value *Dst = CI->getArgOperand(0);
843   Value *Src = CI->getArgOperand(1);
844   Value *Size = CI->getArgOperand(2);
845 
846   if (isKnownNonZero(Size, DL)) {
847     // Both st{p,r}ncpy(D, S, N) access the source and destination arrays
848     // only when N is nonzero.
849     annotateNonNullNoUndefBasedOnAccess(CI, 0);
850     annotateNonNullNoUndefBasedOnAccess(CI, 1);
851   }
852 
853   // If the "bound" argument is known set N to it.  Otherwise set it to
854   // UINT64_MAX and handle it later.
855   uint64_t N = UINT64_MAX;
856   if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
857     N = SizeC->getZExtValue();
858 
859   if (N == 0)
860     // Fold st{p,r}ncpy(D, S, 0) to D.
861     return Dst;
862 
863   if (N == 1) {
864     Type *CharTy = B.getInt8Ty();
865     Value *CharVal = B.CreateLoad(CharTy, Src, "stxncpy.char0");
866     B.CreateStore(CharVal, Dst);
867     if (!RetEnd)
868       // Transform strncpy(D, S, 1) to return (*D = *S), D.
869       return Dst;
870 
871     // Transform stpncpy(D, S, 1) to return (*D = *S) ? D + 1 : D.
872     Value *ZeroChar = ConstantInt::get(CharTy, 0);
873     Value *Cmp = B.CreateICmpEQ(CharVal, ZeroChar, "stpncpy.char0cmp");
874 
875     Value *Off1 = B.getInt32(1);
876     Value *EndPtr = B.CreateInBoundsGEP(CharTy, Dst, Off1, "stpncpy.end");
877     return B.CreateSelect(Cmp, Dst, EndPtr, "stpncpy.sel");
878   }
879 
880   // If the length of the input string is known set SrcLen to it.
881   uint64_t SrcLen = GetStringLength(Src);
882   if (SrcLen)
883     annotateDereferenceableBytes(CI, 1, SrcLen);
884   else
885     return nullptr;
886 
887   --SrcLen; // Unbias length.
888 
889   if (SrcLen == 0) {
890     // Transform st{p,r}ncpy(D, "", N) to memset(D, '\0', N) for any N.
891     Align MemSetAlign =
892       CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
893     CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
894     AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0));
895     NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
896         CI->getContext(), 0, ArgAttrs));
897     copyFlags(*CI, NewCI);
898     return Dst;
899   }
900 
901   if (N > SrcLen + 1) {
902     if (N > 128)
903       // Bail if N is large or unknown.
904       return nullptr;
905 
906     // st{p,r}ncpy(D, "a", N) -> memcpy(D, "a\0\0\0", N) for N <= 128.
907     StringRef Str;
908     if (!getConstantStringInfo(Src, Str))
909       return nullptr;
910     std::string SrcStr = Str.str();
911     // Create a bigger, nul-padded array with the same length, SrcLen,
912     // as the original string.
913     SrcStr.resize(N, '\0');
914     Src = B.CreateGlobalString(SrcStr, "str", /*AddressSpace=*/0,
915                                /*M=*/nullptr, /*AddNull=*/false);
916   }
917 
918   Type *PT = Callee->getFunctionType()->getParamType(0);
919   // st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both
920   // S and N are constant.
921   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
922                                    ConstantInt::get(DL.getIntPtrType(PT), N));
923   mergeAttributesAndFlags(NewCI, *CI);
924   if (!RetEnd)
925     return Dst;
926 
927   // stpncpy(D, S, N) returns the address of the first null in D if it writes
928   // one, otherwise D + N.
929   Value *Off = B.getInt64(std::min(SrcLen, N));
930   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Off, "endptr");
931 }
932 
933 Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
934                                                unsigned CharSize,
935                                                Value *Bound) {
936   Value *Src = CI->getArgOperand(0);
937   Type *CharTy = B.getIntNTy(CharSize);
938 
939   if (isOnlyUsedInZeroEqualityComparison(CI) &&
940       (!Bound || isKnownNonZero(Bound, DL))) {
941     // Fold strlen:
942     //   strlen(x) != 0 --> *x != 0
943     //   strlen(x) == 0 --> *x == 0
944     // and likewise strnlen with constant N > 0:
945     //   strnlen(x, N) != 0 --> *x != 0
946     //   strnlen(x, N) == 0 --> *x == 0
947     return B.CreateZExt(B.CreateLoad(CharTy, Src, "char0"),
948                         CI->getType());
949   }
950 
951   if (Bound) {
952     if (ConstantInt *BoundCst = dyn_cast<ConstantInt>(Bound)) {
953       if (BoundCst->isZero())
954         // Fold strnlen(s, 0) -> 0 for any s, constant or otherwise.
955         return ConstantInt::get(CI->getType(), 0);
956 
957       if (BoundCst->isOne()) {
958         // Fold strnlen(s, 1) -> *s ? 1 : 0 for any s.
959         Value *CharVal = B.CreateLoad(CharTy, Src, "strnlen.char0");
960         Value *ZeroChar = ConstantInt::get(CharTy, 0);
961         Value *Cmp = B.CreateICmpNE(CharVal, ZeroChar, "strnlen.char0cmp");
962         return B.CreateZExt(Cmp, CI->getType());
963       }
964     }
965   }
966 
967   if (uint64_t Len = GetStringLength(Src, CharSize)) {
968     Value *LenC = ConstantInt::get(CI->getType(), Len - 1);
969     // Fold strlen("xyz") -> 3 and strnlen("xyz", 2) -> 2
970     // and strnlen("xyz", Bound) -> min(3, Bound) for nonconstant Bound.
971     if (Bound)
972       return B.CreateBinaryIntrinsic(Intrinsic::umin, LenC, Bound);
973     return LenC;
974   }
975 
976   if (Bound)
977     // Punt for strnlen for now.
978     return nullptr;
979 
980   // If s is a constant pointer pointing to a string literal, we can fold
981   // strlen(s + x) to strlen(s) - x, when x is known to be in the range
982   // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
983   // We only try to simplify strlen when the pointer s points to an array
984   // of CharSize elements. Otherwise, we would need to scale the offset x before
985   // doing the subtraction. This will make the optimization more complex, and
986   // it's not very useful because calling strlen for a pointer of other types is
987   // very uncommon.
988   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
989     // TODO: Handle subobjects.
990     if (!isGEPBasedOnPointerToString(GEP, CharSize))
991       return nullptr;
992 
993     ConstantDataArraySlice Slice;
994     if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
995       uint64_t NullTermIdx;
996       if (Slice.Array == nullptr) {
997         NullTermIdx = 0;
998       } else {
999         NullTermIdx = ~((uint64_t)0);
1000         for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
1001           if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
1002             NullTermIdx = I;
1003             break;
1004           }
1005         }
1006         // If the string does not have '\0', leave it to strlen to compute
1007         // its length.
1008         if (NullTermIdx == ~((uint64_t)0))
1009           return nullptr;
1010       }
1011 
1012       Value *Offset = GEP->getOperand(2);
1013       KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
1014       uint64_t ArrSize =
1015              cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
1016 
1017       // If Offset is not provably in the range [0, NullTermIdx], we can still
1018       // optimize if we can prove that the program has undefined behavior when
1019       // Offset is outside that range. That is the case when GEP->getOperand(0)
1020       // is a pointer to an object whose memory extent is NullTermIdx+1.
1021       if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) ||
1022           (isa<GlobalVariable>(GEP->getOperand(0)) &&
1023            NullTermIdx == ArrSize - 1)) {
1024         Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
1025         return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
1026                            Offset);
1027       }
1028     }
1029   }
1030 
1031   // strlen(x?"foo":"bars") --> x ? 3 : 4
1032   if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
1033     uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
1034     uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
1035     if (LenTrue && LenFalse) {
1036       ORE.emit([&]() {
1037         return OptimizationRemark("instcombine", "simplify-libcalls", CI)
1038                << "folded strlen(select) to select of constants";
1039       });
1040       return B.CreateSelect(SI->getCondition(),
1041                             ConstantInt::get(CI->getType(), LenTrue - 1),
1042                             ConstantInt::get(CI->getType(), LenFalse - 1));
1043     }
1044   }
1045 
1046   return nullptr;
1047 }
1048 
1049 Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) {
1050   if (Value *V = optimizeStringLength(CI, B, 8))
1051     return V;
1052   annotateNonNullNoUndefBasedOnAccess(CI, 0);
1053   return nullptr;
1054 }
1055 
1056 Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) {
1057   Value *Bound = CI->getArgOperand(1);
1058   if (Value *V = optimizeStringLength(CI, B, 8, Bound))
1059     return V;
1060 
1061   if (isKnownNonZero(Bound, DL))
1062     annotateNonNullNoUndefBasedOnAccess(CI, 0);
1063   return nullptr;
1064 }
1065 
1066 Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) {
1067   Module &M = *CI->getModule();
1068   unsigned WCharSize = TLI->getWCharSize(M) * 8;
1069   // We cannot perform this optimization without wchar_size metadata.
1070   if (WCharSize == 0)
1071     return nullptr;
1072 
1073   return optimizeStringLength(CI, B, WCharSize);
1074 }
1075 
1076 Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) {
1077   StringRef S1, S2;
1078   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1079   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1080 
1081   // strpbrk(s, "") -> nullptr
1082   // strpbrk("", s) -> nullptr
1083   if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
1084     return Constant::getNullValue(CI->getType());
1085 
1086   // Constant folding.
1087   if (HasS1 && HasS2) {
1088     size_t I = S1.find_first_of(S2);
1089     if (I == StringRef::npos) // No match.
1090       return Constant::getNullValue(CI->getType());
1091 
1092     return B.CreateInBoundsGEP(B.getInt8Ty(), CI->getArgOperand(0),
1093                                B.getInt64(I), "strpbrk");
1094   }
1095 
1096   // strpbrk(s, "a") -> strchr(s, 'a')
1097   if (HasS2 && S2.size() == 1)
1098     return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI));
1099 
1100   return nullptr;
1101 }
1102 
1103 Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilderBase &B) {
1104   Value *EndPtr = CI->getArgOperand(1);
1105   if (isa<ConstantPointerNull>(EndPtr)) {
1106     // With a null EndPtr, this function won't capture the main argument.
1107     // It would be readonly too, except that it still may write to errno.
1108     CI->addParamAttr(0, Attribute::NoCapture);
1109   }
1110 
1111   return nullptr;
1112 }
1113 
1114 Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilderBase &B) {
1115   StringRef S1, S2;
1116   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1117   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1118 
1119   // strspn(s, "") -> 0
1120   // strspn("", s) -> 0
1121   if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
1122     return Constant::getNullValue(CI->getType());
1123 
1124   // Constant folding.
1125   if (HasS1 && HasS2) {
1126     size_t Pos = S1.find_first_not_of(S2);
1127     if (Pos == StringRef::npos)
1128       Pos = S1.size();
1129     return ConstantInt::get(CI->getType(), Pos);
1130   }
1131 
1132   return nullptr;
1133 }
1134 
1135 Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
1136   StringRef S1, S2;
1137   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
1138   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
1139 
1140   // strcspn("", s) -> 0
1141   if (HasS1 && S1.empty())
1142     return Constant::getNullValue(CI->getType());
1143 
1144   // Constant folding.
1145   if (HasS1 && HasS2) {
1146     size_t Pos = S1.find_first_of(S2);
1147     if (Pos == StringRef::npos)
1148       Pos = S1.size();
1149     return ConstantInt::get(CI->getType(), Pos);
1150   }
1151 
1152   // strcspn(s, "") -> strlen(s)
1153   if (HasS2 && S2.empty())
1154     return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI));
1155 
1156   return nullptr;
1157 }
1158 
1159 Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
1160   // fold strstr(x, x) -> x.
1161   if (CI->getArgOperand(0) == CI->getArgOperand(1))
1162     return CI->getArgOperand(0);
1163 
1164   // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
1165   if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
1166     Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI);
1167     if (!StrLen)
1168       return nullptr;
1169     Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
1170                                  StrLen, B, DL, TLI);
1171     if (!StrNCmp)
1172       return nullptr;
1173     for (User *U : llvm::make_early_inc_range(CI->users())) {
1174       ICmpInst *Old = cast<ICmpInst>(U);
1175       Value *Cmp =
1176           B.CreateICmp(Old->getPredicate(), StrNCmp,
1177                        ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
1178       replaceAllUsesWith(Old, Cmp);
1179     }
1180     return CI;
1181   }
1182 
1183   // See if either input string is a constant string.
1184   StringRef SearchStr, ToFindStr;
1185   bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
1186   bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
1187 
1188   // fold strstr(x, "") -> x.
1189   if (HasStr2 && ToFindStr.empty())
1190     return CI->getArgOperand(0);
1191 
1192   // If both strings are known, constant fold it.
1193   if (HasStr1 && HasStr2) {
1194     size_t Offset = SearchStr.find(ToFindStr);
1195 
1196     if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
1197       return Constant::getNullValue(CI->getType());
1198 
1199     // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
1200     return B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), CI->getArgOperand(0),
1201                                         Offset, "strstr");
1202   }
1203 
1204   // fold strstr(x, "y") -> strchr(x, 'y').
1205   if (HasStr2 && ToFindStr.size() == 1) {
1206     return emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
1207   }
1208 
1209   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
1210   return nullptr;
1211 }
1212 
1213 Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
1214   Value *SrcStr = CI->getArgOperand(0);
1215   Value *Size = CI->getArgOperand(2);
1216   annotateNonNullAndDereferenceable(CI, 0, Size, DL);
1217   Value *CharVal = CI->getArgOperand(1);
1218   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1219   Value *NullPtr = Constant::getNullValue(CI->getType());
1220 
1221   if (LenC) {
1222     if (LenC->isZero())
1223       // Fold memrchr(x, y, 0) --> null.
1224       return NullPtr;
1225 
1226     if (LenC->isOne()) {
1227       // Fold memrchr(x, y, 1) --> *x == y ? x : null for any x and y,
1228       // constant or otherwise.
1229       Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memrchr.char0");
1230       // Slice off the character's high end bits.
1231       CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
1232       Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memrchr.char0cmp");
1233       return B.CreateSelect(Cmp, SrcStr, NullPtr, "memrchr.sel");
1234     }
1235   }
1236 
1237   StringRef Str;
1238   if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false))
1239     return nullptr;
1240 
1241   if (Str.size() == 0)
1242     // If the array is empty fold memrchr(A, C, N) to null for any value
1243     // of C and N on the basis that the only valid value of N is zero
1244     // (otherwise the call is undefined).
1245     return NullPtr;
1246 
1247   uint64_t EndOff = UINT64_MAX;
1248   if (LenC) {
1249     EndOff = LenC->getZExtValue();
1250     if (Str.size() < EndOff)
1251       // Punt out-of-bounds accesses to sanitizers and/or libc.
1252       return nullptr;
1253   }
1254 
1255   if (ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal)) {
1256     // Fold memrchr(S, C, N) for a constant C.
1257     size_t Pos = Str.rfind(CharC->getZExtValue(), EndOff);
1258     if (Pos == StringRef::npos)
1259       // When the character is not in the source array fold the result
1260       // to null regardless of Size.
1261       return NullPtr;
1262 
1263     if (LenC)
1264       // Fold memrchr(s, c, N) --> s + Pos for constant N > Pos.
1265       return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos));
1266 
1267     if (Str.find(Str[Pos]) == Pos) {
1268       // When there is just a single occurrence of C in S, i.e., the one
1269       // in Str[Pos], fold
1270       //   memrchr(s, c, N) --> N <= Pos ? null : s + Pos
1271       // for nonconstant N.
1272       Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
1273                                    "memrchr.cmp");
1274       Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr,
1275                                            B.getInt64(Pos), "memrchr.ptr_plus");
1276       return B.CreateSelect(Cmp, NullPtr, SrcPlus, "memrchr.sel");
1277     }
1278   }
1279 
1280   // Truncate the string to search at most EndOff characters.
1281   Str = Str.substr(0, EndOff);
1282   if (Str.find_first_not_of(Str[0]) != StringRef::npos)
1283     return nullptr;
1284 
1285   // If the source array consists of all equal characters, then for any
1286   // C and N (whether in bounds or not), fold memrchr(S, C, N) to
1287   //   N != 0 && *S == C ? S + N - 1 : null
1288   Type *SizeTy = Size->getType();
1289   Type *Int8Ty = B.getInt8Ty();
1290   Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
1291   // Slice off the sought character's high end bits.
1292   CharVal = B.CreateTrunc(CharVal, Int8Ty);
1293   Value *CEqS0 = B.CreateICmpEQ(ConstantInt::get(Int8Ty, Str[0]), CharVal);
1294   Value *And = B.CreateLogicalAnd(NNeZ, CEqS0);
1295   Value *SizeM1 = B.CreateSub(Size, ConstantInt::get(SizeTy, 1));
1296   Value *SrcPlus =
1297       B.CreateInBoundsGEP(Int8Ty, SrcStr, SizeM1, "memrchr.ptr_plus");
1298   return B.CreateSelect(And, SrcPlus, NullPtr, "memrchr.sel");
1299 }
1300 
1301 Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
1302   Value *SrcStr = CI->getArgOperand(0);
1303   Value *Size = CI->getArgOperand(2);
1304 
1305   if (isKnownNonZero(Size, DL)) {
1306     annotateNonNullNoUndefBasedOnAccess(CI, 0);
1307     if (isOnlyUsedInEqualityComparison(CI, SrcStr))
1308       return memChrToCharCompare(CI, Size, B, DL);
1309   }
1310 
1311   Value *CharVal = CI->getArgOperand(1);
1312   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
1313   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1314   Value *NullPtr = Constant::getNullValue(CI->getType());
1315 
1316   // memchr(x, y, 0) -> null
1317   if (LenC) {
1318     if (LenC->isZero())
1319       return NullPtr;
1320 
1321     if (LenC->isOne()) {
1322       // Fold memchr(x, y, 1) --> *x == y ? x : null for any x and y,
1323       // constant or otherwise.
1324       Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memchr.char0");
1325       // Slice off the character's high end bits.
1326       CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
1327       Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memchr.char0cmp");
1328       return B.CreateSelect(Cmp, SrcStr, NullPtr, "memchr.sel");
1329     }
1330   }
1331 
1332   StringRef Str;
1333   if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false))
1334     return nullptr;
1335 
1336   if (CharC) {
1337     size_t Pos = Str.find(CharC->getZExtValue());
1338     if (Pos == StringRef::npos)
1339       // When the character is not in the source array fold the result
1340       // to null regardless of Size.
1341       return NullPtr;
1342 
1343     // Fold memchr(s, c, n) -> n <= Pos ? null : s + Pos
1344     // When the constant Size is less than or equal to the character
1345     // position also fold the result to null.
1346     Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
1347                                  "memchr.cmp");
1348     Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos),
1349                                          "memchr.ptr");
1350     return B.CreateSelect(Cmp, NullPtr, SrcPlus);
1351   }
1352 
1353   if (Str.size() == 0)
1354     // If the array is empty fold memchr(A, C, N) to null for any value
1355     // of C and N on the basis that the only valid value of N is zero
1356     // (otherwise the call is undefined).
1357     return NullPtr;
1358 
1359   if (LenC)
1360     Str = substr(Str, LenC->getZExtValue());
1361 
1362   size_t Pos = Str.find_first_not_of(Str[0]);
1363   if (Pos == StringRef::npos
1364       || Str.find_first_not_of(Str[Pos], Pos) == StringRef::npos) {
1365     // If the source array consists of at most two consecutive sequences
1366     // of the same characters, then for any C and N (whether in bounds or
1367     // not), fold memchr(S, C, N) to
1368     //   N != 0 && *S == C ? S : null
1369     // or for the two sequences to:
1370     //   N != 0 && *S == C ? S : (N > Pos && S[Pos] == C ? S + Pos : null)
1371     //   ^Sel2                   ^Sel1 are denoted above.
1372     // The latter makes it also possible to fold strchr() calls with strings
1373     // of the same characters.
1374     Type *SizeTy = Size->getType();
1375     Type *Int8Ty = B.getInt8Ty();
1376 
1377     // Slice off the sought character's high end bits.
1378     CharVal = B.CreateTrunc(CharVal, Int8Ty);
1379 
1380     Value *Sel1 = NullPtr;
1381     if (Pos != StringRef::npos) {
1382       // Handle two consecutive sequences of the same characters.
1383       Value *PosVal = ConstantInt::get(SizeTy, Pos);
1384       Value *StrPos = ConstantInt::get(Int8Ty, Str[Pos]);
1385       Value *CEqSPos = B.CreateICmpEQ(CharVal, StrPos);
1386       Value *NGtPos = B.CreateICmp(ICmpInst::ICMP_UGT, Size, PosVal);
1387       Value *And = B.CreateAnd(CEqSPos, NGtPos);
1388       Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, PosVal);
1389       Sel1 = B.CreateSelect(And, SrcPlus, NullPtr, "memchr.sel1");
1390     }
1391 
1392     Value *Str0 = ConstantInt::get(Int8Ty, Str[0]);
1393     Value *CEqS0 = B.CreateICmpEQ(Str0, CharVal);
1394     Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
1395     Value *And = B.CreateAnd(NNeZ, CEqS0);
1396     return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
1397   }
1398 
1399   if (!LenC) {
1400     if (isOnlyUsedInEqualityComparison(CI, SrcStr))
1401       // S is dereferenceable so it's safe to load from it and fold
1402       //   memchr(S, C, N) == S to N && *S == C for any C and N.
1403       // TODO: This is safe even for nonconstant S.
1404       return memChrToCharCompare(CI, Size, B, DL);
1405 
1406     // From now on we need a constant length and constant array.
1407     return nullptr;
1408   }
1409 
1410   bool OptForSize = CI->getFunction()->hasOptSize() ||
1411                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
1412                                                 PGSOQueryType::IRPass);
1413 
1414   // If the char is variable but the input str and length are not we can turn
1415   // this memchr call into a simple bit field test. Of course this only works
1416   // when the return value is only checked against null.
1417   //
1418   // It would be really nice to reuse switch lowering here but we can't change
1419   // the CFG at this point.
1420   //
1421   // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
1422   // != 0
1423   //   after bounds check.
1424   if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
1425     return nullptr;
1426 
1427   unsigned char Max =
1428       *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
1429                         reinterpret_cast<const unsigned char *>(Str.end()));
1430 
1431   // Make sure the bit field we're about to create fits in a register on the
1432   // target.
1433   // FIXME: On a 64 bit architecture this prevents us from using the
1434   // interesting range of alpha ascii chars. We could do better by emitting
1435   // two bitfields or shifting the range by 64 if no lower chars are used.
1436   if (!DL.fitsInLegalInteger(Max + 1)) {
1437     // Build chain of ORs
1438     // Transform:
1439     //    memchr("abcd", C, 4) != nullptr
1440     // to:
1441     //    (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0
1442     std::string SortedStr = Str.str();
1443     llvm::sort(SortedStr);
1444     // Compute the number of of non-contiguous ranges.
1445     unsigned NonContRanges = 1;
1446     for (size_t i = 1; i < SortedStr.size(); ++i) {
1447       if (SortedStr[i] > SortedStr[i - 1] + 1) {
1448         NonContRanges++;
1449       }
1450     }
1451 
1452     // Restrict this optimization to profitable cases with one or two range
1453     // checks.
1454     if (NonContRanges > 2)
1455       return nullptr;
1456 
1457     // Slice off the character's high end bits.
1458     CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
1459 
1460     SmallVector<Value *> CharCompares;
1461     for (unsigned char C : SortedStr)
1462       CharCompares.push_back(B.CreateICmpEQ(CharVal, B.getInt8(C)));
1463 
1464     return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType());
1465   }
1466 
1467   // For the bit field use a power-of-2 type with at least 8 bits to avoid
1468   // creating unnecessary illegal types.
1469   unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
1470 
1471   // Now build the bit field.
1472   APInt Bitfield(Width, 0);
1473   for (char C : Str)
1474     Bitfield.setBit((unsigned char)C);
1475   Value *BitfieldC = B.getInt(Bitfield);
1476 
1477   // Adjust width of "C" to the bitfield width, then mask off the high bits.
1478   Value *C = B.CreateZExtOrTrunc(CharVal, BitfieldC->getType());
1479   C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
1480 
1481   // First check that the bit field access is within bounds.
1482   Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
1483                                "memchr.bounds");
1484 
1485   // Create code that checks if the given bit is set in the field.
1486   Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
1487   Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
1488 
1489   // Finally merge both checks and cast to pointer type. The inttoptr
1490   // implicitly zexts the i1 to intptr type.
1491   return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"),
1492                           CI->getType());
1493 }
1494 
1495 // Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
1496 // arrays LHS and RHS and nonconstant Size.
1497 static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
1498                                     Value *Size, bool StrNCmp,
1499                                     IRBuilderBase &B, const DataLayout &DL) {
1500   if (LHS == RHS) // memcmp(s,s,x) -> 0
1501     return Constant::getNullValue(CI->getType());
1502 
1503   StringRef LStr, RStr;
1504   if (!getConstantStringInfo(LHS, LStr, /*TrimAtNul=*/false) ||
1505       !getConstantStringInfo(RHS, RStr, /*TrimAtNul=*/false))
1506     return nullptr;
1507 
1508   // If the contents of both constant arrays are known, fold a call to
1509   // memcmp(A, B, N) to
1510   //   N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0)
1511   // where Pos is the first mismatch between A and B, determined below.
1512 
1513   uint64_t Pos = 0;
1514   Value *Zero = ConstantInt::get(CI->getType(), 0);
1515   for (uint64_t MinSize = std::min(LStr.size(), RStr.size()); ; ++Pos) {
1516     if (Pos == MinSize ||
1517         (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) {
1518       // One array is a leading part of the other of equal or greater
1519       // size, or for strncmp, the arrays are equal strings.
1520       // Fold the result to zero.  Size is assumed to be in bounds, since
1521       // otherwise the call would be undefined.
1522       return Zero;
1523     }
1524 
1525     if (LStr[Pos] != RStr[Pos])
1526       break;
1527   }
1528 
1529   // Normalize the result.
1530   typedef unsigned char UChar;
1531   int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1;
1532   Value *MaxSize = ConstantInt::get(Size->getType(), Pos);
1533   Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize);
1534   Value *Res = ConstantInt::get(CI->getType(), IRes);
1535   return B.CreateSelect(Cmp, Zero, Res);
1536 }
1537 
1538 // Optimize a memcmp call CI with constant size Len.
1539 static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
1540                                          uint64_t Len, IRBuilderBase &B,
1541                                          const DataLayout &DL) {
1542   if (Len == 0) // memcmp(s1,s2,0) -> 0
1543     return Constant::getNullValue(CI->getType());
1544 
1545   // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
1546   if (Len == 1) {
1547     Value *LHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), LHS, "lhsc"),
1548                                CI->getType(), "lhsv");
1549     Value *RHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), RHS, "rhsc"),
1550                                CI->getType(), "rhsv");
1551     return B.CreateSub(LHSV, RHSV, "chardiff");
1552   }
1553 
1554   // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
1555   // TODO: The case where both inputs are constants does not need to be limited
1556   // to legal integers or equality comparison. See block below this.
1557   if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
1558     IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
1559     Align PrefAlignment = DL.getPrefTypeAlign(IntType);
1560 
1561     // First, see if we can fold either argument to a constant.
1562     Value *LHSV = nullptr;
1563     if (auto *LHSC = dyn_cast<Constant>(LHS))
1564       LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
1565 
1566     Value *RHSV = nullptr;
1567     if (auto *RHSC = dyn_cast<Constant>(RHS))
1568       RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
1569 
1570     // Don't generate unaligned loads. If either source is constant data,
1571     // alignment doesn't matter for that source because there is no load.
1572     if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
1573         (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
1574       if (!LHSV)
1575         LHSV = B.CreateLoad(IntType, LHS, "lhsv");
1576       if (!RHSV)
1577         RHSV = B.CreateLoad(IntType, RHS, "rhsv");
1578       return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
1579     }
1580   }
1581 
1582   return nullptr;
1583 }
1584 
1585 // Most simplifications for memcmp also apply to bcmp.
1586 Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
1587                                                    IRBuilderBase &B) {
1588   Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
1589   Value *Size = CI->getArgOperand(2);
1590 
1591   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1592 
1593   if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, false, B, DL))
1594     return Res;
1595 
1596   // Handle constant Size.
1597   ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
1598   if (!LenC)
1599     return nullptr;
1600 
1601   return optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL);
1602 }
1603 
1604 Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
1605   Module *M = CI->getModule();
1606   if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
1607     return V;
1608 
1609   // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
1610   // bcmp can be more efficient than memcmp because it only has to know that
1611   // there is a difference, not how different one is to the other.
1612   if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) &&
1613       isOnlyUsedInZeroEqualityComparison(CI)) {
1614     Value *LHS = CI->getArgOperand(0);
1615     Value *RHS = CI->getArgOperand(1);
1616     Value *Size = CI->getArgOperand(2);
1617     return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI));
1618   }
1619 
1620   return nullptr;
1621 }
1622 
1623 Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilderBase &B) {
1624   return optimizeMemCmpBCmpCommon(CI, B);
1625 }
1626 
1627 Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
1628   Value *Size = CI->getArgOperand(2);
1629   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1630   if (isa<IntrinsicInst>(CI))
1631     return nullptr;
1632 
1633   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
1634   CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
1635                                    CI->getArgOperand(1), Align(1), Size);
1636   mergeAttributesAndFlags(NewCI, *CI);
1637   return CI->getArgOperand(0);
1638 }
1639 
1640 Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
1641   Value *Dst = CI->getArgOperand(0);
1642   Value *Src = CI->getArgOperand(1);
1643   ConstantInt *StopChar = dyn_cast<ConstantInt>(CI->getArgOperand(2));
1644   ConstantInt *N = dyn_cast<ConstantInt>(CI->getArgOperand(3));
1645   StringRef SrcStr;
1646   if (CI->use_empty() && Dst == Src)
1647     return Dst;
1648   // memccpy(d, s, c, 0) -> nullptr
1649   if (N) {
1650     if (N->isNullValue())
1651       return Constant::getNullValue(CI->getType());
1652     if (!getConstantStringInfo(Src, SrcStr, /*TrimAtNul=*/false) ||
1653         // TODO: Handle zeroinitializer.
1654         !StopChar)
1655       return nullptr;
1656   } else {
1657     return nullptr;
1658   }
1659 
1660   // Wrap arg 'c' of type int to char
1661   size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
1662   if (Pos == StringRef::npos) {
1663     if (N->getZExtValue() <= SrcStr.size()) {
1664       copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1),
1665                                     CI->getArgOperand(3)));
1666       return Constant::getNullValue(CI->getType());
1667     }
1668     return nullptr;
1669   }
1670 
1671   Value *NewN =
1672       ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
1673   // memccpy -> llvm.memcpy
1674   copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN));
1675   return Pos + 1 <= N->getZExtValue()
1676              ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
1677              : Constant::getNullValue(CI->getType());
1678 }
1679 
1680 Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
1681   Value *Dst = CI->getArgOperand(0);
1682   Value *N = CI->getArgOperand(2);
1683   // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
1684   CallInst *NewCI =
1685       B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N);
1686   // Propagate attributes, but memcpy has no return value, so make sure that
1687   // any return attributes are compliant.
1688   // TODO: Attach return value attributes to the 1st operand to preserve them?
1689   mergeAttributesAndFlags(NewCI, *CI);
1690   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
1691 }
1692 
1693 Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
1694   Value *Size = CI->getArgOperand(2);
1695   annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
1696   if (isa<IntrinsicInst>(CI))
1697     return nullptr;
1698 
1699   // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
1700   CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
1701                                     CI->getArgOperand(1), Align(1), Size);
1702   mergeAttributesAndFlags(NewCI, *CI);
1703   return CI->getArgOperand(0);
1704 }
1705 
1706 Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
1707   Value *Size = CI->getArgOperand(2);
1708   annotateNonNullAndDereferenceable(CI, 0, Size, DL);
1709   if (isa<IntrinsicInst>(CI))
1710     return nullptr;
1711 
1712   // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
1713   Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
1714   CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
1715   mergeAttributesAndFlags(NewCI, *CI);
1716   return CI->getArgOperand(0);
1717 }
1718 
1719 Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
1720   if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
1721     return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI));
1722 
1723   return nullptr;
1724 }
1725 
1726 // When enabled, replace operator new() calls marked with a hot or cold memprof
1727 // attribute with an operator new() call that takes a __hot_cold_t parameter.
1728 // Currently this is supported by the open source version of tcmalloc, see:
1729 // https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h
1730 Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
1731                                       LibFunc &Func) {
1732   if (!OptimizeHotColdNew)
1733     return nullptr;
1734 
1735   uint8_t HotCold;
1736   if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "cold")
1737     HotCold = ColdNewHintValue;
1738   else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() ==
1739            "notcold")
1740     HotCold = NotColdNewHintValue;
1741   else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "hot")
1742     HotCold = HotNewHintValue;
1743   else
1744     return nullptr;
1745 
1746   // For calls that already pass a hot/cold hint, only update the hint if
1747   // directed by OptimizeExistingHotColdNew. For other calls to new, add a hint
1748   // if cold or hot, and leave as-is for default handling if "notcold" aka warm.
1749   // Note that in cases where we decide it is "notcold", it might be slightly
1750   // better to replace the hinted call with a non hinted call, to avoid the
1751   // extra parameter and the if condition check of the hint value in the
1752   // allocator. This can be considered in the future.
1753   switch (Func) {
1754   case LibFunc_Znwm12__hot_cold_t:
1755     if (OptimizeExistingHotColdNew)
1756       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1757                             LibFunc_Znwm12__hot_cold_t, HotCold);
1758     break;
1759   case LibFunc_Znwm:
1760     if (HotCold != NotColdNewHintValue)
1761       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1762                             LibFunc_Znwm12__hot_cold_t, HotCold);
1763     break;
1764   case LibFunc_Znam12__hot_cold_t:
1765     if (OptimizeExistingHotColdNew)
1766       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1767                             LibFunc_Znam12__hot_cold_t, HotCold);
1768     break;
1769   case LibFunc_Znam:
1770     if (HotCold != NotColdNewHintValue)
1771       return emitHotColdNew(CI->getArgOperand(0), B, TLI,
1772                             LibFunc_Znam12__hot_cold_t, HotCold);
1773     break;
1774   case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
1775     if (OptimizeExistingHotColdNew)
1776       return emitHotColdNewNoThrow(
1777           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1778           LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
1779     break;
1780   case LibFunc_ZnwmRKSt9nothrow_t:
1781     if (HotCold != NotColdNewHintValue)
1782       return emitHotColdNewNoThrow(
1783           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1784           LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
1785     break;
1786   case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
1787     if (OptimizeExistingHotColdNew)
1788       return emitHotColdNewNoThrow(
1789           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1790           LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
1791     break;
1792   case LibFunc_ZnamRKSt9nothrow_t:
1793     if (HotCold != NotColdNewHintValue)
1794       return emitHotColdNewNoThrow(
1795           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1796           LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
1797     break;
1798   case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
1799     if (OptimizeExistingHotColdNew)
1800       return emitHotColdNewAligned(
1801           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1802           LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
1803     break;
1804   case LibFunc_ZnwmSt11align_val_t:
1805     if (HotCold != NotColdNewHintValue)
1806       return emitHotColdNewAligned(
1807           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1808           LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
1809     break;
1810   case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
1811     if (OptimizeExistingHotColdNew)
1812       return emitHotColdNewAligned(
1813           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1814           LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
1815     break;
1816   case LibFunc_ZnamSt11align_val_t:
1817     if (HotCold != NotColdNewHintValue)
1818       return emitHotColdNewAligned(
1819           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1820           LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
1821     break;
1822   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1823     if (OptimizeExistingHotColdNew)
1824       return emitHotColdNewAlignedNoThrow(
1825           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1826           TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1827           HotCold);
1828     break;
1829   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
1830     if (HotCold != NotColdNewHintValue)
1831       return emitHotColdNewAlignedNoThrow(
1832           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1833           TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1834           HotCold);
1835     break;
1836   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1837     if (OptimizeExistingHotColdNew)
1838       return emitHotColdNewAlignedNoThrow(
1839           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1840           TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1841           HotCold);
1842     break;
1843   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
1844     if (HotCold != NotColdNewHintValue)
1845       return emitHotColdNewAlignedNoThrow(
1846           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
1847           TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1848           HotCold);
1849     break;
1850   case LibFunc_size_returning_new:
1851     if (HotCold != NotColdNewHintValue)
1852       return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
1853                                          LibFunc_size_returning_new_hot_cold,
1854                                          HotCold);
1855     break;
1856   case LibFunc_size_returning_new_hot_cold:
1857     if (OptimizeExistingHotColdNew)
1858       return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
1859                                          LibFunc_size_returning_new_hot_cold,
1860                                          HotCold);
1861     break;
1862   case LibFunc_size_returning_new_aligned:
1863     if (HotCold != NotColdNewHintValue)
1864       return emitHotColdSizeReturningNewAligned(
1865           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1866           LibFunc_size_returning_new_aligned_hot_cold, HotCold);
1867     break;
1868   case LibFunc_size_returning_new_aligned_hot_cold:
1869     if (OptimizeExistingHotColdNew)
1870       return emitHotColdSizeReturningNewAligned(
1871           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
1872           LibFunc_size_returning_new_aligned_hot_cold, HotCold);
1873     break;
1874   default:
1875     return nullptr;
1876   }
1877   return nullptr;
1878 }
1879 
1880 //===----------------------------------------------------------------------===//
1881 // Math Library Optimizations
1882 //===----------------------------------------------------------------------===//
1883 
1884 // Replace a libcall \p CI with a call to intrinsic \p IID
1885 static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B,
1886                                Intrinsic::ID IID) {
1887   CallInst *NewCall = B.CreateUnaryIntrinsic(IID, CI->getArgOperand(0), CI);
1888   NewCall->takeName(CI);
1889   return copyFlags(*CI, NewCall);
1890 }
1891 
1892 /// Return a variant of Val with float type.
1893 /// Currently this works in two cases: If Val is an FPExtension of a float
1894 /// value to something bigger, simply return the operand.
1895 /// If Val is a ConstantFP but can be converted to a float ConstantFP without
1896 /// loss of precision do so.
1897 static Value *valueHasFloatPrecision(Value *Val) {
1898   if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
1899     Value *Op = Cast->getOperand(0);
1900     if (Op->getType()->isFloatTy())
1901       return Op;
1902   }
1903   if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
1904     APFloat F = Const->getValueAPF();
1905     bool losesInfo;
1906     (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1907                     &losesInfo);
1908     if (!losesInfo)
1909       return ConstantFP::get(Const->getContext(), F);
1910   }
1911   return nullptr;
1912 }
1913 
1914 /// Shrink double -> float functions.
1915 static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
1916                                bool isBinary, const TargetLibraryInfo *TLI,
1917                                bool isPrecise = false) {
1918   Function *CalleeFn = CI->getCalledFunction();
1919   if (!CI->getType()->isDoubleTy() || !CalleeFn)
1920     return nullptr;
1921 
1922   // If not all the uses of the function are converted to float, then bail out.
1923   // This matters if the precision of the result is more important than the
1924   // precision of the arguments.
1925   if (isPrecise)
1926     for (User *U : CI->users()) {
1927       FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
1928       if (!Cast || !Cast->getType()->isFloatTy())
1929         return nullptr;
1930     }
1931 
1932   // If this is something like 'g((double) float)', convert to 'gf(float)'.
1933   Value *V[2];
1934   V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
1935   V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
1936   if (!V[0] || (isBinary && !V[1]))
1937     return nullptr;
1938 
1939   // If call isn't an intrinsic, check that it isn't within a function with the
1940   // same name as the float version of this call, otherwise the result is an
1941   // infinite loop.  For example, from MinGW-w64:
1942   //
1943   // float expf(float val) { return (float) exp((double) val); }
1944   StringRef CalleeName = CalleeFn->getName();
1945   bool IsIntrinsic = CalleeFn->isIntrinsic();
1946   if (!IsIntrinsic) {
1947     StringRef CallerName = CI->getFunction()->getName();
1948     if (!CallerName.empty() && CallerName.back() == 'f' &&
1949         CallerName.size() == (CalleeName.size() + 1) &&
1950         CallerName.starts_with(CalleeName))
1951       return nullptr;
1952   }
1953 
1954   // Propagate the math semantics from the current function to the new function.
1955   IRBuilderBase::FastMathFlagGuard Guard(B);
1956   B.setFastMathFlags(CI->getFastMathFlags());
1957 
1958   // g((double) float) -> (double) gf(float)
1959   Value *R;
1960   if (IsIntrinsic) {
1961     Module *M = CI->getModule();
1962     Intrinsic::ID IID = CalleeFn->getIntrinsicID();
1963     Function *Fn = Intrinsic::getOrInsertDeclaration(M, IID, B.getFloatTy());
1964     R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
1965   } else {
1966     AttributeList CalleeAttrs = CalleeFn->getAttributes();
1967     R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], TLI, CalleeName, B,
1968                                          CalleeAttrs)
1969                  : emitUnaryFloatFnCall(V[0], TLI, CalleeName, B, CalleeAttrs);
1970   }
1971   return B.CreateFPExt(R, B.getDoubleTy());
1972 }
1973 
1974 /// Shrink double -> float for unary functions.
1975 static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B,
1976                                     const TargetLibraryInfo *TLI,
1977                                     bool isPrecise = false) {
1978   return optimizeDoubleFP(CI, B, false, TLI, isPrecise);
1979 }
1980 
1981 /// Shrink double -> float for binary functions.
1982 static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B,
1983                                      const TargetLibraryInfo *TLI,
1984                                      bool isPrecise = false) {
1985   return optimizeDoubleFP(CI, B, true, TLI, isPrecise);
1986 }
1987 
1988 // cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
1989 Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
1990   Value *Real, *Imag;
1991 
1992   if (CI->arg_size() == 1) {
1993 
1994     if (!CI->isFast())
1995       return nullptr;
1996 
1997     Value *Op = CI->getArgOperand(0);
1998     assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
1999 
2000     Real = B.CreateExtractValue(Op, 0, "real");
2001     Imag = B.CreateExtractValue(Op, 1, "imag");
2002 
2003   } else {
2004     assert(CI->arg_size() == 2 && "Unexpected signature for cabs!");
2005 
2006     Real = CI->getArgOperand(0);
2007     Imag = CI->getArgOperand(1);
2008 
2009     // if real or imaginary part is zero, simplify to abs(cimag(z))
2010     // or abs(creal(z))
2011     Value *AbsOp = nullptr;
2012     if (ConstantFP *ConstReal = dyn_cast<ConstantFP>(Real)) {
2013       if (ConstReal->isZero())
2014         AbsOp = Imag;
2015 
2016     } else if (ConstantFP *ConstImag = dyn_cast<ConstantFP>(Imag)) {
2017       if (ConstImag->isZero())
2018         AbsOp = Real;
2019     }
2020 
2021     if (AbsOp) {
2022       IRBuilderBase::FastMathFlagGuard Guard(B);
2023       B.setFastMathFlags(CI->getFastMathFlags());
2024 
2025       return copyFlags(
2026           *CI, B.CreateUnaryIntrinsic(Intrinsic::fabs, AbsOp, nullptr, "cabs"));
2027     }
2028 
2029     if (!CI->isFast())
2030       return nullptr;
2031   }
2032 
2033   // Propagate fast-math flags from the existing call to new instructions.
2034   IRBuilderBase::FastMathFlagGuard Guard(B);
2035   B.setFastMathFlags(CI->getFastMathFlags());
2036 
2037   Value *RealReal = B.CreateFMul(Real, Real);
2038   Value *ImagImag = B.CreateFMul(Imag, Imag);
2039 
2040   return copyFlags(*CI, B.CreateUnaryIntrinsic(Intrinsic::sqrt,
2041                                                B.CreateFAdd(RealReal, ImagImag),
2042                                                nullptr, "cabs"));
2043 }
2044 
2045 // Return a properly extended integer (DstWidth bits wide) if the operation is
2046 // an itofp.
2047 static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
2048   if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
2049     Value *Op = cast<Instruction>(I2F)->getOperand(0);
2050     // Make sure that the exponent fits inside an "int" of size DstWidth,
2051     // thus avoiding any range issues that FP has not.
2052     unsigned BitWidth = Op->getType()->getScalarSizeInBits();
2053     if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
2054       Type *IntTy = Op->getType()->getWithNewBitWidth(DstWidth);
2055       return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)
2056                                   : B.CreateZExt(Op, IntTy);
2057     }
2058   }
2059 
2060   return nullptr;
2061 }
2062 
2063 /// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
2064 /// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
2065 /// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
2066 Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
2067   Module *M = Pow->getModule();
2068   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
2069   Type *Ty = Pow->getType();
2070   bool Ignored;
2071 
2072   // Evaluate special cases related to a nested function as the base.
2073 
2074   // pow(exp(x), y) -> exp(x * y)
2075   // pow(exp2(x), y) -> exp2(x * y)
2076   // If exp{,2}() is used only once, it is better to fold two transcendental
2077   // math functions into one.  If used again, exp{,2}() would still have to be
2078   // called with the original argument, then keep both original transcendental
2079   // functions.  However, this transformation is only safe with fully relaxed
2080   // math semantics, since, besides rounding differences, it changes overflow
2081   // and underflow behavior quite dramatically.  For example:
2082   //   pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
2083   // Whereas:
2084   //   exp(1000 * 0.001) = exp(1)
2085   // TODO: Loosen the requirement for fully relaxed math semantics.
2086   // TODO: Handle exp10() when more targets have it available.
2087   CallInst *BaseFn = dyn_cast<CallInst>(Base);
2088   if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
2089     LibFunc LibFn;
2090 
2091     Function *CalleeFn = BaseFn->getCalledFunction();
2092     if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
2093         isLibFuncEmittable(M, TLI, LibFn)) {
2094       StringRef ExpName;
2095       Intrinsic::ID ID;
2096       Value *ExpFn;
2097       LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble;
2098 
2099       switch (LibFn) {
2100       default:
2101         return nullptr;
2102       case LibFunc_expf:
2103       case LibFunc_exp:
2104       case LibFunc_expl:
2105         ExpName = TLI->getName(LibFunc_exp);
2106         ID = Intrinsic::exp;
2107         LibFnFloat = LibFunc_expf;
2108         LibFnDouble = LibFunc_exp;
2109         LibFnLongDouble = LibFunc_expl;
2110         break;
2111       case LibFunc_exp2f:
2112       case LibFunc_exp2:
2113       case LibFunc_exp2l:
2114         ExpName = TLI->getName(LibFunc_exp2);
2115         ID = Intrinsic::exp2;
2116         LibFnFloat = LibFunc_exp2f;
2117         LibFnDouble = LibFunc_exp2;
2118         LibFnLongDouble = LibFunc_exp2l;
2119         break;
2120       }
2121 
2122       // Create new exp{,2}() with the product as its argument.
2123       Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
2124       ExpFn = BaseFn->doesNotAccessMemory()
2125                   ? B.CreateUnaryIntrinsic(ID, FMul, nullptr, ExpName)
2126                   : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
2127                                          LibFnLongDouble, B,
2128                                          BaseFn->getAttributes());
2129 
2130       // Since the new exp{,2}() is different from the original one, dead code
2131       // elimination cannot be trusted to remove it, since it may have side
2132       // effects (e.g., errno).  When the only consumer for the original
2133       // exp{,2}() is pow(), then it has to be explicitly erased.
2134       substituteInParent(BaseFn, ExpFn);
2135       return ExpFn;
2136     }
2137   }
2138 
2139   // Evaluate special cases related to a constant base.
2140 
2141   const APFloat *BaseF;
2142   if (!match(Base, m_APFloat(BaseF)))
2143     return nullptr;
2144 
2145   AttributeList NoAttrs; // Attributes are only meaningful on the original call
2146 
2147   const bool UseIntrinsic = Pow->doesNotAccessMemory();
2148 
2149   // pow(2.0, itofp(x)) -> ldexp(1.0, x)
2150   if ((UseIntrinsic || !Ty->isVectorTy()) && BaseF->isExactlyValue(2.0) &&
2151       (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
2152       (UseIntrinsic ||
2153        hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl))) {
2154 
2155     // TODO: Shouldn't really need to depend on getIntToFPVal for intrinsic. Can
2156     // just directly use the original integer type.
2157     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) {
2158       Constant *One = ConstantFP::get(Ty, 1.0);
2159 
2160       if (UseIntrinsic) {
2161         return copyFlags(*Pow, B.CreateIntrinsic(Intrinsic::ldexp,
2162                                                  {Ty, ExpoI->getType()},
2163                                                  {One, ExpoI}, Pow, "exp2"));
2164       }
2165 
2166       return copyFlags(*Pow, emitBinaryFloatFnCall(
2167                                  One, ExpoI, TLI, LibFunc_ldexp, LibFunc_ldexpf,
2168                                  LibFunc_ldexpl, B, NoAttrs));
2169     }
2170   }
2171 
2172   // pow(2.0 ** n, x) -> exp2(n * x)
2173   if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
2174     APFloat BaseR = APFloat(1.0);
2175     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
2176     BaseR = BaseR / *BaseF;
2177     bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
2178     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
2179     APSInt NI(64, false);
2180     if ((IsInteger || IsReciprocal) &&
2181         NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
2182             APFloat::opOK &&
2183         NI > 1 && NI.isPowerOf2()) {
2184       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
2185       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
2186       if (Pow->doesNotAccessMemory())
2187         return copyFlags(*Pow, B.CreateUnaryIntrinsic(Intrinsic::exp2, FMul,
2188                                                       nullptr, "exp2"));
2189       else
2190         return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
2191                                                     LibFunc_exp2f,
2192                                                     LibFunc_exp2l, B, NoAttrs));
2193     }
2194   }
2195 
2196   // pow(10.0, x) -> exp10(x)
2197   if (BaseF->isExactlyValue(10.0) &&
2198       hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) {
2199 
2200     if (Pow->doesNotAccessMemory()) {
2201       CallInst *NewExp10 =
2202           B.CreateIntrinsic(Intrinsic::exp10, {Ty}, {Expo}, Pow, "exp10");
2203       return copyFlags(*Pow, NewExp10);
2204     }
2205 
2206     return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10,
2207                                                 LibFunc_exp10f, LibFunc_exp10l,
2208                                                 B, NoAttrs));
2209   }
2210 
2211   // pow(x, y) -> exp2(log2(x) * y)
2212   if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() &&
2213       !BaseF->isNegative()) {
2214     // pow(1, inf) is defined to be 1 but exp2(log2(1) * inf) evaluates to NaN.
2215     // Luckily optimizePow has already handled the x == 1 case.
2216     assert(!match(Base, m_FPOne()) &&
2217            "pow(1.0, y) should have been simplified earlier!");
2218 
2219     Value *Log = nullptr;
2220     if (Ty->isFloatTy())
2221       Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
2222     else if (Ty->isDoubleTy())
2223       Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble()));
2224 
2225     if (Log) {
2226       Value *FMul = B.CreateFMul(Log, Expo, "mul");
2227       if (Pow->doesNotAccessMemory())
2228         return copyFlags(*Pow, B.CreateUnaryIntrinsic(Intrinsic::exp2, FMul,
2229                                                       nullptr, "exp2"));
2230       else if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
2231                           LibFunc_exp2l))
2232         return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
2233                                                     LibFunc_exp2f,
2234                                                     LibFunc_exp2l, B, NoAttrs));
2235     }
2236   }
2237 
2238   return nullptr;
2239 }
2240 
2241 static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
2242                           Module *M, IRBuilderBase &B,
2243                           const TargetLibraryInfo *TLI) {
2244   // If errno is never set, then use the intrinsic for sqrt().
2245   if (NoErrno)
2246     return B.CreateUnaryIntrinsic(Intrinsic::sqrt, V, nullptr, "sqrt");
2247 
2248   // Otherwise, use the libcall for sqrt().
2249   if (hasFloatFn(M, TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
2250                  LibFunc_sqrtl))
2251     // TODO: We also should check that the target can in fact lower the sqrt()
2252     // libcall. We currently have no way to ask this question, so we ask if
2253     // the target has a sqrt() libcall, which is not exactly the same.
2254     return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
2255                                 LibFunc_sqrtl, B, Attrs);
2256 
2257   return nullptr;
2258 }
2259 
2260 /// Use square root in place of pow(x, +/-0.5).
2261 Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
2262   Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
2263   Module *Mod = Pow->getModule();
2264   Type *Ty = Pow->getType();
2265 
2266   const APFloat *ExpoF;
2267   if (!match(Expo, m_APFloat(ExpoF)) ||
2268       (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
2269     return nullptr;
2270 
2271   // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step,
2272   // so that requires fast-math-flags (afn or reassoc).
2273   if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
2274     return nullptr;
2275 
2276   // If we have a pow() library call (accesses memory) and we can't guarantee
2277   // that the base is not an infinity, give up:
2278   // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting
2279   // errno), but sqrt(-Inf) is required by various standards to set errno.
2280   if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
2281       !isKnownNeverInfinity(
2282           Base, 0, SimplifyQuery(DL, TLI, DT, AC, Pow, true, true, DC)))
2283     return nullptr;
2284 
2285   Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B,
2286                      TLI);
2287   if (!Sqrt)
2288     return nullptr;
2289 
2290   // Handle signed zero base by expanding to fabs(sqrt(x)).
2291   if (!Pow->hasNoSignedZeros())
2292     Sqrt = B.CreateUnaryIntrinsic(Intrinsic::fabs, Sqrt, nullptr, "abs");
2293 
2294   Sqrt = copyFlags(*Pow, Sqrt);
2295 
2296   // Handle non finite base by expanding to
2297   // (x == -infinity ? +infinity : sqrt(x)).
2298   if (!Pow->hasNoInfs()) {
2299     Value *PosInf = ConstantFP::getInfinity(Ty),
2300           *NegInf = ConstantFP::getInfinity(Ty, true);
2301     Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
2302     Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
2303   }
2304 
2305   // If the exponent is negative, then get the reciprocal.
2306   if (ExpoF->isNegative())
2307     Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
2308 
2309   return Sqrt;
2310 }
2311 
2312 static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
2313                                            IRBuilderBase &B) {
2314   Value *Args[] = {Base, Expo};
2315   Type *Types[] = {Base->getType(), Expo->getType()};
2316   return B.CreateIntrinsic(Intrinsic::powi, Types, Args);
2317 }
2318 
2319 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
2320   Value *Base = Pow->getArgOperand(0);
2321   Value *Expo = Pow->getArgOperand(1);
2322   Function *Callee = Pow->getCalledFunction();
2323   StringRef Name = Callee->getName();
2324   Type *Ty = Pow->getType();
2325   Module *M = Pow->getModule();
2326   bool AllowApprox = Pow->hasApproxFunc();
2327   bool Ignored;
2328 
2329   // Propagate the math semantics from the call to any created instructions.
2330   IRBuilderBase::FastMathFlagGuard Guard(B);
2331   B.setFastMathFlags(Pow->getFastMathFlags());
2332   // Evaluate special cases related to the base.
2333 
2334   // pow(1.0, x) -> 1.0
2335   if (match(Base, m_FPOne()))
2336     return Base;
2337 
2338   if (Value *Exp = replacePowWithExp(Pow, B))
2339     return Exp;
2340 
2341   // Evaluate special cases related to the exponent.
2342 
2343   // pow(x, -1.0) -> 1.0 / x
2344   if (match(Expo, m_SpecificFP(-1.0)))
2345     return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
2346 
2347   // pow(x, +/-0.0) -> 1.0
2348   if (match(Expo, m_AnyZeroFP()))
2349     return ConstantFP::get(Ty, 1.0);
2350 
2351   // pow(x, 1.0) -> x
2352   if (match(Expo, m_FPOne()))
2353     return Base;
2354 
2355   // pow(x, 2.0) -> x * x
2356   if (match(Expo, m_SpecificFP(2.0)))
2357     return B.CreateFMul(Base, Base, "square");
2358 
2359   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
2360     return Sqrt;
2361 
2362   // If we can approximate pow:
2363   // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
2364   // pow(x, n) -> powi(x, n) if n is a constant signed integer value
2365   const APFloat *ExpoF;
2366   if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
2367       !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
2368     APFloat ExpoA(abs(*ExpoF));
2369     APFloat ExpoI(*ExpoF);
2370     Value *Sqrt = nullptr;
2371     if (!ExpoA.isInteger()) {
2372       APFloat Expo2 = ExpoA;
2373       // To check if ExpoA is an integer + 0.5, we add it to itself. If there
2374       // is no floating point exception and the result is an integer, then
2375       // ExpoA == integer + 0.5
2376       if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
2377         return nullptr;
2378 
2379       if (!Expo2.isInteger())
2380         return nullptr;
2381 
2382       if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) !=
2383           APFloat::opInexact)
2384         return nullptr;
2385       if (!ExpoI.isInteger())
2386         return nullptr;
2387       ExpoF = &ExpoI;
2388 
2389       Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), M,
2390                          B, TLI);
2391       if (!Sqrt)
2392         return nullptr;
2393     }
2394 
2395     // 0.5 fraction is now optionally handled.
2396     // Do pow -> powi for remaining integer exponent
2397     APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
2398     if (ExpoF->isInteger() &&
2399         ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
2400             APFloat::opOK) {
2401       Value *PowI = copyFlags(
2402           *Pow,
2403           createPowWithIntegerExponent(
2404               Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
2405               M, B));
2406 
2407       if (PowI && Sqrt)
2408         return B.CreateFMul(PowI, Sqrt);
2409 
2410       return PowI;
2411     }
2412   }
2413 
2414   // powf(x, itofp(y)) -> powi(x, y)
2415   if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
2416     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
2417       return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B));
2418   }
2419 
2420   // Shrink pow() to powf() if the arguments are single precision,
2421   // unless the result is expected to be double precision.
2422   if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
2423       hasFloatVersion(M, Name)) {
2424     if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, TLI, true))
2425       return Shrunk;
2426   }
2427 
2428   return nullptr;
2429 }
2430 
2431 Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
2432   Module *M = CI->getModule();
2433   Function *Callee = CI->getCalledFunction();
2434   StringRef Name = Callee->getName();
2435   Value *Ret = nullptr;
2436   if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
2437       hasFloatVersion(M, Name))
2438     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2439 
2440   // If we have an llvm.exp2 intrinsic, emit the llvm.ldexp intrinsic. If we
2441   // have the libcall, emit the libcall.
2442   //
2443   // TODO: In principle we should be able to just always use the intrinsic for
2444   // any doesNotAccessMemory callsite.
2445 
2446   const bool UseIntrinsic = Callee->isIntrinsic();
2447   // Bail out for vectors because the code below only expects scalars.
2448   Type *Ty = CI->getType();
2449   if (!UseIntrinsic && Ty->isVectorTy())
2450     return Ret;
2451 
2452   // exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= IntSize
2453   // exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < IntSize
2454   Value *Op = CI->getArgOperand(0);
2455   if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
2456       (UseIntrinsic ||
2457        hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl))) {
2458     if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) {
2459       Constant *One = ConstantFP::get(Ty, 1.0);
2460 
2461       if (UseIntrinsic) {
2462         return copyFlags(*CI, B.CreateIntrinsic(Intrinsic::ldexp,
2463                                                 {Ty, Exp->getType()},
2464                                                 {One, Exp}, CI));
2465       }
2466 
2467       IRBuilderBase::FastMathFlagGuard Guard(B);
2468       B.setFastMathFlags(CI->getFastMathFlags());
2469       return copyFlags(*CI, emitBinaryFloatFnCall(
2470                                 One, Exp, TLI, LibFunc_ldexp, LibFunc_ldexpf,
2471                                 LibFunc_ldexpl, B, AttributeList()));
2472     }
2473   }
2474 
2475   return Ret;
2476 }
2477 
2478 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
2479   Module *M = CI->getModule();
2480 
2481   // If we can shrink the call to a float function rather than a double
2482   // function, do that first.
2483   Function *Callee = CI->getCalledFunction();
2484   StringRef Name = Callee->getName();
2485   if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(M, Name))
2486     if (Value *Ret = optimizeBinaryDoubleFP(CI, B, TLI))
2487       return Ret;
2488 
2489   // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
2490   // the intrinsics for improved optimization (for example, vectorization).
2491   // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
2492   // From the C standard draft WG14/N1256:
2493   // "Ideally, fmax would be sensitive to the sign of zero, for example
2494   // fmax(-0.0, +0.0) would return +0; however, implementation in software
2495   // might be impractical."
2496   IRBuilderBase::FastMathFlagGuard Guard(B);
2497   FastMathFlags FMF = CI->getFastMathFlags();
2498   FMF.setNoSignedZeros();
2499   B.setFastMathFlags(FMF);
2500 
2501   Intrinsic::ID IID = Callee->getName().starts_with("fmin") ? Intrinsic::minnum
2502                                                             : Intrinsic::maxnum;
2503   return copyFlags(*CI, B.CreateBinaryIntrinsic(IID, CI->getArgOperand(0),
2504                                                 CI->getArgOperand(1)));
2505 }
2506 
2507 Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
2508   Function *LogFn = Log->getCalledFunction();
2509   StringRef LogNm = LogFn->getName();
2510   Intrinsic::ID LogID = LogFn->getIntrinsicID();
2511   Module *Mod = Log->getModule();
2512   Type *Ty = Log->getType();
2513 
2514   if (UnsafeFPShrink && hasFloatVersion(Mod, LogNm))
2515     if (Value *Ret = optimizeUnaryDoubleFP(Log, B, TLI, true))
2516       return Ret;
2517 
2518   LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb;
2519 
2520   // This is only applicable to log(), log2(), log10().
2521   if (TLI->getLibFunc(LogNm, LogLb)) {
2522     switch (LogLb) {
2523     case LibFunc_logf:
2524       LogID = Intrinsic::log;
2525       ExpLb = LibFunc_expf;
2526       Exp2Lb = LibFunc_exp2f;
2527       Exp10Lb = LibFunc_exp10f;
2528       PowLb = LibFunc_powf;
2529       break;
2530     case LibFunc_log:
2531       LogID = Intrinsic::log;
2532       ExpLb = LibFunc_exp;
2533       Exp2Lb = LibFunc_exp2;
2534       Exp10Lb = LibFunc_exp10;
2535       PowLb = LibFunc_pow;
2536       break;
2537     case LibFunc_logl:
2538       LogID = Intrinsic::log;
2539       ExpLb = LibFunc_expl;
2540       Exp2Lb = LibFunc_exp2l;
2541       Exp10Lb = LibFunc_exp10l;
2542       PowLb = LibFunc_powl;
2543       break;
2544     case LibFunc_log2f:
2545       LogID = Intrinsic::log2;
2546       ExpLb = LibFunc_expf;
2547       Exp2Lb = LibFunc_exp2f;
2548       Exp10Lb = LibFunc_exp10f;
2549       PowLb = LibFunc_powf;
2550       break;
2551     case LibFunc_log2:
2552       LogID = Intrinsic::log2;
2553       ExpLb = LibFunc_exp;
2554       Exp2Lb = LibFunc_exp2;
2555       Exp10Lb = LibFunc_exp10;
2556       PowLb = LibFunc_pow;
2557       break;
2558     case LibFunc_log2l:
2559       LogID = Intrinsic::log2;
2560       ExpLb = LibFunc_expl;
2561       Exp2Lb = LibFunc_exp2l;
2562       Exp10Lb = LibFunc_exp10l;
2563       PowLb = LibFunc_powl;
2564       break;
2565     case LibFunc_log10f:
2566       LogID = Intrinsic::log10;
2567       ExpLb = LibFunc_expf;
2568       Exp2Lb = LibFunc_exp2f;
2569       Exp10Lb = LibFunc_exp10f;
2570       PowLb = LibFunc_powf;
2571       break;
2572     case LibFunc_log10:
2573       LogID = Intrinsic::log10;
2574       ExpLb = LibFunc_exp;
2575       Exp2Lb = LibFunc_exp2;
2576       Exp10Lb = LibFunc_exp10;
2577       PowLb = LibFunc_pow;
2578       break;
2579     case LibFunc_log10l:
2580       LogID = Intrinsic::log10;
2581       ExpLb = LibFunc_expl;
2582       Exp2Lb = LibFunc_exp2l;
2583       Exp10Lb = LibFunc_exp10l;
2584       PowLb = LibFunc_powl;
2585       break;
2586     default:
2587       return nullptr;
2588     }
2589 
2590     // Convert libcall to intrinsic if the value is known > 0.
2591     bool IsKnownNoErrno = Log->hasNoNaNs() && Log->hasNoInfs();
2592     if (!IsKnownNoErrno) {
2593       SimplifyQuery SQ(DL, TLI, DT, AC, Log, true, true, DC);
2594       KnownFPClass Known = computeKnownFPClass(
2595           Log->getOperand(0),
2596           KnownFPClass::OrderedLessThanZeroMask | fcSubnormal,
2597           /*Depth=*/0, SQ);
2598       Function *F = Log->getParent()->getParent();
2599       IsKnownNoErrno = Known.cannotBeOrderedLessThanZero() &&
2600                        Known.isKnownNeverLogicalZero(*F, Ty);
2601     }
2602     if (IsKnownNoErrno) {
2603       auto *NewLog = B.CreateUnaryIntrinsic(LogID, Log->getArgOperand(0), Log);
2604       NewLog->copyMetadata(*Log);
2605       return copyFlags(*Log, NewLog);
2606     }
2607   } else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
2608              LogID == Intrinsic::log10) {
2609     if (Ty->getScalarType()->isFloatTy()) {
2610       ExpLb = LibFunc_expf;
2611       Exp2Lb = LibFunc_exp2f;
2612       Exp10Lb = LibFunc_exp10f;
2613       PowLb = LibFunc_powf;
2614     } else if (Ty->getScalarType()->isDoubleTy()) {
2615       ExpLb = LibFunc_exp;
2616       Exp2Lb = LibFunc_exp2;
2617       Exp10Lb = LibFunc_exp10;
2618       PowLb = LibFunc_pow;
2619     } else
2620       return nullptr;
2621   } else
2622     return nullptr;
2623 
2624   // The earlier call must also be 'fast' in order to do these transforms.
2625   CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
2626   if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse())
2627     return nullptr;
2628 
2629   IRBuilderBase::FastMathFlagGuard Guard(B);
2630   B.setFastMathFlags(FastMathFlags::getFast());
2631 
2632   Intrinsic::ID ArgID = Arg->getIntrinsicID();
2633   LibFunc ArgLb = NotLibFunc;
2634   TLI->getLibFunc(*Arg, ArgLb);
2635 
2636   // log(pow(x,y)) -> y*log(x)
2637   AttributeList NoAttrs;
2638   if (ArgLb == PowLb || ArgID == Intrinsic::pow || ArgID == Intrinsic::powi) {
2639     Value *LogX =
2640         Log->doesNotAccessMemory()
2641             ? B.CreateUnaryIntrinsic(LogID, Arg->getOperand(0), nullptr, "log")
2642             : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, NoAttrs);
2643     Value *Y = Arg->getArgOperand(1);
2644     // Cast exponent to FP if integer.
2645     if (ArgID == Intrinsic::powi)
2646       Y = B.CreateSIToFP(Y, Ty, "cast");
2647     Value *MulY = B.CreateFMul(Y, LogX, "mul");
2648     // Since pow() may have side effects, e.g. errno,
2649     // dead code elimination may not be trusted to remove it.
2650     substituteInParent(Arg, MulY);
2651     return MulY;
2652   }
2653 
2654   // log(exp{,2,10}(y)) -> y*log({e,2,10})
2655   // TODO: There is no exp10() intrinsic yet.
2656   if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb ||
2657            ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) {
2658     Constant *Eul;
2659     if (ArgLb == ExpLb || ArgID == Intrinsic::exp)
2660       // FIXME: Add more precise value of e for long double.
2661       Eul = ConstantFP::get(Log->getType(), numbers::e);
2662     else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2)
2663       Eul = ConstantFP::get(Log->getType(), 2.0);
2664     else
2665       Eul = ConstantFP::get(Log->getType(), 10.0);
2666     Value *LogE = Log->doesNotAccessMemory()
2667                       ? B.CreateUnaryIntrinsic(LogID, Eul, nullptr, "log")
2668                       : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, NoAttrs);
2669     Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
2670     // Since exp() may have side effects, e.g. errno,
2671     // dead code elimination may not be trusted to remove it.
2672     substituteInParent(Arg, MulY);
2673     return MulY;
2674   }
2675 
2676   return nullptr;
2677 }
2678 
2679 // sqrt(exp(X)) -> exp(X * 0.5)
2680 Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) {
2681   if (!CI->hasAllowReassoc())
2682     return nullptr;
2683 
2684   Function *SqrtFn = CI->getCalledFunction();
2685   CallInst *Arg = dyn_cast<CallInst>(CI->getArgOperand(0));
2686   if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse())
2687     return nullptr;
2688   Intrinsic::ID ArgID = Arg->getIntrinsicID();
2689   LibFunc ArgLb = NotLibFunc;
2690   TLI->getLibFunc(*Arg, ArgLb);
2691 
2692   LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb;
2693 
2694   if (TLI->getLibFunc(SqrtFn->getName(), SqrtLb))
2695     switch (SqrtLb) {
2696     case LibFunc_sqrtf:
2697       ExpLb = LibFunc_expf;
2698       Exp2Lb = LibFunc_exp2f;
2699       Exp10Lb = LibFunc_exp10f;
2700       break;
2701     case LibFunc_sqrt:
2702       ExpLb = LibFunc_exp;
2703       Exp2Lb = LibFunc_exp2;
2704       Exp10Lb = LibFunc_exp10;
2705       break;
2706     case LibFunc_sqrtl:
2707       ExpLb = LibFunc_expl;
2708       Exp2Lb = LibFunc_exp2l;
2709       Exp10Lb = LibFunc_exp10l;
2710       break;
2711     default:
2712       return nullptr;
2713     }
2714   else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) {
2715     if (CI->getType()->getScalarType()->isFloatTy()) {
2716       ExpLb = LibFunc_expf;
2717       Exp2Lb = LibFunc_exp2f;
2718       Exp10Lb = LibFunc_exp10f;
2719     } else if (CI->getType()->getScalarType()->isDoubleTy()) {
2720       ExpLb = LibFunc_exp;
2721       Exp2Lb = LibFunc_exp2;
2722       Exp10Lb = LibFunc_exp10;
2723     } else
2724       return nullptr;
2725   } else
2726     return nullptr;
2727 
2728   if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb &&
2729       ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2)
2730     return nullptr;
2731 
2732   IRBuilderBase::InsertPointGuard Guard(B);
2733   B.SetInsertPoint(Arg);
2734   auto *ExpOperand = Arg->getOperand(0);
2735   auto *FMul =
2736       B.CreateFMulFMF(ExpOperand, ConstantFP::get(ExpOperand->getType(), 0.5),
2737                       CI, "merged.sqrt");
2738 
2739   Arg->setOperand(0, FMul);
2740   return Arg;
2741 }
2742 
2743 Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
2744   Module *M = CI->getModule();
2745   Function *Callee = CI->getCalledFunction();
2746   Value *Ret = nullptr;
2747   // TODO: Once we have a way (other than checking for the existince of the
2748   // libcall) to tell whether our target can lower @llvm.sqrt, relax the
2749   // condition below.
2750   if (isLibFuncEmittable(M, TLI, LibFunc_sqrtf) &&
2751       (Callee->getName() == "sqrt" ||
2752        Callee->getIntrinsicID() == Intrinsic::sqrt))
2753     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2754 
2755   if (Value *Opt = mergeSqrtToExp(CI, B))
2756     return Opt;
2757 
2758   if (!CI->isFast())
2759     return Ret;
2760 
2761   Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
2762   if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
2763     return Ret;
2764 
2765   // We're looking for a repeated factor in a multiplication tree,
2766   // so we can do this fold: sqrt(x * x) -> fabs(x);
2767   // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
2768   Value *Op0 = I->getOperand(0);
2769   Value *Op1 = I->getOperand(1);
2770   Value *RepeatOp = nullptr;
2771   Value *OtherOp = nullptr;
2772   if (Op0 == Op1) {
2773     // Simple match: the operands of the multiply are identical.
2774     RepeatOp = Op0;
2775   } else {
2776     // Look for a more complicated pattern: one of the operands is itself
2777     // a multiply, so search for a common factor in that multiply.
2778     // Note: We don't bother looking any deeper than this first level or for
2779     // variations of this pattern because instcombine's visitFMUL and/or the
2780     // reassociation pass should give us this form.
2781     Value *MulOp;
2782     if (match(Op0, m_FMul(m_Value(MulOp), m_Deferred(MulOp))) &&
2783         cast<Instruction>(Op0)->isFast()) {
2784       // Pattern: sqrt((x * x) * z)
2785       RepeatOp = MulOp;
2786       OtherOp = Op1;
2787     } else if (match(Op1, m_FMul(m_Value(MulOp), m_Deferred(MulOp))) &&
2788                cast<Instruction>(Op1)->isFast()) {
2789       // Pattern: sqrt(z * (x * x))
2790       RepeatOp = MulOp;
2791       OtherOp = Op0;
2792     }
2793   }
2794   if (!RepeatOp)
2795     return Ret;
2796 
2797   // Fast math flags for any created instructions should match the sqrt
2798   // and multiply.
2799   IRBuilderBase::FastMathFlagGuard Guard(B);
2800   B.setFastMathFlags(I->getFastMathFlags());
2801 
2802   // If we found a repeated factor, hoist it out of the square root and
2803   // replace it with the fabs of that factor.
2804   Value *FabsCall =
2805       B.CreateUnaryIntrinsic(Intrinsic::fabs, RepeatOp, nullptr, "fabs");
2806   if (OtherOp) {
2807     // If we found a non-repeated factor, we still need to get its square
2808     // root. We then multiply that by the value that was simplified out
2809     // of the square root calculation.
2810     Value *SqrtCall =
2811         B.CreateUnaryIntrinsic(Intrinsic::sqrt, OtherOp, nullptr, "sqrt");
2812     return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall));
2813   }
2814   return copyFlags(*CI, FabsCall);
2815 }
2816 
2817 Value *LibCallSimplifier::optimizeFMod(CallInst *CI, IRBuilderBase &B) {
2818 
2819   // fmod(x,y) can set errno if y == 0 or x == +/-inf, and returns Nan in those
2820   // case. If we know those do not happen, then we can convert the fmod into
2821   // frem.
2822   bool IsNoNan = CI->hasNoNaNs();
2823   if (!IsNoNan) {
2824     SimplifyQuery SQ(DL, TLI, DT, AC, CI, true, true, DC);
2825     KnownFPClass Known0 = computeKnownFPClass(CI->getOperand(0), fcInf,
2826                                               /*Depth=*/0, SQ);
2827     if (Known0.isKnownNeverInfinity()) {
2828       KnownFPClass Known1 =
2829           computeKnownFPClass(CI->getOperand(1), fcZero | fcSubnormal,
2830                               /*Depth=*/0, SQ);
2831       Function *F = CI->getParent()->getParent();
2832       IsNoNan = Known1.isKnownNeverLogicalZero(*F, CI->getType());
2833     }
2834   }
2835 
2836   if (IsNoNan) {
2837     Value *FRem = B.CreateFRemFMF(CI->getOperand(0), CI->getOperand(1), CI);
2838     if (auto *FRemI = dyn_cast<Instruction>(FRem))
2839       FRemI->setHasNoNaNs(true);
2840     return FRem;
2841   }
2842   return nullptr;
2843 }
2844 
2845 Value *LibCallSimplifier::optimizeTrigInversionPairs(CallInst *CI,
2846                                                      IRBuilderBase &B) {
2847   Module *M = CI->getModule();
2848   Function *Callee = CI->getCalledFunction();
2849   Value *Ret = nullptr;
2850   StringRef Name = Callee->getName();
2851   if (UnsafeFPShrink &&
2852       (Name == "tan" || Name == "atanh" || Name == "sinh" || Name == "cosh" ||
2853        Name == "asinh") &&
2854       hasFloatVersion(M, Name))
2855     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
2856 
2857   Value *Op1 = CI->getArgOperand(0);
2858   auto *OpC = dyn_cast<CallInst>(Op1);
2859   if (!OpC)
2860     return Ret;
2861 
2862   // Both calls must be 'fast' in order to remove them.
2863   if (!CI->isFast() || !OpC->isFast())
2864     return Ret;
2865 
2866   // tan(atan(x)) -> x
2867   // atanh(tanh(x)) -> x
2868   // sinh(asinh(x)) -> x
2869   // asinh(sinh(x)) -> x
2870   // cosh(acosh(x)) -> x
2871   LibFunc Func;
2872   Function *F = OpC->getCalledFunction();
2873   if (F && TLI->getLibFunc(F->getName(), Func) &&
2874       isLibFuncEmittable(M, TLI, Func)) {
2875     LibFunc inverseFunc = llvm::StringSwitch<LibFunc>(Callee->getName())
2876                               .Case("tan", LibFunc_atan)
2877                               .Case("atanh", LibFunc_tanh)
2878                               .Case("sinh", LibFunc_asinh)
2879                               .Case("cosh", LibFunc_acosh)
2880                               .Case("tanf", LibFunc_atanf)
2881                               .Case("atanhf", LibFunc_tanhf)
2882                               .Case("sinhf", LibFunc_asinhf)
2883                               .Case("coshf", LibFunc_acoshf)
2884                               .Case("tanl", LibFunc_atanl)
2885                               .Case("atanhl", LibFunc_tanhl)
2886                               .Case("sinhl", LibFunc_asinhl)
2887                               .Case("coshl", LibFunc_acoshl)
2888                               .Case("asinh", LibFunc_sinh)
2889                               .Case("asinhf", LibFunc_sinhf)
2890                               .Case("asinhl", LibFunc_sinhl)
2891                               .Default(NumLibFuncs); // Used as error value
2892     if (Func == inverseFunc)
2893       Ret = OpC->getArgOperand(0);
2894   }
2895   return Ret;
2896 }
2897 
2898 static bool isTrigLibCall(CallInst *CI) {
2899   // We can only hope to do anything useful if we can ignore things like errno
2900   // and floating-point exceptions.
2901   // We already checked the prototype.
2902   return CI->doesNotThrow() && CI->doesNotAccessMemory();
2903 }
2904 
2905 static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
2906                              bool UseFloat, Value *&Sin, Value *&Cos,
2907                              Value *&SinCos, const TargetLibraryInfo *TLI) {
2908   Module *M = OrigCallee->getParent();
2909   Type *ArgTy = Arg->getType();
2910   Type *ResTy;
2911   StringRef Name;
2912 
2913   Triple T(OrigCallee->getParent()->getTargetTriple());
2914   if (UseFloat) {
2915     Name = "__sincospif_stret";
2916 
2917     assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
2918     // x86_64 can't use {float, float} since that would be returned in both
2919     // xmm0 and xmm1, which isn't what a real struct would do.
2920     ResTy = T.getArch() == Triple::x86_64
2921                 ? static_cast<Type *>(FixedVectorType::get(ArgTy, 2))
2922                 : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
2923   } else {
2924     Name = "__sincospi_stret";
2925     ResTy = StructType::get(ArgTy, ArgTy);
2926   }
2927 
2928   if (!isLibFuncEmittable(M, TLI, Name))
2929     return false;
2930   LibFunc TheLibFunc;
2931   TLI->getLibFunc(Name, TheLibFunc);
2932   FunctionCallee Callee = getOrInsertLibFunc(
2933       M, *TLI, TheLibFunc, OrigCallee->getAttributes(), ResTy, ArgTy);
2934 
2935   if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
2936     // If the argument is an instruction, it must dominate all uses so put our
2937     // sincos call there.
2938     B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
2939   } else {
2940     // Otherwise (e.g. for a constant) the beginning of the function is as
2941     // good a place as any.
2942     BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
2943     B.SetInsertPoint(&EntryBB, EntryBB.begin());
2944   }
2945 
2946   SinCos = B.CreateCall(Callee, Arg, "sincospi");
2947 
2948   if (SinCos->getType()->isStructTy()) {
2949     Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
2950     Cos = B.CreateExtractValue(SinCos, 1, "cospi");
2951   } else {
2952     Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
2953                                  "sinpi");
2954     Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
2955                                  "cospi");
2956   }
2957 
2958   return true;
2959 }
2960 
2961 static Value *optimizeSymmetricCall(CallInst *CI, bool IsEven,
2962                                     IRBuilderBase &B) {
2963   Value *X;
2964   Value *Src = CI->getArgOperand(0);
2965 
2966   if (match(Src, m_OneUse(m_FNeg(m_Value(X))))) {
2967     IRBuilderBase::FastMathFlagGuard Guard(B);
2968     B.setFastMathFlags(CI->getFastMathFlags());
2969 
2970     auto *CallInst = copyFlags(*CI, B.CreateCall(CI->getCalledFunction(), {X}));
2971     if (IsEven) {
2972       // Even function: f(-x) = f(x)
2973       return CallInst;
2974     }
2975     // Odd function: f(-x) = -f(x)
2976     return B.CreateFNeg(CallInst);
2977   }
2978 
2979   // Even function: f(abs(x)) = f(x), f(copysign(x, y)) = f(x)
2980   if (IsEven && (match(Src, m_FAbs(m_Value(X))) ||
2981                  match(Src, m_CopySign(m_Value(X), m_Value())))) {
2982     IRBuilderBase::FastMathFlagGuard Guard(B);
2983     B.setFastMathFlags(CI->getFastMathFlags());
2984 
2985     auto *CallInst = copyFlags(*CI, B.CreateCall(CI->getCalledFunction(), {X}));
2986     return CallInst;
2987   }
2988 
2989   return nullptr;
2990 }
2991 
2992 Value *LibCallSimplifier::optimizeSymmetric(CallInst *CI, LibFunc Func,
2993                                             IRBuilderBase &B) {
2994   switch (Func) {
2995   case LibFunc_cos:
2996   case LibFunc_cosf:
2997   case LibFunc_cosl:
2998     return optimizeSymmetricCall(CI, /*IsEven*/ true, B);
2999 
3000   case LibFunc_sin:
3001   case LibFunc_sinf:
3002   case LibFunc_sinl:
3003 
3004   case LibFunc_tan:
3005   case LibFunc_tanf:
3006   case LibFunc_tanl:
3007 
3008   case LibFunc_erf:
3009   case LibFunc_erff:
3010   case LibFunc_erfl:
3011     return optimizeSymmetricCall(CI, /*IsEven*/ false, B);
3012 
3013   default:
3014     return nullptr;
3015   }
3016 }
3017 
3018 Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B) {
3019   // Make sure the prototype is as expected, otherwise the rest of the
3020   // function is probably invalid and likely to abort.
3021   if (!isTrigLibCall(CI))
3022     return nullptr;
3023 
3024   Value *Arg = CI->getArgOperand(0);
3025   SmallVector<CallInst *, 1> SinCalls;
3026   SmallVector<CallInst *, 1> CosCalls;
3027   SmallVector<CallInst *, 1> SinCosCalls;
3028 
3029   bool IsFloat = Arg->getType()->isFloatTy();
3030 
3031   // Look for all compatible sinpi, cospi and sincospi calls with the same
3032   // argument. If there are enough (in some sense) we can make the
3033   // substitution.
3034   Function *F = CI->getFunction();
3035   for (User *U : Arg->users())
3036     classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
3037 
3038   // It's only worthwhile if both sinpi and cospi are actually used.
3039   if (SinCalls.empty() || CosCalls.empty())
3040     return nullptr;
3041 
3042   Value *Sin, *Cos, *SinCos;
3043   if (!insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
3044                         SinCos, TLI))
3045     return nullptr;
3046 
3047   auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
3048                                  Value *Res) {
3049     for (CallInst *C : Calls)
3050       replaceAllUsesWith(C, Res);
3051   };
3052 
3053   replaceTrigInsts(SinCalls, Sin);
3054   replaceTrigInsts(CosCalls, Cos);
3055   replaceTrigInsts(SinCosCalls, SinCos);
3056 
3057   return IsSin ? Sin : Cos;
3058 }
3059 
3060 void LibCallSimplifier::classifyArgUse(
3061     Value *Val, Function *F, bool IsFloat,
3062     SmallVectorImpl<CallInst *> &SinCalls,
3063     SmallVectorImpl<CallInst *> &CosCalls,
3064     SmallVectorImpl<CallInst *> &SinCosCalls) {
3065   auto *CI = dyn_cast<CallInst>(Val);
3066   if (!CI || CI->use_empty())
3067     return;
3068 
3069   // Don't consider calls in other functions.
3070   if (CI->getFunction() != F)
3071     return;
3072 
3073   Module *M = CI->getModule();
3074   Function *Callee = CI->getCalledFunction();
3075   LibFunc Func;
3076   if (!Callee || !TLI->getLibFunc(*Callee, Func) ||
3077       !isLibFuncEmittable(M, TLI, Func) ||
3078       !isTrigLibCall(CI))
3079     return;
3080 
3081   if (IsFloat) {
3082     if (Func == LibFunc_sinpif)
3083       SinCalls.push_back(CI);
3084     else if (Func == LibFunc_cospif)
3085       CosCalls.push_back(CI);
3086     else if (Func == LibFunc_sincospif_stret)
3087       SinCosCalls.push_back(CI);
3088   } else {
3089     if (Func == LibFunc_sinpi)
3090       SinCalls.push_back(CI);
3091     else if (Func == LibFunc_cospi)
3092       CosCalls.push_back(CI);
3093     else if (Func == LibFunc_sincospi_stret)
3094       SinCosCalls.push_back(CI);
3095   }
3096 }
3097 
3098 /// Constant folds remquo
3099 Value *LibCallSimplifier::optimizeRemquo(CallInst *CI, IRBuilderBase &B) {
3100   const APFloat *X, *Y;
3101   if (!match(CI->getArgOperand(0), m_APFloat(X)) ||
3102       !match(CI->getArgOperand(1), m_APFloat(Y)))
3103     return nullptr;
3104 
3105   APFloat::opStatus Status;
3106   APFloat Quot = *X;
3107   Status = Quot.divide(*Y, APFloat::rmNearestTiesToEven);
3108   if (Status != APFloat::opOK && Status != APFloat::opInexact)
3109     return nullptr;
3110   APFloat Rem = *X;
3111   if (Rem.remainder(*Y) != APFloat::opOK)
3112     return nullptr;
3113 
3114   // TODO: We can only keep at least the three of the last bits of x/y
3115   unsigned IntBW = TLI->getIntSize();
3116   APSInt QuotInt(IntBW, /*isUnsigned=*/false);
3117   bool IsExact;
3118   Status =
3119       Quot.convertToInteger(QuotInt, APFloat::rmNearestTiesToEven, &IsExact);
3120   if (Status != APFloat::opOK && Status != APFloat::opInexact)
3121     return nullptr;
3122 
3123   B.CreateAlignedStore(
3124       ConstantInt::get(B.getIntNTy(IntBW), QuotInt.getExtValue()),
3125       CI->getArgOperand(2), CI->getParamAlign(2));
3126   return ConstantFP::get(CI->getType(), Rem);
3127 }
3128 
3129 /// Constant folds fdim
3130 Value *LibCallSimplifier::optimizeFdim(CallInst *CI, IRBuilderBase &B) {
3131   // Cannot perform the fold unless the call has attribute memory(none)
3132   if (!CI->doesNotAccessMemory())
3133     return nullptr;
3134 
3135   // TODO : Handle undef values
3136   // Propagate poison if any
3137   if (isa<PoisonValue>(CI->getArgOperand(0)))
3138     return CI->getArgOperand(0);
3139   if (isa<PoisonValue>(CI->getArgOperand(1)))
3140     return CI->getArgOperand(1);
3141 
3142   const APFloat *X, *Y;
3143   // Check if both values are constants
3144   if (!match(CI->getArgOperand(0), m_APFloat(X)) ||
3145       !match(CI->getArgOperand(1), m_APFloat(Y)))
3146     return nullptr;
3147 
3148   APFloat Difference = *X;
3149   Difference.subtract(*Y, RoundingMode::NearestTiesToEven);
3150 
3151   APFloat MaxVal =
3152       maximum(Difference, APFloat::getZero(CI->getType()->getFltSemantics()));
3153   return ConstantFP::get(CI->getType(), MaxVal);
3154 }
3155 
3156 //===----------------------------------------------------------------------===//
3157 // Integer Library Call Optimizations
3158 //===----------------------------------------------------------------------===//
3159 
3160 Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilderBase &B) {
3161   // All variants of ffs return int which need not be 32 bits wide.
3162   // ffs{,l,ll}(x) -> x != 0 ? (int)llvm.cttz(x)+1 : 0
3163   Type *RetType = CI->getType();
3164   Value *Op = CI->getArgOperand(0);
3165   Type *ArgType = Op->getType();
3166   Value *V = B.CreateIntrinsic(Intrinsic::cttz, {ArgType}, {Op, B.getTrue()},
3167                                nullptr, "cttz");
3168   V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
3169   V = B.CreateIntCast(V, RetType, false);
3170 
3171   Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
3172   return B.CreateSelect(Cond, V, ConstantInt::get(RetType, 0));
3173 }
3174 
3175 Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilderBase &B) {
3176   // All variants of fls return int which need not be 32 bits wide.
3177   // fls{,l,ll}(x) -> (int)(sizeInBits(x) - llvm.ctlz(x, false))
3178   Value *Op = CI->getArgOperand(0);
3179   Type *ArgType = Op->getType();
3180   Value *V = B.CreateIntrinsic(Intrinsic::ctlz, {ArgType}, {Op, B.getFalse()},
3181                                nullptr, "ctlz");
3182   V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
3183                   V);
3184   return B.CreateIntCast(V, CI->getType(), false);
3185 }
3186 
3187 Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) {
3188   // abs(x) -> x <s 0 ? -x : x
3189   // The negation has 'nsw' because abs of INT_MIN is undefined.
3190   Value *X = CI->getArgOperand(0);
3191   Value *IsNeg = B.CreateIsNeg(X);
3192   Value *NegX = B.CreateNSWNeg(X, "neg");
3193   return B.CreateSelect(IsNeg, NegX, X);
3194 }
3195 
3196 Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilderBase &B) {
3197   // isdigit(c) -> (c-'0') <u 10
3198   Value *Op = CI->getArgOperand(0);
3199   Type *ArgType = Op->getType();
3200   Op = B.CreateSub(Op, ConstantInt::get(ArgType, '0'), "isdigittmp");
3201   Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 10), "isdigit");
3202   return B.CreateZExt(Op, CI->getType());
3203 }
3204 
3205 Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilderBase &B) {
3206   // isascii(c) -> c <u 128
3207   Value *Op = CI->getArgOperand(0);
3208   Type *ArgType = Op->getType();
3209   Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 128), "isascii");
3210   return B.CreateZExt(Op, CI->getType());
3211 }
3212 
3213 Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilderBase &B) {
3214   // toascii(c) -> c & 0x7f
3215   return B.CreateAnd(CI->getArgOperand(0),
3216                      ConstantInt::get(CI->getType(), 0x7F));
3217 }
3218 
3219 // Fold calls to atoi, atol, and atoll.
3220 Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
3221   CI->addParamAttr(0, Attribute::NoCapture);
3222 
3223   StringRef Str;
3224   if (!getConstantStringInfo(CI->getArgOperand(0), Str))
3225     return nullptr;
3226 
3227   return convertStrToInt(CI, Str, nullptr, 10, /*AsSigned=*/true, B);
3228 }
3229 
3230 // Fold calls to strtol, strtoll, strtoul, and strtoull.
3231 Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B,
3232                                            bool AsSigned) {
3233   Value *EndPtr = CI->getArgOperand(1);
3234   if (isa<ConstantPointerNull>(EndPtr)) {
3235     // With a null EndPtr, this function won't capture the main argument.
3236     // It would be readonly too, except that it still may write to errno.
3237     CI->addParamAttr(0, Attribute::NoCapture);
3238     EndPtr = nullptr;
3239   } else if (!isKnownNonZero(EndPtr, DL))
3240     return nullptr;
3241 
3242   StringRef Str;
3243   if (!getConstantStringInfo(CI->getArgOperand(0), Str))
3244     return nullptr;
3245 
3246   if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
3247     return convertStrToInt(CI, Str, EndPtr, CInt->getSExtValue(), AsSigned, B);
3248   }
3249 
3250   return nullptr;
3251 }
3252 
3253 //===----------------------------------------------------------------------===//
3254 // Formatting and IO Library Call Optimizations
3255 //===----------------------------------------------------------------------===//
3256 
3257 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
3258 
3259 Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B,
3260                                                  int StreamArg) {
3261   Function *Callee = CI->getCalledFunction();
3262   // Error reporting calls should be cold, mark them as such.
3263   // This applies even to non-builtin calls: it is only a hint and applies to
3264   // functions that the frontend might not understand as builtins.
3265 
3266   // This heuristic was suggested in:
3267   // Improving Static Branch Prediction in a Compiler
3268   // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
3269   // Proceedings of PACT'98, Oct. 1998, IEEE
3270   if (!CI->hasFnAttr(Attribute::Cold) &&
3271       isReportingError(Callee, CI, StreamArg)) {
3272     CI->addFnAttr(Attribute::Cold);
3273   }
3274 
3275   return nullptr;
3276 }
3277 
3278 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
3279   if (!Callee || !Callee->isDeclaration())
3280     return false;
3281 
3282   if (StreamArg < 0)
3283     return true;
3284 
3285   // These functions might be considered cold, but only if their stream
3286   // argument is stderr.
3287 
3288   if (StreamArg >= (int)CI->arg_size())
3289     return false;
3290   LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
3291   if (!LI)
3292     return false;
3293   GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
3294   if (!GV || !GV->isDeclaration())
3295     return false;
3296   return GV->getName() == "stderr";
3297 }
3298 
3299 Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
3300   // Check for a fixed format string.
3301   StringRef FormatStr;
3302   if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
3303     return nullptr;
3304 
3305   // Empty format string -> noop.
3306   if (FormatStr.empty()) // Tolerate printf's declared void.
3307     return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0);
3308 
3309   // Do not do any of the following transformations if the printf return value
3310   // is used, in general the printf return value is not compatible with either
3311   // putchar() or puts().
3312   if (!CI->use_empty())
3313     return nullptr;
3314 
3315   Type *IntTy = CI->getType();
3316   // printf("x") -> putchar('x'), even for "%" and "%%".
3317   if (FormatStr.size() == 1 || FormatStr == "%%") {
3318     // Convert the character to unsigned char before passing it to putchar
3319     // to avoid host-specific sign extension in the IR.  Putchar converts
3320     // it to unsigned char regardless.
3321     Value *IntChar = ConstantInt::get(IntTy, (unsigned char)FormatStr[0]);
3322     return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3323   }
3324 
3325   // Try to remove call or emit putchar/puts.
3326   if (FormatStr == "%s" && CI->arg_size() > 1) {
3327     StringRef OperandStr;
3328     if (!getConstantStringInfo(CI->getOperand(1), OperandStr))
3329       return nullptr;
3330     // printf("%s", "") --> NOP
3331     if (OperandStr.empty())
3332       return (Value *)CI;
3333     // printf("%s", "a") --> putchar('a')
3334     if (OperandStr.size() == 1) {
3335       // Convert the character to unsigned char before passing it to putchar
3336       // to avoid host-specific sign extension in the IR.  Putchar converts
3337       // it to unsigned char regardless.
3338       Value *IntChar = ConstantInt::get(IntTy, (unsigned char)OperandStr[0]);
3339       return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3340     }
3341     // printf("%s", str"\n") --> puts(str)
3342     if (OperandStr.back() == '\n') {
3343       OperandStr = OperandStr.drop_back();
3344       Value *GV = B.CreateGlobalString(OperandStr, "str");
3345       return copyFlags(*CI, emitPutS(GV, B, TLI));
3346     }
3347     return nullptr;
3348   }
3349 
3350   // printf("foo\n") --> puts("foo")
3351   if (FormatStr.back() == '\n' &&
3352       !FormatStr.contains('%')) { // No format characters.
3353     // Create a string literal with no \n on it.  We expect the constant merge
3354     // pass to be run after this pass, to merge duplicate strings.
3355     FormatStr = FormatStr.drop_back();
3356     Value *GV = B.CreateGlobalString(FormatStr, "str");
3357     return copyFlags(*CI, emitPutS(GV, B, TLI));
3358   }
3359 
3360   // Optimize specific format strings.
3361   // printf("%c", chr) --> putchar(chr)
3362   if (FormatStr == "%c" && CI->arg_size() > 1 &&
3363       CI->getArgOperand(1)->getType()->isIntegerTy()) {
3364     // Convert the argument to the type expected by putchar, i.e., int, which
3365     // need not be 32 bits wide but which is the same as printf's return type.
3366     Value *IntChar = B.CreateIntCast(CI->getArgOperand(1), IntTy, false);
3367     return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
3368   }
3369 
3370   // printf("%s\n", str) --> puts(str)
3371   if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
3372       CI->getArgOperand(1)->getType()->isPointerTy())
3373     return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI));
3374   return nullptr;
3375 }
3376 
3377 Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
3378 
3379   Module *M = CI->getModule();
3380   Function *Callee = CI->getCalledFunction();
3381   FunctionType *FT = Callee->getFunctionType();
3382   if (Value *V = optimizePrintFString(CI, B)) {
3383     return V;
3384   }
3385 
3386   annotateNonNullNoUndefBasedOnAccess(CI, 0);
3387 
3388   // printf(format, ...) -> iprintf(format, ...) if no floating point
3389   // arguments.
3390   if (isLibFuncEmittable(M, TLI, LibFunc_iprintf) &&
3391       !callHasFloatingPointArgument(CI)) {
3392     FunctionCallee IPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_iprintf, FT,
3393                                                   Callee->getAttributes());
3394     CallInst *New = cast<CallInst>(CI->clone());
3395     New->setCalledFunction(IPrintFFn);
3396     B.Insert(New);
3397     return New;
3398   }
3399 
3400   // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
3401   // arguments.
3402   if (isLibFuncEmittable(M, TLI, LibFunc_small_printf) &&
3403       !callHasFP128Argument(CI)) {
3404     auto SmallPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_printf, FT,
3405                                             Callee->getAttributes());
3406     CallInst *New = cast<CallInst>(CI->clone());
3407     New->setCalledFunction(SmallPrintFFn);
3408     B.Insert(New);
3409     return New;
3410   }
3411 
3412   return nullptr;
3413 }
3414 
3415 Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
3416                                                 IRBuilderBase &B) {
3417   // Check for a fixed format string.
3418   StringRef FormatStr;
3419   if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
3420     return nullptr;
3421 
3422   // If we just have a format string (nothing else crazy) transform it.
3423   Value *Dest = CI->getArgOperand(0);
3424   if (CI->arg_size() == 2) {
3425     // Make sure there's no % in the constant array.  We could try to handle
3426     // %% -> % in the future if we cared.
3427     if (FormatStr.contains('%'))
3428       return nullptr; // we found a format specifier, bail out.
3429 
3430     // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
3431     B.CreateMemCpy(
3432         Dest, Align(1), CI->getArgOperand(1), Align(1),
3433         ConstantInt::get(DL.getIntPtrType(CI->getContext()),
3434                          FormatStr.size() + 1)); // Copy the null byte.
3435     return ConstantInt::get(CI->getType(), FormatStr.size());
3436   }
3437 
3438   // The remaining optimizations require the format string to be "%s" or "%c"
3439   // and have an extra operand.
3440   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
3441     return nullptr;
3442 
3443   // Decode the second character of the format string.
3444   if (FormatStr[1] == 'c') {
3445     // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
3446     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
3447       return nullptr;
3448     Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
3449     Value *Ptr = Dest;
3450     B.CreateStore(V, Ptr);
3451     Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
3452     B.CreateStore(B.getInt8(0), Ptr);
3453 
3454     return ConstantInt::get(CI->getType(), 1);
3455   }
3456 
3457   if (FormatStr[1] == 's') {
3458     // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
3459     // strlen(str)+1)
3460     if (!CI->getArgOperand(2)->getType()->isPointerTy())
3461       return nullptr;
3462 
3463     if (CI->use_empty())
3464       // sprintf(dest, "%s", str) -> strcpy(dest, str)
3465       return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI));
3466 
3467     uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
3468     if (SrcLen) {
3469       B.CreateMemCpy(
3470           Dest, Align(1), CI->getArgOperand(2), Align(1),
3471           ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen));
3472       // Returns total number of characters written without null-character.
3473       return ConstantInt::get(CI->getType(), SrcLen - 1);
3474     } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
3475       // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
3476       Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
3477       return B.CreateIntCast(PtrDiff, CI->getType(), false);
3478     }
3479 
3480     bool OptForSize = CI->getFunction()->hasOptSize() ||
3481                       llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
3482                                                   PGSOQueryType::IRPass);
3483     if (OptForSize)
3484       return nullptr;
3485 
3486     Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
3487     if (!Len)
3488       return nullptr;
3489     Value *IncLen =
3490         B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
3491     B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(2), Align(1), IncLen);
3492 
3493     // The sprintf result is the unincremented number of bytes in the string.
3494     return B.CreateIntCast(Len, CI->getType(), false);
3495   }
3496   return nullptr;
3497 }
3498 
3499 Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
3500   Module *M = CI->getModule();
3501   Function *Callee = CI->getCalledFunction();
3502   FunctionType *FT = Callee->getFunctionType();
3503   if (Value *V = optimizeSPrintFString(CI, B)) {
3504     return V;
3505   }
3506 
3507   annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
3508 
3509   // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
3510   // point arguments.
3511   if (isLibFuncEmittable(M, TLI, LibFunc_siprintf) &&
3512       !callHasFloatingPointArgument(CI)) {
3513     FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_siprintf,
3514                                                    FT, Callee->getAttributes());
3515     CallInst *New = cast<CallInst>(CI->clone());
3516     New->setCalledFunction(SIPrintFFn);
3517     B.Insert(New);
3518     return New;
3519   }
3520 
3521   // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
3522   // floating point arguments.
3523   if (isLibFuncEmittable(M, TLI, LibFunc_small_sprintf) &&
3524       !callHasFP128Argument(CI)) {
3525     auto SmallSPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_sprintf, FT,
3526                                              Callee->getAttributes());
3527     CallInst *New = cast<CallInst>(CI->clone());
3528     New->setCalledFunction(SmallSPrintFFn);
3529     B.Insert(New);
3530     return New;
3531   }
3532 
3533   return nullptr;
3534 }
3535 
3536 // Transform an snprintf call CI with the bound N to format the string Str
3537 // either to a call to memcpy, or to single character a store, or to nothing,
3538 // and fold the result to a constant.  A nonnull StrArg refers to the string
3539 // argument being formatted.  Otherwise the call is one with N < 2 and
3540 // the "%c" directive to format a single character.
3541 Value *LibCallSimplifier::emitSnPrintfMemCpy(CallInst *CI, Value *StrArg,
3542                                              StringRef Str, uint64_t N,
3543                                              IRBuilderBase &B) {
3544   assert(StrArg || (N < 2 && Str.size() == 1));
3545 
3546   unsigned IntBits = TLI->getIntSize();
3547   uint64_t IntMax = maxIntN(IntBits);
3548   if (Str.size() > IntMax)
3549     // Bail if the string is longer than INT_MAX.  POSIX requires
3550     // implementations to set errno to EOVERFLOW in this case, in
3551     // addition to when N is larger than that (checked by the caller).
3552     return nullptr;
3553 
3554   Value *StrLen = ConstantInt::get(CI->getType(), Str.size());
3555   if (N == 0)
3556     return StrLen;
3557 
3558   // Set to the number of bytes to copy fron StrArg which is also
3559   // the offset of the terinating nul.
3560   uint64_t NCopy;
3561   if (N > Str.size())
3562     // Copy the full string, including the terminating nul (which must
3563     // be present regardless of the bound).
3564     NCopy = Str.size() + 1;
3565   else
3566     NCopy = N - 1;
3567 
3568   Value *DstArg = CI->getArgOperand(0);
3569   if (NCopy && StrArg)
3570     // Transform the call to lvm.memcpy(dst, fmt, N).
3571     copyFlags(
3572          *CI,
3573           B.CreateMemCpy(
3574                          DstArg, Align(1), StrArg, Align(1),
3575               ConstantInt::get(DL.getIntPtrType(CI->getContext()), NCopy)));
3576 
3577   if (N > Str.size())
3578     // Return early when the whole format string, including the final nul,
3579     // has been copied.
3580     return StrLen;
3581 
3582   // Otherwise, when truncating the string append a terminating nul.
3583   Type *Int8Ty = B.getInt8Ty();
3584   Value *NulOff = B.getIntN(IntBits, NCopy);
3585   Value *DstEnd = B.CreateInBoundsGEP(Int8Ty, DstArg, NulOff, "endptr");
3586   B.CreateStore(ConstantInt::get(Int8Ty, 0), DstEnd);
3587   return StrLen;
3588 }
3589 
3590 Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
3591                                                  IRBuilderBase &B) {
3592   // Check for size
3593   ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
3594   if (!Size)
3595     return nullptr;
3596 
3597   uint64_t N = Size->getZExtValue();
3598   uint64_t IntMax = maxIntN(TLI->getIntSize());
3599   if (N > IntMax)
3600     // Bail if the bound exceeds INT_MAX.  POSIX requires implementations
3601     // to set errno to EOVERFLOW in this case.
3602     return nullptr;
3603 
3604   Value *DstArg = CI->getArgOperand(0);
3605   Value *FmtArg = CI->getArgOperand(2);
3606 
3607   // Check for a fixed format string.
3608   StringRef FormatStr;
3609   if (!getConstantStringInfo(FmtArg, FormatStr))
3610     return nullptr;
3611 
3612   // If we just have a format string (nothing else crazy) transform it.
3613   if (CI->arg_size() == 3) {
3614     if (FormatStr.contains('%'))
3615       // Bail if the format string contains a directive and there are
3616       // no arguments.  We could handle "%%" in the future.
3617       return nullptr;
3618 
3619     return emitSnPrintfMemCpy(CI, FmtArg, FormatStr, N, B);
3620   }
3621 
3622   // The remaining optimizations require the format string to be "%s" or "%c"
3623   // and have an extra operand.
3624   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() != 4)
3625     return nullptr;
3626 
3627   // Decode the second character of the format string.
3628   if (FormatStr[1] == 'c') {
3629     if (N <= 1) {
3630       // Use an arbitary string of length 1 to transform the call into
3631       // either a nul store (N == 1) or a no-op (N == 0) and fold it
3632       // to one.
3633       StringRef CharStr("*");
3634       return emitSnPrintfMemCpy(CI, nullptr, CharStr, N, B);
3635     }
3636 
3637     // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
3638     if (!CI->getArgOperand(3)->getType()->isIntegerTy())
3639       return nullptr;
3640     Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
3641     Value *Ptr = DstArg;
3642     B.CreateStore(V, Ptr);
3643     Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
3644     B.CreateStore(B.getInt8(0), Ptr);
3645     return ConstantInt::get(CI->getType(), 1);
3646   }
3647 
3648   if (FormatStr[1] != 's')
3649     return nullptr;
3650 
3651   Value *StrArg = CI->getArgOperand(3);
3652   // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1)
3653   StringRef Str;
3654   if (!getConstantStringInfo(StrArg, Str))
3655     return nullptr;
3656 
3657   return emitSnPrintfMemCpy(CI, StrArg, Str, N, B);
3658 }
3659 
3660 Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) {
3661   if (Value *V = optimizeSnPrintFString(CI, B)) {
3662     return V;
3663   }
3664 
3665   if (isKnownNonZero(CI->getOperand(1), DL))
3666     annotateNonNullNoUndefBasedOnAccess(CI, 0);
3667   return nullptr;
3668 }
3669 
3670 Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
3671                                                 IRBuilderBase &B) {
3672   optimizeErrorReporting(CI, B, 0);
3673 
3674   // All the optimizations depend on the format string.
3675   StringRef FormatStr;
3676   if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
3677     return nullptr;
3678 
3679   // Do not do any of the following transformations if the fprintf return
3680   // value is used, in general the fprintf return value is not compatible
3681   // with fwrite(), fputc() or fputs().
3682   if (!CI->use_empty())
3683     return nullptr;
3684 
3685   // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
3686   if (CI->arg_size() == 2) {
3687     // Could handle %% -> % if we cared.
3688     if (FormatStr.contains('%'))
3689       return nullptr; // We found a format specifier.
3690 
3691     unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
3692     Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
3693     return copyFlags(
3694         *CI, emitFWrite(CI->getArgOperand(1),
3695                         ConstantInt::get(SizeTTy, FormatStr.size()),
3696                         CI->getArgOperand(0), B, DL, TLI));
3697   }
3698 
3699   // The remaining optimizations require the format string to be "%s" or "%c"
3700   // and have an extra operand.
3701   if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
3702     return nullptr;
3703 
3704   // Decode the second character of the format string.
3705   if (FormatStr[1] == 'c') {
3706     // fprintf(F, "%c", chr) --> fputc((int)chr, F)
3707     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
3708       return nullptr;
3709     Type *IntTy = B.getIntNTy(TLI->getIntSize());
3710     Value *V = B.CreateIntCast(CI->getArgOperand(2), IntTy, /*isSigned*/ true,
3711                                "chari");
3712     return copyFlags(*CI, emitFPutC(V, CI->getArgOperand(0), B, TLI));
3713   }
3714 
3715   if (FormatStr[1] == 's') {
3716     // fprintf(F, "%s", str) --> fputs(str, F)
3717     if (!CI->getArgOperand(2)->getType()->isPointerTy())
3718       return nullptr;
3719     return copyFlags(
3720         *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI));
3721   }
3722   return nullptr;
3723 }
3724 
3725 Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
3726   Module *M = CI->getModule();
3727   Function *Callee = CI->getCalledFunction();
3728   FunctionType *FT = Callee->getFunctionType();
3729   if (Value *V = optimizeFPrintFString(CI, B)) {
3730     return V;
3731   }
3732 
3733   // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
3734   // floating point arguments.
3735   if (isLibFuncEmittable(M, TLI, LibFunc_fiprintf) &&
3736       !callHasFloatingPointArgument(CI)) {
3737     FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_fiprintf,
3738                                                    FT, Callee->getAttributes());
3739     CallInst *New = cast<CallInst>(CI->clone());
3740     New->setCalledFunction(FIPrintFFn);
3741     B.Insert(New);
3742     return New;
3743   }
3744 
3745   // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
3746   // 128-bit floating point arguments.
3747   if (isLibFuncEmittable(M, TLI, LibFunc_small_fprintf) &&
3748       !callHasFP128Argument(CI)) {
3749     auto SmallFPrintFFn =
3750         getOrInsertLibFunc(M, *TLI, LibFunc_small_fprintf, FT,
3751                            Callee->getAttributes());
3752     CallInst *New = cast<CallInst>(CI->clone());
3753     New->setCalledFunction(SmallFPrintFFn);
3754     B.Insert(New);
3755     return New;
3756   }
3757 
3758   return nullptr;
3759 }
3760 
3761 Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
3762   optimizeErrorReporting(CI, B, 3);
3763 
3764   // Get the element size and count.
3765   ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
3766   ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
3767   if (SizeC && CountC) {
3768     uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
3769 
3770     // If this is writing zero records, remove the call (it's a noop).
3771     if (Bytes == 0)
3772       return ConstantInt::get(CI->getType(), 0);
3773 
3774     // If this is writing one byte, turn it into fputc.
3775     // This optimisation is only valid, if the return value is unused.
3776     if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
3777       Value *Char = B.CreateLoad(B.getInt8Ty(), CI->getArgOperand(0), "char");
3778       Type *IntTy = B.getIntNTy(TLI->getIntSize());
3779       Value *Cast = B.CreateIntCast(Char, IntTy, /*isSigned*/ true, "chari");
3780       Value *NewCI = emitFPutC(Cast, CI->getArgOperand(3), B, TLI);
3781       return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
3782     }
3783   }
3784 
3785   return nullptr;
3786 }
3787 
3788 Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
3789   optimizeErrorReporting(CI, B, 1);
3790 
3791   // Don't rewrite fputs to fwrite when optimising for size because fwrite
3792   // requires more arguments and thus extra MOVs are required.
3793   bool OptForSize = CI->getFunction()->hasOptSize() ||
3794                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
3795                                                 PGSOQueryType::IRPass);
3796   if (OptForSize)
3797     return nullptr;
3798 
3799   // We can't optimize if return value is used.
3800   if (!CI->use_empty())
3801     return nullptr;
3802 
3803   // fputs(s,F) --> fwrite(s,strlen(s),1,F)
3804   uint64_t Len = GetStringLength(CI->getArgOperand(0));
3805   if (!Len)
3806     return nullptr;
3807 
3808   // Known to have no uses (see above).
3809   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
3810   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
3811   return copyFlags(
3812       *CI,
3813       emitFWrite(CI->getArgOperand(0),
3814                  ConstantInt::get(SizeTTy, Len - 1),
3815                  CI->getArgOperand(1), B, DL, TLI));
3816 }
3817 
3818 Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
3819   annotateNonNullNoUndefBasedOnAccess(CI, 0);
3820   if (!CI->use_empty())
3821     return nullptr;
3822 
3823   // Check for a constant string.
3824   // puts("") -> putchar('\n')
3825   StringRef Str;
3826   if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) {
3827     // putchar takes an argument of the same type as puts returns, i.e.,
3828     // int, which need not be 32 bits wide.
3829     Type *IntTy = CI->getType();
3830     return copyFlags(*CI, emitPutChar(ConstantInt::get(IntTy, '\n'), B, TLI));
3831   }
3832 
3833   return nullptr;
3834 }
3835 
3836 Value *LibCallSimplifier::optimizeExit(CallInst *CI) {
3837 
3838   // Mark 'exit' as cold if its not exit(0) (success).
3839   const APInt *C;
3840   if (!CI->hasFnAttr(Attribute::Cold) &&
3841       match(CI->getArgOperand(0), m_APInt(C)) && !C->isZero()) {
3842     CI->addFnAttr(Attribute::Cold);
3843   }
3844   return nullptr;
3845 }
3846 
3847 Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
3848   // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
3849   return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1),
3850                                         CI->getArgOperand(0), Align(1),
3851                                         CI->getArgOperand(2)));
3852 }
3853 
3854 bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) {
3855   SmallString<20> FloatFuncName = FuncName;
3856   FloatFuncName += 'f';
3857   return isLibFuncEmittable(M, TLI, FloatFuncName);
3858 }
3859 
3860 Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
3861                                                       IRBuilderBase &Builder) {
3862   Module *M = CI->getModule();
3863   LibFunc Func;
3864   Function *Callee = CI->getCalledFunction();
3865 
3866   // Check for string/memory library functions.
3867   if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
3868     // Make sure we never change the calling convention.
3869     assert(
3870         (ignoreCallingConv(Func) ||
3871          TargetLibraryInfoImpl::isCallingConvCCompatible(CI)) &&
3872         "Optimizing string/memory libcall would change the calling convention");
3873     switch (Func) {
3874     case LibFunc_strcat:
3875       return optimizeStrCat(CI, Builder);
3876     case LibFunc_strncat:
3877       return optimizeStrNCat(CI, Builder);
3878     case LibFunc_strchr:
3879       return optimizeStrChr(CI, Builder);
3880     case LibFunc_strrchr:
3881       return optimizeStrRChr(CI, Builder);
3882     case LibFunc_strcmp:
3883       return optimizeStrCmp(CI, Builder);
3884     case LibFunc_strncmp:
3885       return optimizeStrNCmp(CI, Builder);
3886     case LibFunc_strcpy:
3887       return optimizeStrCpy(CI, Builder);
3888     case LibFunc_stpcpy:
3889       return optimizeStpCpy(CI, Builder);
3890     case LibFunc_strlcpy:
3891       return optimizeStrLCpy(CI, Builder);
3892     case LibFunc_stpncpy:
3893       return optimizeStringNCpy(CI, /*RetEnd=*/true, Builder);
3894     case LibFunc_strncpy:
3895       return optimizeStringNCpy(CI, /*RetEnd=*/false, Builder);
3896     case LibFunc_strlen:
3897       return optimizeStrLen(CI, Builder);
3898     case LibFunc_strnlen:
3899       return optimizeStrNLen(CI, Builder);
3900     case LibFunc_strpbrk:
3901       return optimizeStrPBrk(CI, Builder);
3902     case LibFunc_strndup:
3903       return optimizeStrNDup(CI, Builder);
3904     case LibFunc_strtol:
3905     case LibFunc_strtod:
3906     case LibFunc_strtof:
3907     case LibFunc_strtoul:
3908     case LibFunc_strtoll:
3909     case LibFunc_strtold:
3910     case LibFunc_strtoull:
3911       return optimizeStrTo(CI, Builder);
3912     case LibFunc_strspn:
3913       return optimizeStrSpn(CI, Builder);
3914     case LibFunc_strcspn:
3915       return optimizeStrCSpn(CI, Builder);
3916     case LibFunc_strstr:
3917       return optimizeStrStr(CI, Builder);
3918     case LibFunc_memchr:
3919       return optimizeMemChr(CI, Builder);
3920     case LibFunc_memrchr:
3921       return optimizeMemRChr(CI, Builder);
3922     case LibFunc_bcmp:
3923       return optimizeBCmp(CI, Builder);
3924     case LibFunc_memcmp:
3925       return optimizeMemCmp(CI, Builder);
3926     case LibFunc_memcpy:
3927       return optimizeMemCpy(CI, Builder);
3928     case LibFunc_memccpy:
3929       return optimizeMemCCpy(CI, Builder);
3930     case LibFunc_mempcpy:
3931       return optimizeMemPCpy(CI, Builder);
3932     case LibFunc_memmove:
3933       return optimizeMemMove(CI, Builder);
3934     case LibFunc_memset:
3935       return optimizeMemSet(CI, Builder);
3936     case LibFunc_realloc:
3937       return optimizeRealloc(CI, Builder);
3938     case LibFunc_wcslen:
3939       return optimizeWcslen(CI, Builder);
3940     case LibFunc_bcopy:
3941       return optimizeBCopy(CI, Builder);
3942     case LibFunc_Znwm:
3943     case LibFunc_ZnwmRKSt9nothrow_t:
3944     case LibFunc_ZnwmSt11align_val_t:
3945     case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
3946     case LibFunc_Znam:
3947     case LibFunc_ZnamRKSt9nothrow_t:
3948     case LibFunc_ZnamSt11align_val_t:
3949     case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
3950     case LibFunc_Znwm12__hot_cold_t:
3951     case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
3952     case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
3953     case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
3954     case LibFunc_Znam12__hot_cold_t:
3955     case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
3956     case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
3957     case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
3958     case LibFunc_size_returning_new:
3959     case LibFunc_size_returning_new_hot_cold:
3960     case LibFunc_size_returning_new_aligned:
3961     case LibFunc_size_returning_new_aligned_hot_cold:
3962       return optimizeNew(CI, Builder, Func);
3963     default:
3964       break;
3965     }
3966   }
3967   return nullptr;
3968 }
3969 
3970 /// Constant folding nan/nanf/nanl.
3971 static Value *optimizeNaN(CallInst *CI) {
3972   StringRef CharSeq;
3973   if (!getConstantStringInfo(CI->getArgOperand(0), CharSeq))
3974     return nullptr;
3975 
3976   APInt Fill;
3977   // Treat empty strings as if they were zero.
3978   if (CharSeq.empty())
3979     Fill = APInt(32, 0);
3980   else if (CharSeq.getAsInteger(0, Fill))
3981     return nullptr;
3982 
3983   return ConstantFP::getQNaN(CI->getType(), /*Negative=*/false, &Fill);
3984 }
3985 
3986 Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
3987                                                        LibFunc Func,
3988                                                        IRBuilderBase &Builder) {
3989   const Module *M = CI->getModule();
3990 
3991   // Don't optimize calls that require strict floating point semantics.
3992   if (CI->isStrictFP())
3993     return nullptr;
3994 
3995   if (Value *V = optimizeSymmetric(CI, Func, Builder))
3996     return V;
3997 
3998   switch (Func) {
3999   case LibFunc_sinpif:
4000   case LibFunc_sinpi:
4001     return optimizeSinCosPi(CI, /*IsSin*/true, Builder);
4002   case LibFunc_cospif:
4003   case LibFunc_cospi:
4004     return optimizeSinCosPi(CI, /*IsSin*/false, Builder);
4005   case LibFunc_powf:
4006   case LibFunc_pow:
4007   case LibFunc_powl:
4008     return optimizePow(CI, Builder);
4009   case LibFunc_exp2l:
4010   case LibFunc_exp2:
4011   case LibFunc_exp2f:
4012     return optimizeExp2(CI, Builder);
4013   case LibFunc_fabsf:
4014   case LibFunc_fabs:
4015   case LibFunc_fabsl:
4016     return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
4017   case LibFunc_sqrtf:
4018   case LibFunc_sqrt:
4019   case LibFunc_sqrtl:
4020     return optimizeSqrt(CI, Builder);
4021   case LibFunc_fmod:
4022   case LibFunc_fmodf:
4023   case LibFunc_fmodl:
4024     return optimizeFMod(CI, Builder);
4025   case LibFunc_logf:
4026   case LibFunc_log:
4027   case LibFunc_logl:
4028   case LibFunc_log10f:
4029   case LibFunc_log10:
4030   case LibFunc_log10l:
4031   case LibFunc_log1pf:
4032   case LibFunc_log1p:
4033   case LibFunc_log1pl:
4034   case LibFunc_log2f:
4035   case LibFunc_log2:
4036   case LibFunc_log2l:
4037   case LibFunc_logbf:
4038   case LibFunc_logb:
4039   case LibFunc_logbl:
4040     return optimizeLog(CI, Builder);
4041   case LibFunc_tan:
4042   case LibFunc_tanf:
4043   case LibFunc_tanl:
4044   case LibFunc_sinh:
4045   case LibFunc_sinhf:
4046   case LibFunc_sinhl:
4047   case LibFunc_asinh:
4048   case LibFunc_asinhf:
4049   case LibFunc_asinhl:
4050   case LibFunc_cosh:
4051   case LibFunc_coshf:
4052   case LibFunc_coshl:
4053   case LibFunc_atanh:
4054   case LibFunc_atanhf:
4055   case LibFunc_atanhl:
4056     return optimizeTrigInversionPairs(CI, Builder);
4057   case LibFunc_ceil:
4058     return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
4059   case LibFunc_floor:
4060     return replaceUnaryCall(CI, Builder, Intrinsic::floor);
4061   case LibFunc_round:
4062     return replaceUnaryCall(CI, Builder, Intrinsic::round);
4063   case LibFunc_roundeven:
4064     return replaceUnaryCall(CI, Builder, Intrinsic::roundeven);
4065   case LibFunc_nearbyint:
4066     return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
4067   case LibFunc_rint:
4068     return replaceUnaryCall(CI, Builder, Intrinsic::rint);
4069   case LibFunc_trunc:
4070     return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
4071   case LibFunc_acos:
4072   case LibFunc_acosh:
4073   case LibFunc_asin:
4074   case LibFunc_atan:
4075   case LibFunc_cbrt:
4076   case LibFunc_exp:
4077   case LibFunc_exp10:
4078   case LibFunc_expm1:
4079   case LibFunc_cos:
4080   case LibFunc_sin:
4081   case LibFunc_tanh:
4082     if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName()))
4083       return optimizeUnaryDoubleFP(CI, Builder, TLI, true);
4084     return nullptr;
4085   case LibFunc_copysign:
4086     if (hasFloatVersion(M, CI->getCalledFunction()->getName()))
4087       return optimizeBinaryDoubleFP(CI, Builder, TLI);
4088     return nullptr;
4089   case LibFunc_fdim:
4090   case LibFunc_fdimf:
4091   case LibFunc_fdiml:
4092     return optimizeFdim(CI, Builder);
4093   case LibFunc_fminf:
4094   case LibFunc_fmin:
4095   case LibFunc_fminl:
4096   case LibFunc_fmaxf:
4097   case LibFunc_fmax:
4098   case LibFunc_fmaxl:
4099     return optimizeFMinFMax(CI, Builder);
4100   case LibFunc_cabs:
4101   case LibFunc_cabsf:
4102   case LibFunc_cabsl:
4103     return optimizeCAbs(CI, Builder);
4104   case LibFunc_remquo:
4105   case LibFunc_remquof:
4106   case LibFunc_remquol:
4107     return optimizeRemquo(CI, Builder);
4108   case LibFunc_nan:
4109   case LibFunc_nanf:
4110   case LibFunc_nanl:
4111     return optimizeNaN(CI);
4112   default:
4113     return nullptr;
4114   }
4115 }
4116 
4117 Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
4118   Module *M = CI->getModule();
4119   assert(!CI->isMustTailCall() && "These transforms aren't musttail safe.");
4120 
4121   // TODO: Split out the code below that operates on FP calls so that
4122   //       we can all non-FP calls with the StrictFP attribute to be
4123   //       optimized.
4124   if (CI->isNoBuiltin())
4125     return nullptr;
4126 
4127   LibFunc Func;
4128   Function *Callee = CI->getCalledFunction();
4129   bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
4130 
4131   SmallVector<OperandBundleDef, 2> OpBundles;
4132   CI->getOperandBundlesAsDefs(OpBundles);
4133 
4134   IRBuilderBase::OperandBundlesGuard Guard(Builder);
4135   Builder.setDefaultOperandBundles(OpBundles);
4136 
4137   // Command-line parameter overrides instruction attribute.
4138   // This can't be moved to optimizeFloatingPointLibCall() because it may be
4139   // used by the intrinsic optimizations.
4140   if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
4141     UnsafeFPShrink = EnableUnsafeFPShrink;
4142   else if (isa<FPMathOperator>(CI) && CI->isFast())
4143     UnsafeFPShrink = true;
4144 
4145   // First, check for intrinsics.
4146   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
4147     if (!IsCallingConvC)
4148       return nullptr;
4149     // The FP intrinsics have corresponding constrained versions so we don't
4150     // need to check for the StrictFP attribute here.
4151     switch (II->getIntrinsicID()) {
4152     case Intrinsic::pow:
4153       return optimizePow(CI, Builder);
4154     case Intrinsic::exp2:
4155       return optimizeExp2(CI, Builder);
4156     case Intrinsic::log:
4157     case Intrinsic::log2:
4158     case Intrinsic::log10:
4159       return optimizeLog(CI, Builder);
4160     case Intrinsic::sqrt:
4161       return optimizeSqrt(CI, Builder);
4162     case Intrinsic::memset:
4163       return optimizeMemSet(CI, Builder);
4164     case Intrinsic::memcpy:
4165       return optimizeMemCpy(CI, Builder);
4166     case Intrinsic::memmove:
4167       return optimizeMemMove(CI, Builder);
4168     default:
4169       return nullptr;
4170     }
4171   }
4172 
4173   // Also try to simplify calls to fortified library functions.
4174   if (Value *SimplifiedFortifiedCI =
4175           FortifiedSimplifier.optimizeCall(CI, Builder))
4176     return SimplifiedFortifiedCI;
4177 
4178   // Then check for known library functions.
4179   if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
4180     // We never change the calling convention.
4181     if (!ignoreCallingConv(Func) && !IsCallingConvC)
4182       return nullptr;
4183     if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
4184       return V;
4185     if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder))
4186       return V;
4187     switch (Func) {
4188     case LibFunc_ffs:
4189     case LibFunc_ffsl:
4190     case LibFunc_ffsll:
4191       return optimizeFFS(CI, Builder);
4192     case LibFunc_fls:
4193     case LibFunc_flsl:
4194     case LibFunc_flsll:
4195       return optimizeFls(CI, Builder);
4196     case LibFunc_abs:
4197     case LibFunc_labs:
4198     case LibFunc_llabs:
4199       return optimizeAbs(CI, Builder);
4200     case LibFunc_isdigit:
4201       return optimizeIsDigit(CI, Builder);
4202     case LibFunc_isascii:
4203       return optimizeIsAscii(CI, Builder);
4204     case LibFunc_toascii:
4205       return optimizeToAscii(CI, Builder);
4206     case LibFunc_atoi:
4207     case LibFunc_atol:
4208     case LibFunc_atoll:
4209       return optimizeAtoi(CI, Builder);
4210     case LibFunc_strtol:
4211     case LibFunc_strtoll:
4212       return optimizeStrToInt(CI, Builder, /*AsSigned=*/true);
4213     case LibFunc_strtoul:
4214     case LibFunc_strtoull:
4215       return optimizeStrToInt(CI, Builder, /*AsSigned=*/false);
4216     case LibFunc_printf:
4217       return optimizePrintF(CI, Builder);
4218     case LibFunc_sprintf:
4219       return optimizeSPrintF(CI, Builder);
4220     case LibFunc_snprintf:
4221       return optimizeSnPrintF(CI, Builder);
4222     case LibFunc_fprintf:
4223       return optimizeFPrintF(CI, Builder);
4224     case LibFunc_fwrite:
4225       return optimizeFWrite(CI, Builder);
4226     case LibFunc_fputs:
4227       return optimizeFPuts(CI, Builder);
4228     case LibFunc_puts:
4229       return optimizePuts(CI, Builder);
4230     case LibFunc_perror:
4231       return optimizeErrorReporting(CI, Builder);
4232     case LibFunc_vfprintf:
4233     case LibFunc_fiprintf:
4234       return optimizeErrorReporting(CI, Builder, 0);
4235     case LibFunc_exit:
4236     case LibFunc_Exit:
4237       return optimizeExit(CI);
4238     default:
4239       return nullptr;
4240     }
4241   }
4242   return nullptr;
4243 }
4244 
4245 LibCallSimplifier::LibCallSimplifier(
4246     const DataLayout &DL, const TargetLibraryInfo *TLI, DominatorTree *DT,
4247     DomConditionCache *DC, AssumptionCache *AC, OptimizationRemarkEmitter &ORE,
4248     BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
4249     function_ref<void(Instruction *, Value *)> Replacer,
4250     function_ref<void(Instruction *)> Eraser)
4251     : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), DT(DT), DC(DC), AC(AC),
4252       ORE(ORE), BFI(BFI), PSI(PSI), Replacer(Replacer), Eraser(Eraser) {}
4253 
4254 void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
4255   // Indirect through the replacer used in this instance.
4256   Replacer(I, With);
4257 }
4258 
4259 void LibCallSimplifier::eraseFromParent(Instruction *I) {
4260   Eraser(I);
4261 }
4262 
4263 // TODO:
4264 //   Additional cases that we need to add to this file:
4265 //
4266 // cbrt:
4267 //   * cbrt(expN(X))  -> expN(x/3)
4268 //   * cbrt(sqrt(x))  -> pow(x,1/6)
4269 //   * cbrt(cbrt(x))  -> pow(x,1/9)
4270 //
4271 // exp, expf, expl:
4272 //   * exp(log(x))  -> x
4273 //
4274 // log, logf, logl:
4275 //   * log(exp(x))   -> x
4276 //   * log(exp(y))   -> y*log(e)
4277 //   * log(exp10(y)) -> y*log(10)
4278 //   * log(sqrt(x))  -> 0.5*log(x)
4279 //
4280 // pow, powf, powl:
4281 //   * pow(sqrt(x),y) -> pow(x,y*0.5)
4282 //   * pow(pow(x,y),z)-> pow(x,y*z)
4283 //
4284 // signbit:
4285 //   * signbit(cnst) -> cnst'
//   * signbit(nncst) -> 0 (if nncst is a non-negative constant)
4287 //
4288 // sqrt, sqrtf, sqrtl:
4289 //   * sqrt(expN(x))  -> expN(x*0.5)
4290 //   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
4291 //   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
4292 //
4293 
4294 //===----------------------------------------------------------------------===//
4295 // Fortified Library Call Optimizations
4296 //===----------------------------------------------------------------------===//
4297 
4298 bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(
4299     CallInst *CI, unsigned ObjSizeOp, std::optional<unsigned> SizeOp,
4300     std::optional<unsigned> StrOp, std::optional<unsigned> FlagOp) {
4301   // If this function takes a flag argument, the implementation may use it to
4302   // perform extra checks. Don't fold into the non-checking variant.
4303   if (FlagOp) {
4304     ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
4305     if (!Flag || !Flag->isZero())
4306       return false;
4307   }
4308 
4309   if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
4310     return true;
4311 
4312   if (ConstantInt *ObjSizeCI =
4313           dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
4314     if (ObjSizeCI->isMinusOne())
4315       return true;
4316     // If the object size wasn't -1 (unknown), bail out if we were asked to.
4317     if (OnlyLowerUnknownSize)
4318       return false;
4319     if (StrOp) {
4320       uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
4321       // If the length is 0 we don't know how long it is and so we can't
4322       // remove the check.
4323       if (Len)
4324         annotateDereferenceableBytes(CI, *StrOp, Len);
4325       else
4326         return false;
4327       return ObjSizeCI->getZExtValue() >= Len;
4328     }
4329 
4330     if (SizeOp) {
4331       if (ConstantInt *SizeCI =
4332               dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
4333         return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
4334     }
4335   }
4336   return false;
4337 }
4338 
4339 Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
4340                                                      IRBuilderBase &B) {
4341   if (isFortifiedCallFoldable(CI, 3, 2)) {
4342     CallInst *NewCI =
4343         B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
4344                        Align(1), CI->getArgOperand(2));
4345     mergeAttributesAndFlags(NewCI, *CI);
4346     return CI->getArgOperand(0);
4347   }
4348   return nullptr;
4349 }
4350 
4351 Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
4352                                                       IRBuilderBase &B) {
4353   if (isFortifiedCallFoldable(CI, 3, 2)) {
4354     CallInst *NewCI =
4355         B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
4356                         Align(1), CI->getArgOperand(2));
4357     mergeAttributesAndFlags(NewCI, *CI);
4358     return CI->getArgOperand(0);
4359   }
4360   return nullptr;
4361 }
4362 
4363 Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
4364                                                      IRBuilderBase &B) {
4365   if (isFortifiedCallFoldable(CI, 3, 2)) {
4366     Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
4367     CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
4368                                      CI->getArgOperand(2), Align(1));
4369     mergeAttributesAndFlags(NewCI, *CI);
4370     return CI->getArgOperand(0);
4371   }
4372   return nullptr;
4373 }
4374 
4375 Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
4376                                                       IRBuilderBase &B) {
4377   const DataLayout &DL = CI->getDataLayout();
4378   if (isFortifiedCallFoldable(CI, 3, 2))
4379     if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4380                                   CI->getArgOperand(2), B, DL, TLI)) {
4381       return mergeAttributesAndFlags(cast<CallInst>(Call), *CI);
4382     }
4383   return nullptr;
4384 }
4385 
4386 Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
4387                                                       IRBuilderBase &B,
4388                                                       LibFunc Func) {
4389   const DataLayout &DL = CI->getDataLayout();
4390   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
4391         *ObjSize = CI->getArgOperand(2);
4392 
4393   // __stpcpy_chk(x,x,...)  -> x+strlen(x)
4394   if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
4395     Value *StrLen = emitStrLen(Src, B, DL, TLI);
4396     return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
4397   }
4398 
4399   // If a) we don't have any length information, or b) we know this will
4400   // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
4401   // st[rp]cpy_chk call which may fail at runtime if the size is too long.
4402   // TODO: It might be nice to get a maximum length out of the possible
4403   // string lengths for varying.
4404   if (isFortifiedCallFoldable(CI, 2, std::nullopt, 1)) {
4405     if (Func == LibFunc_strcpy_chk)
4406       return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
4407     else
4408       return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI));
4409   }
4410 
4411   if (OnlyLowerUnknownSize)
4412     return nullptr;
4413 
4414   // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
4415   uint64_t Len = GetStringLength(Src);
4416   if (Len)
4417     annotateDereferenceableBytes(CI, 1, Len);
4418   else
4419     return nullptr;
4420 
4421   unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
4422   Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
4423   Value *LenV = ConstantInt::get(SizeTTy, Len);
4424   Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
4425   // If the function was an __stpcpy_chk, and we were able to fold it into
4426   // a __memcpy_chk, we still need to return the correct end pointer.
4427   if (Ret && Func == LibFunc_stpcpy_chk)
4428     return B.CreateInBoundsGEP(B.getInt8Ty(), Dst,
4429                                ConstantInt::get(SizeTTy, Len - 1));
4430   return copyFlags(*CI, cast<CallInst>(Ret));
4431 }
4432 
4433 Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI,
4434                                                      IRBuilderBase &B) {
4435   if (isFortifiedCallFoldable(CI, 1, std::nullopt, 0))
4436     return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B,
4437                                      CI->getDataLayout(), TLI));
4438   return nullptr;
4439 }
4440 
4441 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
4442                                                        IRBuilderBase &B,
4443                                                        LibFunc Func) {
4444   if (isFortifiedCallFoldable(CI, 3, 2)) {
4445     if (Func == LibFunc_strncpy_chk)
4446       return copyFlags(*CI,
4447                        emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4448                                    CI->getArgOperand(2), B, TLI));
4449     else
4450       return copyFlags(*CI,
4451                        emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4452                                    CI->getArgOperand(2), B, TLI));
4453   }
4454 
4455   return nullptr;
4456 }
4457 
4458 Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
4459                                                       IRBuilderBase &B) {
4460   if (isFortifiedCallFoldable(CI, 4, 3))
4461     return copyFlags(
4462         *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4463                          CI->getArgOperand(2), CI->getArgOperand(3), B, TLI));
4464 
4465   return nullptr;
4466 }
4467 
4468 Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
4469                                                        IRBuilderBase &B) {
4470   if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2)) {
4471     SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5));
4472     return copyFlags(*CI,
4473                      emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
4474                                   CI->getArgOperand(4), VariadicArgs, B, TLI));
4475   }
4476 
4477   return nullptr;
4478 }
4479 
4480 Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
4481                                                       IRBuilderBase &B) {
4482   if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1)) {
4483     SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4));
4484     return copyFlags(*CI,
4485                      emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
4486                                  VariadicArgs, B, TLI));
4487   }
4488 
4489   return nullptr;
4490 }
4491 
4492 Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
4493                                                      IRBuilderBase &B) {
4494   if (isFortifiedCallFoldable(CI, 2))
4495     return copyFlags(
4496         *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI));
4497 
4498   return nullptr;
4499 }
4500 
4501 Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
4502                                                    IRBuilderBase &B) {
4503   if (isFortifiedCallFoldable(CI, 3))
4504     return copyFlags(*CI,
4505                      emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
4506                                  CI->getArgOperand(2), B, TLI));
4507 
4508   return nullptr;
4509 }
4510 
4511 Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
4512                                                       IRBuilderBase &B) {
4513   if (isFortifiedCallFoldable(CI, 3))
4514     return copyFlags(*CI,
4515                      emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
4516                                  CI->getArgOperand(2), B, TLI));
4517 
4518   return nullptr;
4519 }
4520 
4521 Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
4522                                                       IRBuilderBase &B) {
4523   if (isFortifiedCallFoldable(CI, 3))
4524     return copyFlags(*CI,
4525                      emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
4526                                  CI->getArgOperand(2), B, TLI));
4527 
4528   return nullptr;
4529 }
4530 
4531 Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
4532                                                         IRBuilderBase &B) {
4533   if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2))
4534     return copyFlags(
4535         *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
4536                            CI->getArgOperand(4), CI->getArgOperand(5), B, TLI));
4537 
4538   return nullptr;
4539 }
4540 
4541 Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
4542                                                        IRBuilderBase &B) {
4543   if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1))
4544     return copyFlags(*CI,
4545                      emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
4546                                   CI->getArgOperand(4), B, TLI));
4547 
4548   return nullptr;
4549 }
4550 
4551 Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
4552                                                 IRBuilderBase &Builder) {
4553   // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
4554   // Some clang users checked for _chk libcall availability using:
4555   //   __has_builtin(__builtin___memcpy_chk)
4556   // When compiling with -fno-builtin, this is always true.
4557   // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
4558   // end up with fortified libcalls, which isn't acceptable in a freestanding
4559   // environment which only provides their non-fortified counterparts.
4560   //
4561   // Until we change clang and/or teach external users to check for availability
4562   // differently, disregard the "nobuiltin" attribute and TLI::has.
4563   //
4564   // PR23093.
4565 
4566   LibFunc Func;
4567   Function *Callee = CI->getCalledFunction();
4568   bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
4569 
4570   SmallVector<OperandBundleDef, 2> OpBundles;
4571   CI->getOperandBundlesAsDefs(OpBundles);
4572 
4573   IRBuilderBase::OperandBundlesGuard Guard(Builder);
4574   Builder.setDefaultOperandBundles(OpBundles);
4575 
4576   // First, check that this is a known library functions and that the prototype
4577   // is correct.
4578   if (!TLI->getLibFunc(*Callee, Func))
4579     return nullptr;
4580 
4581   // We never change the calling convention.
4582   if (!ignoreCallingConv(Func) && !IsCallingConvC)
4583     return nullptr;
4584 
4585   switch (Func) {
4586   case LibFunc_memcpy_chk:
4587     return optimizeMemCpyChk(CI, Builder);
4588   case LibFunc_mempcpy_chk:
4589     return optimizeMemPCpyChk(CI, Builder);
4590   case LibFunc_memmove_chk:
4591     return optimizeMemMoveChk(CI, Builder);
4592   case LibFunc_memset_chk:
4593     return optimizeMemSetChk(CI, Builder);
4594   case LibFunc_stpcpy_chk:
4595   case LibFunc_strcpy_chk:
4596     return optimizeStrpCpyChk(CI, Builder, Func);
4597   case LibFunc_strlen_chk:
4598     return optimizeStrLenChk(CI, Builder);
4599   case LibFunc_stpncpy_chk:
4600   case LibFunc_strncpy_chk:
4601     return optimizeStrpNCpyChk(CI, Builder, Func);
4602   case LibFunc_memccpy_chk:
4603     return optimizeMemCCpyChk(CI, Builder);
4604   case LibFunc_snprintf_chk:
4605     return optimizeSNPrintfChk(CI, Builder);
4606   case LibFunc_sprintf_chk:
4607     return optimizeSPrintfChk(CI, Builder);
4608   case LibFunc_strcat_chk:
4609     return optimizeStrCatChk(CI, Builder);
4610   case LibFunc_strlcat_chk:
4611     return optimizeStrLCat(CI, Builder);
4612   case LibFunc_strncat_chk:
4613     return optimizeStrNCatChk(CI, Builder);
4614   case LibFunc_strlcpy_chk:
4615     return optimizeStrLCpyChk(CI, Builder);
4616   case LibFunc_vsnprintf_chk:
4617     return optimizeVSNPrintfChk(CI, Builder);
4618   case LibFunc_vsprintf_chk:
4619     return optimizeVSPrintfChk(CI, Builder);
4620   default:
4621     break;
4622   }
4623   return nullptr;
4624 }
4625 
4626 FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
4627     const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
4628     : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
4629