xref: /llvm-project/llvm/lib/FuzzMutate/Operations.cpp (revision 3e15bce9e1e144c0e568eed10010fa0e359e8ec2)
1 //===-- Operations.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/FuzzMutate/Operations.h"
10 #include "llvm/IR/BasicBlock.h"
11 #include "llvm/IR/Constants.h"
12 #include "llvm/IR/Function.h"
13 #include "llvm/IR/Instructions.h"
14 
15 using namespace llvm;
16 using namespace fuzzerop;
17 
18 void llvm::describeFuzzerIntOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
19   Ops.push_back(binOpDescriptor(1, Instruction::Add));
20   Ops.push_back(binOpDescriptor(1, Instruction::Sub));
21   Ops.push_back(binOpDescriptor(1, Instruction::Mul));
22   Ops.push_back(binOpDescriptor(1, Instruction::SDiv));
23   Ops.push_back(binOpDescriptor(1, Instruction::UDiv));
24   Ops.push_back(binOpDescriptor(1, Instruction::SRem));
25   Ops.push_back(binOpDescriptor(1, Instruction::URem));
26   Ops.push_back(binOpDescriptor(1, Instruction::Shl));
27   Ops.push_back(binOpDescriptor(1, Instruction::LShr));
28   Ops.push_back(binOpDescriptor(1, Instruction::AShr));
29   Ops.push_back(binOpDescriptor(1, Instruction::And));
30   Ops.push_back(binOpDescriptor(1, Instruction::Or));
31   Ops.push_back(binOpDescriptor(1, Instruction::Xor));
32 
33   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_EQ));
34   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_NE));
35   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_UGT));
36   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_UGE));
37   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_ULT));
38   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_ULE));
39   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SGT));
40   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SGE));
41   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SLT));
42   Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SLE));
43 }
44 
45 void llvm::describeFuzzerFloatOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
46   Ops.push_back(binOpDescriptor(1, Instruction::FAdd));
47   Ops.push_back(binOpDescriptor(1, Instruction::FSub));
48   Ops.push_back(binOpDescriptor(1, Instruction::FMul));
49   Ops.push_back(binOpDescriptor(1, Instruction::FDiv));
50   Ops.push_back(binOpDescriptor(1, Instruction::FRem));
51 
52   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_FALSE));
53   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OEQ));
54   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OGT));
55   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OGE));
56   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OLT));
57   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OLE));
58   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ONE));
59   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ORD));
60   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UNO));
61   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UEQ));
62   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UGT));
63   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UGE));
64   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ULT));
65   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ULE));
66   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UNE));
67   Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_TRUE));
68 }
69 
70 void llvm::describeFuzzerUnaryOperations(
71     std::vector<fuzzerop::OpDescriptor> &Ops) {
72   Ops.push_back(fnegDescriptor(1));
73 }
74 
75 void llvm::describeFuzzerControlFlowOps(
76     std::vector<fuzzerop::OpDescriptor> &Ops) {
77   Ops.push_back(splitBlockDescriptor(1));
78 }
79 
80 void llvm::describeFuzzerOtherOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
81   Ops.push_back(selectDescriptor(1));
82 }
83 
84 void llvm::describeFuzzerPointerOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
85   Ops.push_back(gepDescriptor(1));
86 }
87 
88 void llvm::describeFuzzerAggregateOps(
89     std::vector<fuzzerop::OpDescriptor> &Ops) {
90   Ops.push_back(extractValueDescriptor(1));
91   Ops.push_back(insertValueDescriptor(1));
92 }
93 
94 void llvm::describeFuzzerVectorOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
95   Ops.push_back(extractElementDescriptor(1));
96   Ops.push_back(insertElementDescriptor(1));
97   Ops.push_back(shuffleVectorDescriptor(1));
98 }
99 
100 OpDescriptor llvm::fuzzerop::selectDescriptor(unsigned Weight) {
101   auto buildOp = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) {
102     return SelectInst::Create(Srcs[0], Srcs[1], Srcs[2], "S", InsertPt);
103   };
104   return {Weight,
105           {boolOrVecBoolType(), matchFirstLengthWAnyType(), matchSecondType()},
106           buildOp};
107 }
108 
109 OpDescriptor llvm::fuzzerop::fnegDescriptor(unsigned Weight) {
110   auto buildOp = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) {
111     return UnaryOperator::Create(Instruction::FNeg, Srcs[0], "F", InsertPt);
112   };
113   return {Weight, {anyFloatOrVecFloatType()}, buildOp};
114 }
115 
116 OpDescriptor llvm::fuzzerop::binOpDescriptor(unsigned Weight,
117                                              Instruction::BinaryOps Op) {
118   auto buildOp = [Op](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) {
119     return BinaryOperator::Create(Op, Srcs[0], Srcs[1], "B", InsertPt);
120   };
121   switch (Op) {
122   case Instruction::Add:
123   case Instruction::Sub:
124   case Instruction::Mul:
125   case Instruction::SDiv:
126   case Instruction::UDiv:
127   case Instruction::SRem:
128   case Instruction::URem:
129   case Instruction::Shl:
130   case Instruction::LShr:
131   case Instruction::AShr:
132   case Instruction::And:
133   case Instruction::Or:
134   case Instruction::Xor:
135     return {Weight, {anyIntOrVecIntType(), matchFirstType()}, buildOp};
136   case Instruction::FAdd:
137   case Instruction::FSub:
138   case Instruction::FMul:
139   case Instruction::FDiv:
140   case Instruction::FRem:
141     return {Weight, {anyFloatOrVecFloatType(), matchFirstType()}, buildOp};
142   case Instruction::BinaryOpsEnd:
143     llvm_unreachable("Value out of range of enum");
144   }
145   llvm_unreachable("Covered switch");
146 }
147 
148 OpDescriptor llvm::fuzzerop::cmpOpDescriptor(unsigned Weight,
149                                              Instruction::OtherOps CmpOp,
150                                              CmpInst::Predicate Pred) {
151   auto buildOp = [CmpOp, Pred](ArrayRef<Value *> Srcs,
152                                BasicBlock::iterator InsertPt) {
153     return CmpInst::Create(CmpOp, Pred, Srcs[0], Srcs[1], "C", InsertPt);
154   };
155 
156   switch (CmpOp) {
157   case Instruction::ICmp:
158     return {Weight, {anyIntOrVecIntType(), matchFirstType()}, buildOp};
159   case Instruction::FCmp:
160     return {Weight, {anyFloatOrVecFloatType(), matchFirstType()}, buildOp};
161   default:
162     llvm_unreachable("CmpOp must be ICmp or FCmp");
163   }
164 }
165 
166 OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) {
167   auto buildSplitBlock = [](ArrayRef<Value *> Srcs,
168                             BasicBlock::iterator InsertPt) {
169     BasicBlock *Block = InsertPt->getParent();
170     BasicBlock *Next = Block->splitBasicBlock(InsertPt, "BB");
171 
172     // If it was an exception handling block, we are done.
173     if (Block->isEHPad())
174       return nullptr;
175 
176     // Loop back on this block by replacing the unconditional forward branch
177     // with a conditional with a backedge.
178     if (Block != &Block->getParent()->getEntryBlock()) {
179       BranchInst::Create(Block, Next, Srcs[0],
180                          Block->getTerminator()->getIterator());
181       Block->getTerminator()->eraseFromParent();
182 
183       // We need values for each phi in the block. Since there isn't a good way
184       // to do a variable number of input values currently, we just fill them
185       // with poison.
186       for (PHINode &PHI : Block->phis())
187         PHI.addIncoming(PoisonValue::get(PHI.getType()), Block);
188     }
189     return nullptr;
190   };
191   SourcePred isInt1Ty{[](ArrayRef<Value *>, const Value *V) {
192                         return V->getType()->isIntegerTy(1);
193                       },
194                       std::nullopt};
195   return {Weight, {isInt1Ty}, buildSplitBlock};
196 }
197 
198 OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) {
199   auto buildGEP = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) {
200     // TODO: It would be better to generate a random type here, rather than
201     // generating a random value and picking its type.
202     Type *Ty = Srcs[1]->getType();
203     auto Indices = ArrayRef(Srcs).drop_front(2);
204     return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", InsertPt);
205   };
206   // TODO: Handle aggregates and vectors
207   // TODO: Support multiple indices.
208   // TODO: Try to avoid meaningless accesses.
209   SourcePred sizedType(
210       [](ArrayRef<Value *>, const Value *V) { return V->getType()->isSized(); },
211       std::nullopt);
212   return {Weight, {sizedPtrType(), sizedType, anyIntType()}, buildGEP};
213 }
214 
215 static uint64_t getAggregateNumElements(Type *T) {
216   assert(T->isAggregateType() && "Not a struct or array");
217   if (isa<StructType>(T))
218     return T->getStructNumElements();
219   return T->getArrayNumElements();
220 }
221 
222 static SourcePred validExtractValueIndex() {
223   auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
224     if (auto *CI = dyn_cast<ConstantInt>(V))
225       if (!CI->uge(getAggregateNumElements(Cur[0]->getType())))
226         return true;
227     return false;
228   };
229   auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) {
230     std::vector<Constant *> Result;
231     auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext());
232     uint64_t N = getAggregateNumElements(Cur[0]->getType());
233     // Create indices at the start, end, and middle, but avoid dups.
234     Result.push_back(ConstantInt::get(Int32Ty, 0));
235     if (N > 1)
236       Result.push_back(ConstantInt::get(Int32Ty, N - 1));
237     if (N > 2)
238       Result.push_back(ConstantInt::get(Int32Ty, N / 2));
239     return Result;
240   };
241   return {Pred, Make};
242 }
243 
244 OpDescriptor llvm::fuzzerop::extractValueDescriptor(unsigned Weight) {
245   auto buildExtract = [](ArrayRef<Value *> Srcs,
246                          BasicBlock::iterator InsertPt) {
247     // TODO: It's pretty inefficient to shuffle this all through constants.
248     unsigned Idx = cast<ConstantInt>(Srcs[1])->getZExtValue();
249     return ExtractValueInst::Create(Srcs[0], {Idx}, "E", InsertPt);
250   };
251   // TODO: Should we handle multiple indices?
252   return {Weight, {anyAggregateType(), validExtractValueIndex()}, buildExtract};
253 }
254 
255 static SourcePred matchScalarInAggregate() {
256   auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
257     if (auto *ArrayT = dyn_cast<ArrayType>(Cur[0]->getType()))
258       return V->getType() == ArrayT->getElementType();
259 
260     auto *STy = cast<StructType>(Cur[0]->getType());
261     for (int I = 0, E = STy->getNumElements(); I < E; ++I)
262       if (STy->getTypeAtIndex(I) == V->getType())
263         return true;
264     return false;
265   };
266   auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *>) {
267     if (auto *ArrayT = dyn_cast<ArrayType>(Cur[0]->getType()))
268       return makeConstantsWithType(ArrayT->getElementType());
269 
270     std::vector<Constant *> Result;
271     auto *STy = cast<StructType>(Cur[0]->getType());
272     for (int I = 0, E = STy->getNumElements(); I < E; ++I)
273       makeConstantsWithType(STy->getTypeAtIndex(I), Result);
274     return Result;
275   };
276   return {Pred, Make};
277 }
278 
279 static SourcePred validInsertValueIndex() {
280   auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
281     if (auto *CI = dyn_cast<ConstantInt>(V))
282       if (CI->getBitWidth() == 32) {
283         Type *Indexed = ExtractValueInst::getIndexedType(Cur[0]->getType(),
284                                                          CI->getZExtValue());
285         return Indexed == Cur[1]->getType();
286       }
287     return false;
288   };
289   auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) {
290     std::vector<Constant *> Result;
291     auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext());
292     auto *BaseTy = Cur[0]->getType();
293     int I = 0;
294     while (Type *Indexed = ExtractValueInst::getIndexedType(BaseTy, I)) {
295       if (Indexed == Cur[1]->getType())
296         Result.push_back(ConstantInt::get(Int32Ty, I));
297       ++I;
298     }
299     return Result;
300   };
301   return {Pred, Make};
302 }
303 
304 OpDescriptor llvm::fuzzerop::insertValueDescriptor(unsigned Weight) {
305   auto buildInsert = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) {
306     // TODO: It's pretty inefficient to shuffle this all through constants.
307     unsigned Idx = cast<ConstantInt>(Srcs[2])->getZExtValue();
308     return InsertValueInst::Create(Srcs[0], Srcs[1], {Idx}, "I", InsertPt);
309   };
310   return {
311       Weight,
312       {anyAggregateType(), matchScalarInAggregate(), validInsertValueIndex()},
313       buildInsert};
314 }
315 
316 OpDescriptor llvm::fuzzerop::extractElementDescriptor(unsigned Weight) {
317   auto buildExtract = [](ArrayRef<Value *> Srcs,
318                          BasicBlock::iterator InsertPt) {
319     return ExtractElementInst::Create(Srcs[0], Srcs[1], "E", InsertPt);
320   };
321   // TODO: Try to avoid undefined accesses.
322   return {Weight, {anyVectorType(), anyIntType()}, buildExtract};
323 }
324 
325 OpDescriptor llvm::fuzzerop::insertElementDescriptor(unsigned Weight) {
326   auto buildInsert = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) {
327     return InsertElementInst::Create(Srcs[0], Srcs[1], Srcs[2], "I", InsertPt);
328   };
329   // TODO: Try to avoid undefined accesses.
330   return {Weight,
331           {anyVectorType(), matchScalarOfFirstType(), anyIntType()},
332           buildInsert};
333 }
334 
335 static SourcePred validShuffleVectorIndex() {
336   auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
337     return ShuffleVectorInst::isValidOperands(Cur[0], Cur[1], V);
338   };
339   auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) {
340     auto *FirstTy = cast<VectorType>(Cur[0]->getType());
341     auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext());
342     // TODO: It's straighforward to make up reasonable values, but listing them
343     // exhaustively would be insane. Come up with a couple of sensible ones.
344     return std::vector<Constant *>{
345         PoisonValue::get(VectorType::get(Int32Ty, FirstTy->getElementCount()))};
346   };
347   return {Pred, Make};
348 }
349 
350 OpDescriptor llvm::fuzzerop::shuffleVectorDescriptor(unsigned Weight) {
351   auto buildShuffle = [](ArrayRef<Value *> Srcs,
352                          BasicBlock::iterator InsertPt) {
353     return new ShuffleVectorInst(Srcs[0], Srcs[1], Srcs[2], "S", InsertPt);
354   };
355   return {Weight,
356           {anyVectorType(), matchFirstType(), validShuffleVectorIndex()},
357           buildShuffle};
358 }
359