xref: /llvm-project/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp (revision 070556237e29e8a804fbec1d416d431239384ab0)
1f489e2bfSKostya Serebryany /*===- DataFlow.cpp - a standalone DataFlow tracer                  -------===//
2f489e2bfSKostya Serebryany //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f489e2bfSKostya Serebryany //
7f489e2bfSKostya Serebryany //===----------------------------------------------------------------------===//
8f489e2bfSKostya Serebryany // An experimental data-flow tracer for fuzz targets.
9f489e2bfSKostya Serebryany // It is based on DFSan and SanitizerCoverage.
10f489e2bfSKostya Serebryany // https://clang.llvm.org/docs/DataFlowSanitizer.html
11f489e2bfSKostya Serebryany // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
12f489e2bfSKostya Serebryany //
13f489e2bfSKostya Serebryany // It executes the fuzz target on the given input while monitoring the
14f489e2bfSKostya Serebryany // data flow for every instrumented comparison instruction.
15f489e2bfSKostya Serebryany //
16219b2b3aSKostya Serebryany // The output shows which functions depend on which bytes of the input,
17219b2b3aSKostya Serebryany // and also provides basic-block coverage for every input.
18f489e2bfSKostya Serebryany //
19f489e2bfSKostya Serebryany // Build:
205b4dda55SGeorge Balatsouras //   1. Compile this file (DataFlow.cpp) with -fsanitize=dataflow and -O2.
21679669a7SKostya Serebryany //   2. Compile DataFlowCallbacks.cpp with -O2 -fPIC.
22679669a7SKostya Serebryany //   3. Build the fuzz target with -g -fsanitize=dataflow
23219b2b3aSKostya Serebryany //       -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp
24679669a7SKostya Serebryany //   4. Link those together with -fsanitize=dataflow
25f489e2bfSKostya Serebryany //
26f489e2bfSKostya Serebryany //  -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
27f489e2bfSKostya Serebryany //  instruction, DFSan modifies the calls to pass the data flow labels.
28f489e2bfSKostya Serebryany //  The callbacks update the data flow label for the current function.
29f489e2bfSKostya Serebryany //  See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
30f489e2bfSKostya Serebryany //
31219b2b3aSKostya Serebryany //  -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function
32f489e2bfSKostya Serebryany //  entries so that the comparison callback knows that current function.
33219b2b3aSKostya Serebryany //  -fsanitize-coverage=...,bb also allows to collect basic block coverage.
34f489e2bfSKostya Serebryany //
35f489e2bfSKostya Serebryany //
36f489e2bfSKostya Serebryany // Run:
37219b2b3aSKostya Serebryany //   # Collect data flow and coverage for INPUT_FILE
38219b2b3aSKostya Serebryany //   # write to OUTPUT_FILE (default: stdout)
39e2d0b44aSMatt Morehouse //   export DFSAN_OPTIONS=warn_unimplemented=0
403f39123dSKostya Serebryany //   ./a.out INPUT_FILE [OUTPUT_FILE]
41f489e2bfSKostya Serebryany //
42f489e2bfSKostya Serebryany //   # Print all instrumented functions. llvm-symbolizer must be present in PATH
43f489e2bfSKostya Serebryany //   ./a.out
44f489e2bfSKostya Serebryany //
45f489e2bfSKostya Serebryany // Example output:
46f489e2bfSKostya Serebryany // ===============
4749253928SKostya Serebryany //  F0 11111111111111
4849253928SKostya Serebryany //  F1 10000000000000
49e13eff29SKostya Serebryany //  C0 1 2 3 4 5
50e13eff29SKostya Serebryany //  C1 8
51f489e2bfSKostya Serebryany //  ===============
5249253928SKostya Serebryany // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on.
53e13eff29SKostya Serebryany // "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered
54e13eff29SKostya Serebryany //    in addition to the function's entry block, out of T total instrumented
55e13eff29SKostya Serebryany //    blocks.
56219b2b3aSKostya Serebryany //
57f489e2bfSKostya Serebryany //===----------------------------------------------------------------------===*/
58f489e2bfSKostya Serebryany 
59f489e2bfSKostya Serebryany #include <assert.h>
60f489e2bfSKostya Serebryany #include <stdio.h>
61f489e2bfSKostya Serebryany #include <stdlib.h>
62f489e2bfSKostya Serebryany #include <stdint.h>
63f489e2bfSKostya Serebryany #include <string.h>
64f489e2bfSKostya Serebryany 
65f489e2bfSKostya Serebryany #include <execinfo.h>  // backtrace_symbols_fd
66f489e2bfSKostya Serebryany 
67679669a7SKostya Serebryany #include "DataFlow.h"
68f489e2bfSKostya Serebryany 
69f489e2bfSKostya Serebryany extern "C" {
70f489e2bfSKostya Serebryany extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
71f489e2bfSKostya Serebryany __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
72f489e2bfSKostya Serebryany } // extern "C"
73f489e2bfSKostya Serebryany 
74679669a7SKostya Serebryany CallbackData __dft;
75f489e2bfSKostya Serebryany static size_t InputLen;
763f39123dSKostya Serebryany static size_t NumIterations;
77679669a7SKostya Serebryany static dfsan_label **FuncLabelsPerIter;  // NumIterations x NumFuncs;
783f39123dSKostya Serebryany 
BlockIsEntry(size_t BlockIdx)79e13eff29SKostya Serebryany static inline bool BlockIsEntry(size_t BlockIdx) {
80679669a7SKostya Serebryany   return __dft.PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
81e13eff29SKostya Serebryany }
82e13eff29SKostya Serebryany 
835b4dda55SGeorge Balatsouras const int kNumLabels = 8;
84679669a7SKostya Serebryany 
85f489e2bfSKostya Serebryany // Prints all instrumented functions.
PrintFunctions()8649253928SKostya Serebryany static int PrintFunctions() {
87f489e2bfSKostya Serebryany   // We don't have the symbolizer integrated with dfsan yet.
88f489e2bfSKostya Serebryany   // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
89f489e2bfSKostya Serebryany   // TODO(kcc): this is pretty ugly and may break in lots of ways.
90f489e2bfSKostya Serebryany   //      We'll need to make a proper in-process symbolizer work with DFSan.
91f489e2bfSKostya Serebryany   FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
92f489e2bfSKostya Serebryany                      "| llvm-symbolizer "
93*07055623SGeorge Balatsouras                      "| grep '\\.dfsan' "
94*07055623SGeorge Balatsouras                      "| sed 's/\\.dfsan//g' "
9527cf743bSKostya Serebryany                      "| c++filt",
9627cf743bSKostya Serebryany                      "w");
97679669a7SKostya Serebryany   for (size_t I = 0; I < __dft.NumGuards; I++) {
98679669a7SKostya Serebryany     uintptr_t PC = __dft.PCsBeg[I * 2];
99e13eff29SKostya Serebryany     if (!BlockIsEntry(I)) continue;
100f489e2bfSKostya Serebryany     void *const Buf[1] = {(void*)PC};
101f489e2bfSKostya Serebryany     backtrace_symbols_fd(Buf, 1, fileno(Pipe));
102f489e2bfSKostya Serebryany   }
103f489e2bfSKostya Serebryany   pclose(Pipe);
104f489e2bfSKostya Serebryany   return 0;
105f489e2bfSKostya Serebryany }
106f489e2bfSKostya Serebryany 
PrintBinary(FILE * Out,dfsan_label L,size_t Len)1073f39123dSKostya Serebryany static void PrintBinary(FILE *Out, dfsan_label L, size_t Len) {
1083f39123dSKostya Serebryany   char buf[kNumLabels + 1];
1093f39123dSKostya Serebryany   assert(Len <= kNumLabels);
1103f39123dSKostya Serebryany   for (int i = 0; i < kNumLabels; i++)
1113f39123dSKostya Serebryany     buf[i] = (L & (1 << i)) ? '1' : '0';
1123f39123dSKostya Serebryany   buf[Len] = 0;
1133f39123dSKostya Serebryany   fprintf(Out, "%s", buf);
11449253928SKostya Serebryany }
11549253928SKostya Serebryany 
PrintDataFlow(FILE * Out)11649253928SKostya Serebryany static void PrintDataFlow(FILE *Out) {
117679669a7SKostya Serebryany   for (size_t Func = 0; Func < __dft.NumFuncs; Func++) {
1183f39123dSKostya Serebryany     bool HasAny = false;
1193f39123dSKostya Serebryany     for (size_t Iter = 0; Iter < NumIterations; Iter++)
120679669a7SKostya Serebryany       if (FuncLabelsPerIter[Iter][Func])
1213f39123dSKostya Serebryany         HasAny = true;
1223f39123dSKostya Serebryany     if (!HasAny)
1233f39123dSKostya Serebryany       continue;
1243f39123dSKostya Serebryany     fprintf(Out, "F%zd ", Func);
1253f39123dSKostya Serebryany     size_t LenOfLastIteration = kNumLabels;
1263f39123dSKostya Serebryany     if (auto Tail = InputLen % kNumLabels)
1273f39123dSKostya Serebryany         LenOfLastIteration = Tail;
1283f39123dSKostya Serebryany     for (size_t Iter = 0; Iter < NumIterations; Iter++)
129679669a7SKostya Serebryany       PrintBinary(Out, FuncLabelsPerIter[Iter][Func],
1303f39123dSKostya Serebryany                   Iter == NumIterations - 1 ? LenOfLastIteration : kNumLabels);
1313f39123dSKostya Serebryany     fprintf(Out, "\n");
1323f39123dSKostya Serebryany   }
133f489e2bfSKostya Serebryany }
134f489e2bfSKostya Serebryany 
PrintCoverage(FILE * Out)135219b2b3aSKostya Serebryany static void PrintCoverage(FILE *Out) {
136219b2b3aSKostya Serebryany   ssize_t CurrentFuncGuard = -1;
137219b2b3aSKostya Serebryany   ssize_t CurrentFuncNum = -1;
138e13eff29SKostya Serebryany   ssize_t NumBlocksInCurrentFunc = -1;
139679669a7SKostya Serebryany   for (size_t FuncBeg = 0; FuncBeg < __dft.NumGuards;) {
140219b2b3aSKostya Serebryany     CurrentFuncNum++;
141e13eff29SKostya Serebryany     assert(BlockIsEntry(FuncBeg));
142e13eff29SKostya Serebryany     size_t FuncEnd = FuncBeg + 1;
143679669a7SKostya Serebryany     for (; FuncEnd < __dft.NumGuards && !BlockIsEntry(FuncEnd); FuncEnd++)
144e13eff29SKostya Serebryany       ;
145679669a7SKostya Serebryany     if (__dft.BBExecuted[FuncBeg]) {
146219b2b3aSKostya Serebryany       fprintf(Out, "C%zd", CurrentFuncNum);
147e13eff29SKostya Serebryany       for (size_t I = FuncBeg + 1; I < FuncEnd; I++)
148679669a7SKostya Serebryany         if (__dft.BBExecuted[I])
149e13eff29SKostya Serebryany           fprintf(Out, " %zd", I - FuncBeg);
150e13eff29SKostya Serebryany       fprintf(Out, " %zd\n", FuncEnd - FuncBeg);
151219b2b3aSKostya Serebryany     }
152e13eff29SKostya Serebryany     FuncBeg = FuncEnd;
153219b2b3aSKostya Serebryany   }
154219b2b3aSKostya Serebryany }
155219b2b3aSKostya Serebryany 
main(int argc,char ** argv)156f489e2bfSKostya Serebryany int main(int argc, char **argv) {
157f489e2bfSKostya Serebryany   if (LLVMFuzzerInitialize)
158f489e2bfSKostya Serebryany     LLVMFuzzerInitialize(&argc, &argv);
159f489e2bfSKostya Serebryany   if (argc == 1)
160f489e2bfSKostya Serebryany     return PrintFunctions();
1613f39123dSKostya Serebryany   assert(argc == 2 || argc == 3);
162f489e2bfSKostya Serebryany 
1633f39123dSKostya Serebryany   const char *Input = argv[1];
164f489e2bfSKostya Serebryany   fprintf(stderr, "INFO: reading '%s'\n", Input);
165f489e2bfSKostya Serebryany   FILE *In = fopen(Input, "r");
166f489e2bfSKostya Serebryany   assert(In);
167f489e2bfSKostya Serebryany   fseek(In, 0, SEEK_END);
168f489e2bfSKostya Serebryany   InputLen = ftell(In);
169f489e2bfSKostya Serebryany   fseek(In, 0, SEEK_SET);
170f489e2bfSKostya Serebryany   unsigned char *Buf = (unsigned char*)malloc(InputLen);
171f489e2bfSKostya Serebryany   size_t NumBytesRead = fread(Buf, 1, InputLen, In);
172f489e2bfSKostya Serebryany   assert(NumBytesRead == InputLen);
173f489e2bfSKostya Serebryany   fclose(In);
174f489e2bfSKostya Serebryany 
1753f39123dSKostya Serebryany   NumIterations = (NumBytesRead + kNumLabels - 1) / kNumLabels;
176679669a7SKostya Serebryany   FuncLabelsPerIter =
177679669a7SKostya Serebryany       (dfsan_label **)calloc(NumIterations, sizeof(dfsan_label *));
178679669a7SKostya Serebryany   for (size_t Iter = 0; Iter < NumIterations; Iter++)
179679669a7SKostya Serebryany     FuncLabelsPerIter[Iter] =
180679669a7SKostya Serebryany         (dfsan_label *)calloc(__dft.NumFuncs, sizeof(dfsan_label));
1819bc707c0SHans Wennborg 
182679669a7SKostya Serebryany   for (size_t Iter = 0; Iter < NumIterations; Iter++) {
183679669a7SKostya Serebryany     fprintf(stderr, "INFO: running '%s' %zd/%zd\n", Input, Iter, NumIterations);
1843f39123dSKostya Serebryany     dfsan_flush();
1853f39123dSKostya Serebryany     dfsan_set_label(0, Buf, InputLen);
186679669a7SKostya Serebryany     __dft.FuncLabels = FuncLabelsPerIter[Iter];
1873f39123dSKostya Serebryany 
188679669a7SKostya Serebryany     size_t BaseIdx = Iter * kNumLabels;
1893f39123dSKostya Serebryany     size_t LastIdx = BaseIdx + kNumLabels < NumBytesRead ? BaseIdx + kNumLabels
1903f39123dSKostya Serebryany                                                          : NumBytesRead;
1913f39123dSKostya Serebryany     assert(BaseIdx < LastIdx);
1923f39123dSKostya Serebryany     for (size_t Idx = BaseIdx; Idx < LastIdx; Idx++)
1933f39123dSKostya Serebryany       dfsan_set_label(1 << (Idx - BaseIdx), Buf + Idx, 1);
1949bc707c0SHans Wennborg     LLVMFuzzerTestOneInput(Buf, InputLen);
1953f39123dSKostya Serebryany   }
196f489e2bfSKostya Serebryany   free(Buf);
197f489e2bfSKostya Serebryany 
1983f39123dSKostya Serebryany   bool OutIsStdout = argc == 2;
199f489e2bfSKostya Serebryany   fprintf(stderr, "INFO: writing dataflow to %s\n",
2003f39123dSKostya Serebryany           OutIsStdout ? "<stdout>" : argv[2]);
2013f39123dSKostya Serebryany   FILE *Out = OutIsStdout ? stdout : fopen(argv[2], "w");
202f489e2bfSKostya Serebryany   PrintDataFlow(Out);
203219b2b3aSKostya Serebryany   PrintCoverage(Out);
204f489e2bfSKostya Serebryany   if (!OutIsStdout) fclose(Out);
205f489e2bfSKostya Serebryany }
206