/*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// An experimental data-flow tracer for fuzz targets.
// It is based on DFSan and SanitizerCoverage.
// https://clang.llvm.org/docs/DataFlowSanitizer.html
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
//
// It executes the fuzz target on the given input while monitoring the
// data flow for every instrumented comparison instruction.
//
// The output shows which functions depend on which bytes of the input,
// and also provides basic-block coverage for every input.
//
// Build:
//   1. Compile this file (DataFlow.cpp) with -fsanitize=dataflow and -O2.
//   2. Compile DataFlowCallbacks.cpp with -O2 -fPIC.
//   3. Build the fuzz target with -g -fsanitize=dataflow
//      -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp
//   4. Link those together with -fsanitize=dataflow
//
// -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
// instruction; DFSan modifies the calls to pass the data flow labels.
// The callbacks update the data flow label for the current function.
// See e.g. __dfsw___sanitizer_cov_trace_cmp1 (defined outside this file,
// with the other callbacks).
//
// -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function
// entries so that the comparison callback knows the current function.
// -fsanitize-coverage=...,bb also allows collecting basic block coverage.
//
//
// Run:
//   # Collect data flow and coverage for INPUT_FILE
//   # write to OUTPUT_FILE (default: stdout)
//   export DFSAN_OPTIONS=warn_unimplemented=0
//   ./a.out INPUT_FILE [OUTPUT_FILE]
//
//   # Print all instrumented functions. llvm-symbolizer must be present in PATH
//   ./a.out
//
// Example output:
// ===============
//  F0 11111111111111
//  F1 10000000000000
//  C0 1 2 3 4 5
//  C1 8
//  ===============
// "FN xxxxxxxxxx": tells which bytes of the input function N depends on.
// "CN X Y Z T": tells that function N has basic blocks X, Y, and Z covered
//    in addition to the function's entry block, out of T total instrumented
//    blocks.
//
//===----------------------------------------------------------------------===*/
583cab2bb3Spatrick
593cab2bb3Spatrick #include <assert.h>
603cab2bb3Spatrick #include <stdio.h>
613cab2bb3Spatrick #include <stdlib.h>
623cab2bb3Spatrick #include <stdint.h>
633cab2bb3Spatrick #include <string.h>
643cab2bb3Spatrick
653cab2bb3Spatrick #include <execinfo.h> // backtrace_symbols_fd
663cab2bb3Spatrick
673cab2bb3Spatrick #include "DataFlow.h"
683cab2bb3Spatrick
// Entry points supplied by the fuzz target; both use C linkage.
// LLVMFuzzerTestOneInput is required and is run on the input buffer.
extern "C" int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
// LLVMFuzzerInitialize is optional: it is declared weak, so its address is
// null when the target does not define it (main tests for that before calling).
extern "C" __attribute__((weak)) int LLVMFuzzerInitialize(int *argc,
                                                          char ***argv);
733cab2bb3Spatrick
743cab2bb3Spatrick CallbackData __dft;
753cab2bb3Spatrick static size_t InputLen;
763cab2bb3Spatrick static size_t NumIterations;
773cab2bb3Spatrick static dfsan_label **FuncLabelsPerIter; // NumIterations x NumFuncs;
783cab2bb3Spatrick
BlockIsEntry(size_t BlockIdx)793cab2bb3Spatrick static inline bool BlockIsEntry(size_t BlockIdx) {
803cab2bb3Spatrick return __dft.PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
813cab2bb3Spatrick }
823cab2bb3Spatrick
// Number of input bytes labelled per run of the fuzz target. Byte k of the
// current group is given the label bit (1 << k), so this is also the number
// of label bits that PrintBinary emits for a full group.
constexpr int kNumLabels = 8;
843cab2bb3Spatrick
853cab2bb3Spatrick // Prints all instrumented functions.
PrintFunctions()863cab2bb3Spatrick static int PrintFunctions() {
873cab2bb3Spatrick // We don't have the symbolizer integrated with dfsan yet.
883cab2bb3Spatrick // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
893cab2bb3Spatrick // TODO(kcc): this is pretty ugly and may break in lots of ways.
903cab2bb3Spatrick // We'll need to make a proper in-process symbolizer work with DFSan.
913cab2bb3Spatrick FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
923cab2bb3Spatrick "| llvm-symbolizer "
93*d89ec533Spatrick "| grep '\\.dfsan' "
94*d89ec533Spatrick "| sed 's/\\.dfsan//g' "
953cab2bb3Spatrick "| c++filt",
963cab2bb3Spatrick "w");
973cab2bb3Spatrick for (size_t I = 0; I < __dft.NumGuards; I++) {
983cab2bb3Spatrick uintptr_t PC = __dft.PCsBeg[I * 2];
993cab2bb3Spatrick if (!BlockIsEntry(I)) continue;
1003cab2bb3Spatrick void *const Buf[1] = {(void*)PC};
1013cab2bb3Spatrick backtrace_symbols_fd(Buf, 1, fileno(Pipe));
1023cab2bb3Spatrick }
1033cab2bb3Spatrick pclose(Pipe);
1043cab2bb3Spatrick return 0;
1053cab2bb3Spatrick }
1063cab2bb3Spatrick
PrintBinary(FILE * Out,dfsan_label L,size_t Len)1073cab2bb3Spatrick static void PrintBinary(FILE *Out, dfsan_label L, size_t Len) {
1083cab2bb3Spatrick char buf[kNumLabels + 1];
1093cab2bb3Spatrick assert(Len <= kNumLabels);
1103cab2bb3Spatrick for (int i = 0; i < kNumLabels; i++)
1113cab2bb3Spatrick buf[i] = (L & (1 << i)) ? '1' : '0';
1123cab2bb3Spatrick buf[Len] = 0;
1133cab2bb3Spatrick fprintf(Out, "%s", buf);
1143cab2bb3Spatrick }
1153cab2bb3Spatrick
PrintDataFlow(FILE * Out)1163cab2bb3Spatrick static void PrintDataFlow(FILE *Out) {
1173cab2bb3Spatrick for (size_t Func = 0; Func < __dft.NumFuncs; Func++) {
1183cab2bb3Spatrick bool HasAny = false;
1193cab2bb3Spatrick for (size_t Iter = 0; Iter < NumIterations; Iter++)
1203cab2bb3Spatrick if (FuncLabelsPerIter[Iter][Func])
1213cab2bb3Spatrick HasAny = true;
1223cab2bb3Spatrick if (!HasAny)
1233cab2bb3Spatrick continue;
1243cab2bb3Spatrick fprintf(Out, "F%zd ", Func);
1253cab2bb3Spatrick size_t LenOfLastIteration = kNumLabels;
1263cab2bb3Spatrick if (auto Tail = InputLen % kNumLabels)
1273cab2bb3Spatrick LenOfLastIteration = Tail;
1283cab2bb3Spatrick for (size_t Iter = 0; Iter < NumIterations; Iter++)
1293cab2bb3Spatrick PrintBinary(Out, FuncLabelsPerIter[Iter][Func],
1303cab2bb3Spatrick Iter == NumIterations - 1 ? LenOfLastIteration : kNumLabels);
1313cab2bb3Spatrick fprintf(Out, "\n");
1323cab2bb3Spatrick }
1333cab2bb3Spatrick }
1343cab2bb3Spatrick
PrintCoverage(FILE * Out)1353cab2bb3Spatrick static void PrintCoverage(FILE *Out) {
1363cab2bb3Spatrick ssize_t CurrentFuncGuard = -1;
1373cab2bb3Spatrick ssize_t CurrentFuncNum = -1;
1383cab2bb3Spatrick ssize_t NumBlocksInCurrentFunc = -1;
1393cab2bb3Spatrick for (size_t FuncBeg = 0; FuncBeg < __dft.NumGuards;) {
1403cab2bb3Spatrick CurrentFuncNum++;
1413cab2bb3Spatrick assert(BlockIsEntry(FuncBeg));
1423cab2bb3Spatrick size_t FuncEnd = FuncBeg + 1;
1433cab2bb3Spatrick for (; FuncEnd < __dft.NumGuards && !BlockIsEntry(FuncEnd); FuncEnd++)
1443cab2bb3Spatrick ;
1453cab2bb3Spatrick if (__dft.BBExecuted[FuncBeg]) {
1463cab2bb3Spatrick fprintf(Out, "C%zd", CurrentFuncNum);
1473cab2bb3Spatrick for (size_t I = FuncBeg + 1; I < FuncEnd; I++)
1483cab2bb3Spatrick if (__dft.BBExecuted[I])
1493cab2bb3Spatrick fprintf(Out, " %zd", I - FuncBeg);
1503cab2bb3Spatrick fprintf(Out, " %zd\n", FuncEnd - FuncBeg);
1513cab2bb3Spatrick }
1523cab2bb3Spatrick FuncBeg = FuncEnd;
1533cab2bb3Spatrick }
1543cab2bb3Spatrick }
1553cab2bb3Spatrick
main(int argc,char ** argv)1563cab2bb3Spatrick int main(int argc, char **argv) {
1573cab2bb3Spatrick if (LLVMFuzzerInitialize)
1583cab2bb3Spatrick LLVMFuzzerInitialize(&argc, &argv);
1593cab2bb3Spatrick if (argc == 1)
1603cab2bb3Spatrick return PrintFunctions();
1613cab2bb3Spatrick assert(argc == 2 || argc == 3);
1623cab2bb3Spatrick
1633cab2bb3Spatrick const char *Input = argv[1];
1643cab2bb3Spatrick fprintf(stderr, "INFO: reading '%s'\n", Input);
1653cab2bb3Spatrick FILE *In = fopen(Input, "r");
1663cab2bb3Spatrick assert(In);
1673cab2bb3Spatrick fseek(In, 0, SEEK_END);
1683cab2bb3Spatrick InputLen = ftell(In);
1693cab2bb3Spatrick fseek(In, 0, SEEK_SET);
1703cab2bb3Spatrick unsigned char *Buf = (unsigned char*)malloc(InputLen);
1713cab2bb3Spatrick size_t NumBytesRead = fread(Buf, 1, InputLen, In);
1723cab2bb3Spatrick assert(NumBytesRead == InputLen);
1733cab2bb3Spatrick fclose(In);
1743cab2bb3Spatrick
1753cab2bb3Spatrick NumIterations = (NumBytesRead + kNumLabels - 1) / kNumLabels;
1763cab2bb3Spatrick FuncLabelsPerIter =
1773cab2bb3Spatrick (dfsan_label **)calloc(NumIterations, sizeof(dfsan_label *));
1783cab2bb3Spatrick for (size_t Iter = 0; Iter < NumIterations; Iter++)
1793cab2bb3Spatrick FuncLabelsPerIter[Iter] =
1803cab2bb3Spatrick (dfsan_label *)calloc(__dft.NumFuncs, sizeof(dfsan_label));
1813cab2bb3Spatrick
1823cab2bb3Spatrick for (size_t Iter = 0; Iter < NumIterations; Iter++) {
1833cab2bb3Spatrick fprintf(stderr, "INFO: running '%s' %zd/%zd\n", Input, Iter, NumIterations);
1843cab2bb3Spatrick dfsan_flush();
1853cab2bb3Spatrick dfsan_set_label(0, Buf, InputLen);
1863cab2bb3Spatrick __dft.FuncLabels = FuncLabelsPerIter[Iter];
1873cab2bb3Spatrick
1883cab2bb3Spatrick size_t BaseIdx = Iter * kNumLabels;
1893cab2bb3Spatrick size_t LastIdx = BaseIdx + kNumLabels < NumBytesRead ? BaseIdx + kNumLabels
1903cab2bb3Spatrick : NumBytesRead;
1913cab2bb3Spatrick assert(BaseIdx < LastIdx);
1923cab2bb3Spatrick for (size_t Idx = BaseIdx; Idx < LastIdx; Idx++)
1933cab2bb3Spatrick dfsan_set_label(1 << (Idx - BaseIdx), Buf + Idx, 1);
1943cab2bb3Spatrick LLVMFuzzerTestOneInput(Buf, InputLen);
1953cab2bb3Spatrick }
1963cab2bb3Spatrick free(Buf);
1973cab2bb3Spatrick
1983cab2bb3Spatrick bool OutIsStdout = argc == 2;
1993cab2bb3Spatrick fprintf(stderr, "INFO: writing dataflow to %s\n",
2003cab2bb3Spatrick OutIsStdout ? "<stdout>" : argv[2]);
2013cab2bb3Spatrick FILE *Out = OutIsStdout ? stdout : fopen(argv[2], "w");
2023cab2bb3Spatrick PrintDataFlow(Out);
2033cab2bb3Spatrick PrintCoverage(Out);
2043cab2bb3Spatrick if (!OutIsStdout) fclose(Out);
2053cab2bb3Spatrick }
206