13cab2bb3Spatrick //===-- dfsan_interface.h -------------------------------------------------===//
23cab2bb3Spatrick //
33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information.
53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63cab2bb3Spatrick //
73cab2bb3Spatrick //===----------------------------------------------------------------------===//
83cab2bb3Spatrick //
93cab2bb3Spatrick // This file is a part of DataFlowSanitizer.
103cab2bb3Spatrick //
113cab2bb3Spatrick // Public interface header.
123cab2bb3Spatrick //===----------------------------------------------------------------------===//
133cab2bb3Spatrick #ifndef DFSAN_INTERFACE_H
143cab2bb3Spatrick #define DFSAN_INTERFACE_H
153cab2bb3Spatrick
163cab2bb3Spatrick #include <stddef.h>
173cab2bb3Spatrick #include <stdint.h>
183cab2bb3Spatrick #include <sanitizer/common_interface_defs.h>
193cab2bb3Spatrick
203cab2bb3Spatrick #ifdef __cplusplus
213cab2bb3Spatrick extern "C" {
223cab2bb3Spatrick #endif
233cab2bb3Spatrick
24d89ec533Spatrick typedef uint8_t dfsan_label;
25d89ec533Spatrick typedef uint32_t dfsan_origin;
263cab2bb3Spatrick
273cab2bb3Spatrick /// Signature of the callback argument to dfsan_set_write_callback().
283cab2bb3Spatrick typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
293cab2bb3Spatrick
30*810390e3Srobert /// Signature of the callback argument to dfsan_set_conditional_callback().
31*810390e3Srobert typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
32*810390e3Srobert dfsan_origin origin);
33*810390e3Srobert
34*810390e3Srobert /// Signature of the callback argument to dfsan_set_reaches_function_callback().
35*810390e3Srobert /// The description is intended to hold the name of the variable.
36*810390e3Srobert typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label,
37*810390e3Srobert dfsan_origin origin,
38*810390e3Srobert const char *file,
39*810390e3Srobert unsigned int line,
40*810390e3Srobert const char *function);
41*810390e3Srobert
42d89ec533Spatrick /// Computes the union of \c l1 and \c l2, resulting in a union label.
433cab2bb3Spatrick dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
443cab2bb3Spatrick
453cab2bb3Spatrick /// Sets the label for each address in [addr,addr+size) to \c label.
463cab2bb3Spatrick void dfsan_set_label(dfsan_label label, void *addr, size_t size);
473cab2bb3Spatrick
483cab2bb3Spatrick /// Sets the label for each address in [addr,addr+size) to the union of the
493cab2bb3Spatrick /// current label for that address and \c label.
503cab2bb3Spatrick void dfsan_add_label(dfsan_label label, void *addr, size_t size);
513cab2bb3Spatrick
523cab2bb3Spatrick /// Retrieves the label associated with the given data.
533cab2bb3Spatrick ///
543cab2bb3Spatrick /// The type of 'data' is arbitrary. The function accepts a value of any type,
553cab2bb3Spatrick /// which can be truncated or extended (implicitly or explicitly) as necessary.
563cab2bb3Spatrick /// The truncation/extension operations will preserve the label of the original
573cab2bb3Spatrick /// value.
583cab2bb3Spatrick dfsan_label dfsan_get_label(long data);
593cab2bb3Spatrick
60d89ec533Spatrick /// Retrieves the immediate origin associated with the given data. The returned
61d89ec533Spatrick /// origin may point to another origin.
62d89ec533Spatrick ///
63d89ec533Spatrick /// The type of 'data' is arbitrary.
64d89ec533Spatrick dfsan_origin dfsan_get_origin(long data);
65d89ec533Spatrick
663cab2bb3Spatrick /// Retrieves the label associated with the data at the given address.
673cab2bb3Spatrick dfsan_label dfsan_read_label(const void *addr, size_t size);
683cab2bb3Spatrick
/// Returns the origin associated with the first tainted byte among the
/// \c size bytes starting at address \c addr.
71*810390e3Srobert dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size);
72*810390e3Srobert
/// Returns whether the given label \c label contains the label \c elem.
743cab2bb3Spatrick int dfsan_has_label(dfsan_label label, dfsan_label elem);
753cab2bb3Spatrick
763cab2bb3Spatrick /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
77d89ec533Spatrick /// with the application memory. Use this call to start over the taint tracking
78d89ec533Spatrick /// within the same process.
79d89ec533Spatrick ///
80d89ec533Spatrick /// Note: If another thread is working with tainted data during the flush, that
81d89ec533Spatrick /// taint could still be written to shadow after the flush.
823cab2bb3Spatrick void dfsan_flush(void);
833cab2bb3Spatrick
843cab2bb3Spatrick /// Sets a callback to be invoked on calls to write(). The callback is invoked
853cab2bb3Spatrick /// before the write is done. The write is not guaranteed to succeed when the
863cab2bb3Spatrick /// callback executes. Pass in NULL to remove any callback.
873cab2bb3Spatrick void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
883cab2bb3Spatrick
89*810390e3Srobert /// Sets a callback to be invoked on any conditional expressions which have a
90*810390e3Srobert /// taint label set. This can be used to find where tainted data influences
91*810390e3Srobert /// the behavior of the program.
92*810390e3Srobert /// These callbacks will only be added when -dfsan-conditional-callbacks=true.
93*810390e3Srobert void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
94*810390e3Srobert
95*810390e3Srobert /// Conditional expressions occur during signal handlers.
96*810390e3Srobert /// Making callbacks that handle signals well is tricky, so when
97*810390e3Srobert /// -dfsan-conditional-callbacks=true, conditional expressions used in signal
98*810390e3Srobert /// handlers will add the labels they see into a global (bitwise-or together).
99*810390e3Srobert /// This function returns all label bits seen in signal handler conditions.
100*810390e3Srobert dfsan_label dfsan_get_labels_in_signal_conditional();
101*810390e3Srobert
102*810390e3Srobert /// Sets a callback to be invoked when tainted data reaches a function.
103*810390e3Srobert /// This could occur at function entry, or at a load instruction.
104*810390e3Srobert /// These callbacks will only be added if -dfsan-reaches-function-callbacks=1.
105*810390e3Srobert void dfsan_set_reaches_function_callback(
106*810390e3Srobert dfsan_reaches_function_callback_t callback);
107*810390e3Srobert
108*810390e3Srobert /// Making callbacks that handle signals well is tricky, so when
109*810390e3Srobert /// -dfsan-reaches-function-callbacks=true, functions reached in signal
110*810390e3Srobert /// handlers will add the labels they see into a global (bitwise-or together).
111*810390e3Srobert /// This function returns all label bits seen during signal handlers.
112*810390e3Srobert dfsan_label dfsan_get_labels_in_signal_reaches_function();
113*810390e3Srobert
1143cab2bb3Spatrick /// Interceptor hooks.
/// Whenever one of dfsan's custom functions is called, the corresponding
/// hook is called if it is non-zero. The hooks should be defined by the user.
1173cab2bb3Spatrick /// The primary use case is taint-guided fuzzing, where the fuzzer
1183cab2bb3Spatrick /// needs to see the parameters of the function and the labels.
1193cab2bb3Spatrick /// FIXME: implement more hooks.
1203cab2bb3Spatrick void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
1213cab2bb3Spatrick size_t n, dfsan_label s1_label,
1223cab2bb3Spatrick dfsan_label s2_label, dfsan_label n_label);
1233cab2bb3Spatrick void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
1243cab2bb3Spatrick size_t n, dfsan_label s1_label,
1253cab2bb3Spatrick dfsan_label s2_label, dfsan_label n_label);
126d89ec533Spatrick
127d89ec533Spatrick /// Prints the origin trace of the label at the address addr to stderr. It also
128d89ec533Spatrick /// prints description at the beginning of the trace. If origin tracking is not
129d89ec533Spatrick /// on, or the address is not labeled, it prints nothing.
130d89ec533Spatrick void dfsan_print_origin_trace(const void *addr, const char *description);
131*810390e3Srobert /// As above, but use an origin id from dfsan_get_origin() instead of address.
132*810390e3Srobert /// Does not include header line with taint label and address information.
133*810390e3Srobert void dfsan_print_origin_id_trace(dfsan_origin origin);
134d89ec533Spatrick
135d89ec533Spatrick /// Prints the origin trace of the label at the address \p addr to a
136d89ec533Spatrick /// pre-allocated output buffer. If origin tracking is not on, or the address is
137d89ec533Spatrick /// not labeled, it prints nothing.
138d89ec533Spatrick ///
139d89ec533Spatrick /// Typical usage:
140d89ec533Spatrick /// \code
141d89ec533Spatrick /// char kDescription[] = "...";
142d89ec533Spatrick /// char buf[1024];
143d89ec533Spatrick /// dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
144d89ec533Spatrick /// \endcode
145d89ec533Spatrick ///
146d89ec533Spatrick /// Typical usage that handles truncation:
147d89ec533Spatrick /// \code
148d89ec533Spatrick /// char buf[1024];
/// size_t len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
150d89ec533Spatrick ///
151d89ec533Spatrick /// if (len < sizeof(buf)) {
152d89ec533Spatrick /// ProcessOriginTrace(buf);
153d89ec533Spatrick /// } else {
154d89ec533Spatrick /// char *tmpbuf = new char[len + 1];
155d89ec533Spatrick /// dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
156d89ec533Spatrick /// ProcessOriginTrace(tmpbuf);
157d89ec533Spatrick /// delete[] tmpbuf;
158d89ec533Spatrick /// }
159d89ec533Spatrick /// \endcode
160d89ec533Spatrick ///
161d89ec533Spatrick /// \param addr The tainted memory address whose origin we are printing.
162d89ec533Spatrick /// \param description A description printed at the beginning of the trace.
163d89ec533Spatrick /// \param [out] out_buf The output buffer to write the results to.
164d89ec533Spatrick /// \param out_buf_size The size of \p out_buf.
165d89ec533Spatrick ///
166d89ec533Spatrick /// \returns The number of symbols that should have been written to \p out_buf
167d89ec533Spatrick /// (not including trailing null byte '\0'). Thus, the string is truncated iff
168d89ec533Spatrick /// return value is not less than \p out_buf_size.
169d89ec533Spatrick size_t dfsan_sprint_origin_trace(const void *addr, const char *description,
170d89ec533Spatrick char *out_buf, size_t out_buf_size);
171*810390e3Srobert /// As above, but use an origin id from dfsan_get_origin() instead of address.
172*810390e3Srobert /// Does not include header line with taint label and address information.
173*810390e3Srobert size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf,
174*810390e3Srobert size_t out_buf_size);
175d89ec533Spatrick
176d89ec533Spatrick /// Prints the stack trace leading to this call to a pre-allocated output
177d89ec533Spatrick /// buffer.
178d89ec533Spatrick ///
179d89ec533Spatrick /// For usage examples, see dfsan_sprint_origin_trace.
180d89ec533Spatrick ///
181d89ec533Spatrick /// \param [out] out_buf The output buffer to write the results to.
182d89ec533Spatrick /// \param out_buf_size The size of \p out_buf.
183d89ec533Spatrick ///
184d89ec533Spatrick /// \returns The number of symbols that should have been written to \p out_buf
185d89ec533Spatrick /// (not including trailing null byte '\0'). Thus, the string is truncated iff
186d89ec533Spatrick /// return value is not less than \p out_buf_size.
187d89ec533Spatrick size_t dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size);
188d89ec533Spatrick
189d89ec533Spatrick /// Retrieves the very first origin associated with the data at the given
190d89ec533Spatrick /// address.
191d89ec533Spatrick dfsan_origin dfsan_get_init_origin(const void *addr);
192d89ec533Spatrick
193d89ec533Spatrick /// Returns the value of -dfsan-track-origins.
194d89ec533Spatrick /// * 0: do not track origins.
195d89ec533Spatrick /// * 1: track origins at memory store operations.
196d89ec533Spatrick /// * 2: track origins at memory load and store operations.
197d89ec533Spatrick int dfsan_get_track_origins(void);
1983cab2bb3Spatrick #ifdef __cplusplus
1993cab2bb3Spatrick } // extern "C"
2003cab2bb3Spatrick
dfsan_set_label(dfsan_label label,T & data)201*810390e3Srobert template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
2023cab2bb3Spatrick dfsan_set_label(label, (void *)&data, sizeof(T));
2033cab2bb3Spatrick }
2043cab2bb3Spatrick
2053cab2bb3Spatrick #endif
2063cab2bb3Spatrick
2073cab2bb3Spatrick #endif // DFSAN_INTERFACE_H
208