/* An experimental state machine, for tracking "taint": unsanitized uses
   of data potentially under an attacker's control.

   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by David Malcolm <dmalcolm@redhat.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "options.h"
#include "diagnostic-path.h"
#include "diagnostic-metadata.h"
#include "function.h"
#include "json.h"
#include "analyzer/analyzer.h"
#include "analyzer/analyzer-logging.h"
#include "gimple-iterator.h"
#include "tristate.h"
#include "selftest.h"
#include "ordered-hash-map.h"
#include "cgraph.h"
#include "cfg.h"
#include "digraph.h"
#include "stringpool.h"
#include "attribs.h"
#include "analyzer/supergraph.h"
#include "analyzer/call-string.h"
#include "analyzer/program-point.h"
#include "analyzer/store.h"
#include "analyzer/region-model.h"
#include "analyzer/sm.h"
#include "analyzer/program-state.h"
#include "analyzer/pending-diagnostic.h"

#if ENABLE_ANALYZER

namespace ana {

namespace {

/* An enum for describing tainted values.  */

enum bounds
{
  /* This tainted value has no upper or lower bound.  */
  BOUNDS_NONE,

  /* This tainted value has an upper bound but no lower bound.  */
  BOUNDS_UPPER,

  /* This tainted value has a lower bound but no upper bound.  */
  BOUNDS_LOWER
};

/* An experimental state machine, for tracking "taint": unsanitized uses
   of data potentially under an attacker's control.  */

class taint_state_machine : public state_machine
{
public:
  taint_state_machine (logger *logger);

  bool inherited_state_p () const FINAL OVERRIDE { return true; }

  state_t alt_get_inherited_state (const sm_state_map &map,
                                   const svalue *sval,
                                   const extrinsic_state &ext_state)
    const FINAL OVERRIDE;

  bool on_stmt (sm_context *sm_ctxt,
                const supernode *node,
                const gimple *stmt) const FINAL OVERRIDE;

  void on_condition (sm_context *sm_ctxt,
                     const supernode *node,
                     const gimple *stmt,
                     const svalue *lhs,
                     enum tree_code op,
                     const svalue *rhs) const FINAL OVERRIDE;

  bool can_purge_p (state_t s) const FINAL OVERRIDE;

  bool get_taint (state_t s, tree type, enum bounds *out) const;

  state_t combine_states (state_t s0, state_t s1) const;

private:
  void check_for_tainted_size_arg (sm_context *sm_ctxt,
                                   const supernode *node,
                                   const gcall *call,
                                   tree callee_fndecl) const;
  void check_for_tainted_divisor (sm_context *sm_ctxt,
                                  const supernode *node,
                                  const gassign *assign) const;

public:
  /* State for a "tainted" value: unsanitized data potentially under an
     attacker's control.  */
  state_t m_tainted;

  /* State for a "tainted" value that has a lower bound.  */
  state_t m_has_lb;

  /* State for a "tainted" value that has an upper bound.  */
  state_t m_has_ub;

  /* Stop state, for a value we don't want to track any more.  */
  state_t m_stop;
};

/* Class for diagnostics relating to taint_state_machine.  */

class taint_diagnostic : public pending_diagnostic
{
public:
  taint_diagnostic (const taint_state_machine &sm, tree arg,
                    enum bounds has_bounds)
  : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
  {}

  bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
  {
    const taint_diagnostic &other = (const taint_diagnostic &)base_other;
    return (same_tree_p (m_arg, other.m_arg)
            && m_has_bounds == other.m_has_bounds);
  }

  label_text describe_state_change (const evdesc::state_change &change)
    FINAL OVERRIDE
  {
    if (change.m_new_state == m_sm.m_tainted)
      {
        if (change.m_origin)
          return change.formatted_print ("%qE has an unchecked value here"
                                         " (from %qE)",
                                         change.m_expr, change.m_origin);
        else
          return change.formatted_print ("%qE gets an unchecked value here",
                                         change.m_expr);
      }
    else if (change.m_new_state == m_sm.m_has_lb)
      return change.formatted_print ("%qE has its lower bound checked here",
                                     change.m_expr);
    else if (change.m_new_state == m_sm.m_has_ub)
      return change.formatted_print ("%qE has its upper bound checked here",
                                     change.m_expr);
    return label_text ();
  }
protected:
  const taint_state_machine &m_sm;
  tree m_arg;
  enum bounds m_has_bounds;
};

/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   array index.  */

class tainted_array_index : public taint_diagnostic
{
public:
  tainted_array_index (const taint_state_machine &sm, tree arg,
                       enum bounds has_bounds)
  : taint_diagnostic (sm, arg, has_bounds)
  {}

  const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }

  int get_controlling_option () const FINAL OVERRIDE
  {
    return OPT_Wanalyzer_tainted_array_index;
  }

  bool emit (rich_location *rich_loc) FINAL OVERRIDE
  {
    diagnostic_metadata m;
    /* CWE-129: "Improper Validation of Array Index".  */
    m.add_cwe (129);
    switch (m_has_bounds)
      {
      default:
        gcc_unreachable ();
      case BOUNDS_NONE:
        return warning_meta (rich_loc, m, get_controlling_option (),
                             "use of attacker-controlled value %qE"
                             " in array lookup without bounds checking",
                             m_arg);
        break;
      case BOUNDS_UPPER:
        return warning_meta (rich_loc, m, get_controlling_option (),
                             "use of attacker-controlled value %qE"
                             " in array lookup without checking for negative",
                             m_arg);
        break;
      case BOUNDS_LOWER:
        return warning_meta (rich_loc, m, get_controlling_option (),
                             "use of attacker-controlled value %qE"
                             " in array lookup without upper-bounds checking",
                             m_arg);
        break;
      }
  }

  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
  {
    switch (m_has_bounds)
      {
      default:
        gcc_unreachable ();
      case BOUNDS_NONE:
        return ev.formatted_print
          ("use of attacker-controlled value %qE in array lookup"
           " without bounds checking",
           m_arg);
      case BOUNDS_UPPER:
        return ev.formatted_print
          ("use of attacker-controlled value %qE"
           " in array lookup without checking for negative",
           m_arg);
      case BOUNDS_LOWER:
        return ev.formatted_print
          ("use of attacker-controlled value %qE"
           " in array lookup without upper-bounds checking",
           m_arg);
      }
  }
};
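
/* Illustrative sketch (hypothetical code, not from the GCC testsuite) of
   the pattern this diagnostic is aimed at: an attacker-controlled value
   used directly as an array index:

     int lookup (FILE *f)
     {
       static const int table[16] = { 0 };
       int idx;
       fread (&idx, sizeof (idx), 1, f);	// idx becomes "tainted"
       return table[idx];			// no bounds check on idx
     }

   Adding something like "if (idx < 0 || idx >= 16) return -1;" before the
   lookup is intended to move idx through has_lb/has_ub to "stop", so that
   no diagnostic is issued on the remaining path.  */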

/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   pointer offset.  */

class tainted_offset : public taint_diagnostic
{
public:
  tainted_offset (const taint_state_machine &sm, tree arg,
                  enum bounds has_bounds)
  : taint_diagnostic (sm, arg, has_bounds)
  {}

  const char *get_kind () const FINAL OVERRIDE { return "tainted_offset"; }

  int get_controlling_option () const FINAL OVERRIDE
  {
    return OPT_Wanalyzer_tainted_offset;
  }

  bool emit (rich_location *rich_loc) FINAL OVERRIDE
  {
    diagnostic_metadata m;
    /* CWE-823: "Use of Out-of-range Pointer Offset".  */
    m.add_cwe (823);
    if (m_arg)
      switch (m_has_bounds)
        {
        default:
          gcc_unreachable ();
        case BOUNDS_NONE:
          return warning_meta (rich_loc, m, get_controlling_option (),
                               "use of attacker-controlled value %qE as offset"
                               " without bounds checking",
                               m_arg);
          break;
        case BOUNDS_UPPER:
          return warning_meta (rich_loc, m, get_controlling_option (),
                               "use of attacker-controlled value %qE as offset"
                               " without lower-bounds checking",
                               m_arg);
          break;
        case BOUNDS_LOWER:
          return warning_meta (rich_loc, m, get_controlling_option (),
                               "use of attacker-controlled value %qE as offset"
                               " without upper-bounds checking",
                               m_arg);
          break;
        }
    else
      switch (m_has_bounds)
        {
        default:
          gcc_unreachable ();
        case BOUNDS_NONE:
          return warning_meta (rich_loc, m, get_controlling_option (),
                               "use of attacker-controlled value as offset"
                               " without bounds checking");
          break;
        case BOUNDS_UPPER:
          return warning_meta (rich_loc, m, get_controlling_option (),
                               "use of attacker-controlled value as offset"
                               " without lower-bounds checking");
          break;
        case BOUNDS_LOWER:
          return warning_meta (rich_loc, m, get_controlling_option (),
                               "use of attacker-controlled value as offset"
                               " without upper-bounds checking");
          break;
        }
  }

  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
  {
    if (m_arg)
      switch (m_has_bounds)
        {
        default:
          gcc_unreachable ();
        case BOUNDS_NONE:
          return ev.formatted_print ("use of attacker-controlled value %qE"
                                     " as offset without bounds checking",
                                     m_arg);
        case BOUNDS_UPPER:
          return ev.formatted_print ("use of attacker-controlled value %qE"
                                     " as offset without lower-bounds checking",
                                     m_arg);
        case BOUNDS_LOWER:
          return ev.formatted_print ("use of attacker-controlled value %qE"
                                     " as offset without upper-bounds checking",
                                     m_arg);
        }
    else
      switch (m_has_bounds)
        {
        default:
          gcc_unreachable ();
        case BOUNDS_NONE:
          return ev.formatted_print ("use of attacker-controlled value"
                                     " as offset without bounds checking");
        case BOUNDS_UPPER:
          return ev.formatted_print ("use of attacker-controlled value"
                                     " as offset without lower-bounds"
                                     " checking");
        case BOUNDS_LOWER:
          return ev.formatted_print ("use of attacker-controlled value"
                                     " as offset without upper-bounds"
                                     " checking");
        }
  }
};
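
/* Illustrative sketch (hypothetical code, not from the GCC testsuite) of
   the pattern this diagnostic is aimed at: an attacker-controlled value
   used as a pointer offset:

     char get_byte (FILE *f, const char *base)
     {
       long off;
       fread (&off, sizeof (off), 1, f);	// off becomes "tainted"
       return base[off];			// tainted pointer offset
     }

   Such offsets are flagged via the RK_OFFSET case of
   region_model::check_region_for_taint below.  */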

/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   size.  */

class tainted_size : public taint_diagnostic
{
public:
  tainted_size (const taint_state_machine &sm, tree arg,
                enum bounds has_bounds)
  : taint_diagnostic (sm, arg, has_bounds)
  {}

  const char *get_kind () const OVERRIDE { return "tainted_size"; }

  int get_controlling_option () const FINAL OVERRIDE
  {
    return OPT_Wanalyzer_tainted_size;
  }

  bool emit (rich_location *rich_loc) OVERRIDE
  {
    diagnostic_metadata m;
    m.add_cwe (129);
    switch (m_has_bounds)
      {
      default:
        gcc_unreachable ();
      case BOUNDS_NONE:
        return warning_meta (rich_loc, m, get_controlling_option (),
                             "use of attacker-controlled value %qE as size"
                             " without bounds checking",
                             m_arg);
        break;
      case BOUNDS_UPPER:
        return warning_meta (rich_loc, m, get_controlling_option (),
                             "use of attacker-controlled value %qE as size"
                             " without lower-bounds checking",
                             m_arg);
        break;
      case BOUNDS_LOWER:
        return warning_meta (rich_loc, m, get_controlling_option (),
                             "use of attacker-controlled value %qE as size"
                             " without upper-bounds checking",
                             m_arg);
        break;
      }
  }

  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
  {
    switch (m_has_bounds)
      {
      default:
        gcc_unreachable ();
      case BOUNDS_NONE:
        return ev.formatted_print ("use of attacker-controlled value %qE"
                                   " as size without bounds checking",
                                   m_arg);
      case BOUNDS_UPPER:
        return ev.formatted_print ("use of attacker-controlled value %qE"
                                   " as size without lower-bounds checking",
                                   m_arg);
      case BOUNDS_LOWER:
        return ev.formatted_print ("use of attacker-controlled value %qE"
                                   " as size without upper-bounds checking",
                                   m_arg);
      }
  }
};
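
/* Illustrative sketch (hypothetical code, not from the GCC testsuite) of a
   use this diagnostic is aimed at: a tainted value used as the size of an
   access, assuming the region model routes the size of the access through
   a sized region and hence through check_region_for_taint below:

     void zero_n (FILE *f, char buf[64])
     {
       size_t n;
       fread (&n, sizeof (n), 1, f);	// n becomes "tainted"
       memset (buf, 0, n);		// size is attacker-controlled
     }

   Since n is unsigned it has an implicit lower bound, so the missing
   check reported would be the upper bound.  */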

/* Subclass of tainted_size for reporting on tainted size values
   passed to an external function annotated with attribute "access".  */

class tainted_access_attrib_size : public tainted_size
{
public:
  tainted_access_attrib_size (const taint_state_machine &sm, tree arg,
                              enum bounds has_bounds, tree callee_fndecl,
                              unsigned size_argno, const char *access_str)
  : tainted_size (sm, arg, has_bounds),
    m_callee_fndecl (callee_fndecl),
    m_size_argno (size_argno), m_access_str (access_str)
  {
  }

  const char *get_kind () const OVERRIDE
  {
    return "tainted_access_attrib_size";
  }

  bool emit (rich_location *rich_loc) FINAL OVERRIDE
  {
    bool warned = tainted_size::emit (rich_loc);
    if (warned)
      {
        inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
                "parameter %i of %qD marked as a size via attribute %qs",
                m_size_argno + 1, m_callee_fndecl, m_access_str);
      }
    return warned;
  }

private:
  tree m_callee_fndecl;
  unsigned m_size_argno;
  const char *m_access_str;
};
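
/* Illustrative sketch (hypothetical declarations, not from the GCC
   testsuite) of the pattern check_for_tainted_size_arg looks for: a
   tainted value passed as the size argument of an external function
   whose prototype carries attribute "access" with a size-index:

     extern void fill (char *dst, size_t n)
       __attribute__ ((access (write_only, 1, 2)));

     void use (FILE *f, char *dst)
     {
       size_t n;
       fread (&n, sizeof (n), 1, f);	// n becomes "tainted"
       fill (dst, n);			// n used as a size; the note points
					// at parameter 2 of 'fill'
     }
*/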

/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   divisor (so that an attacker can trigger a divide by zero).  */

class tainted_divisor : public taint_diagnostic
{
public:
  tainted_divisor (const taint_state_machine &sm, tree arg,
                   enum bounds has_bounds)
  : taint_diagnostic (sm, arg, has_bounds)
  {}

  const char *get_kind () const FINAL OVERRIDE { return "tainted_divisor"; }

  int get_controlling_option () const FINAL OVERRIDE
  {
    return OPT_Wanalyzer_tainted_divisor;
  }

  bool emit (rich_location *rich_loc) FINAL OVERRIDE
  {
    diagnostic_metadata m;
    /* CWE-369: "Divide By Zero".  */
    m.add_cwe (369);
    if (m_arg)
      return warning_meta (rich_loc, m, get_controlling_option (),
                           "use of attacker-controlled value %qE as divisor"
                           " without checking for zero",
                           m_arg);
    else
      return warning_meta (rich_loc, m, get_controlling_option (),
                           "use of attacker-controlled value as divisor"
                           " without checking for zero");
  }

  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
  {
    if (m_arg)
      return ev.formatted_print
        ("use of attacker-controlled value %qE as divisor"
         " without checking for zero",
         m_arg);
    else
      return ev.formatted_print
        ("use of attacker-controlled value as divisor"
         " without checking for zero");
  }
};
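
/* Illustrative sketch (hypothetical code, not from the GCC testsuite) of
   the pattern check_for_tainted_divisor complains about:

     int average (FILE *f, int total)
     {
       int count;
       fread (&count, sizeof (count), 1, f);	// count becomes "tainted"
       return total / count;			// attacker can make count 0
     }

   Adding e.g. "if (count == 0) return 0;" beforehand lets the region
   model prove the divisor is nonzero on the remaining path, so no
   warning is issued there.  */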

/* Concrete taint_diagnostic subclass for reporting attacker-controlled
   size of a dynamic allocation.  */

class tainted_allocation_size : public taint_diagnostic
{
public:
  tainted_allocation_size (const taint_state_machine &sm, tree arg,
                           enum bounds has_bounds, enum memory_space mem_space)
  : taint_diagnostic (sm, arg, has_bounds),
    m_mem_space (mem_space)
  {
  }

  const char *get_kind () const FINAL OVERRIDE
  {
    return "tainted_allocation_size";
  }

  bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
  {
    if (!taint_diagnostic::subclass_equal_p (base_other))
      return false;
    const tainted_allocation_size &other
      = (const tainted_allocation_size &)base_other;
    return m_mem_space == other.m_mem_space;
  }

  int get_controlling_option () const FINAL OVERRIDE
  {
    return OPT_Wanalyzer_tainted_allocation_size;
  }

  bool emit (rich_location *rich_loc) FINAL OVERRIDE
  {
    diagnostic_metadata m;
    /* "CWE-789: Memory Allocation with Excessive Size Value".  */
    m.add_cwe (789);

    bool warned;
    if (m_arg)
      switch (m_has_bounds)
        {
        default:
          gcc_unreachable ();
        case BOUNDS_NONE:
          warned = warning_meta (rich_loc, m, get_controlling_option (),
                                 "use of attacker-controlled value %qE as"
                                 " allocation size without bounds checking",
                                 m_arg);
          break;
        case BOUNDS_UPPER:
          warned = warning_meta (rich_loc, m, get_controlling_option (),
                                 "use of attacker-controlled value %qE as"
                                 " allocation size without"
                                 " lower-bounds checking",
                                 m_arg);
          break;
        case BOUNDS_LOWER:
          warned = warning_meta (rich_loc, m, get_controlling_option (),
                                 "use of attacker-controlled value %qE as"
                                 " allocation size without"
                                 " upper-bounds checking",
                                 m_arg);
          break;
        }
    else
      switch (m_has_bounds)
        {
        default:
          gcc_unreachable ();
        case BOUNDS_NONE:
          warned = warning_meta (rich_loc, m, get_controlling_option (),
                                 "use of attacker-controlled value as"
                                 " allocation size without bounds"
                                 " checking");
          break;
        case BOUNDS_UPPER:
          warned = warning_meta (rich_loc, m, get_controlling_option (),
                                 "use of attacker-controlled value as"
                                 " allocation size without"
                                 " lower-bounds checking");
          break;
        case BOUNDS_LOWER:
          warned = warning_meta (rich_loc, m, get_controlling_option (),
                                 "use of attacker-controlled value as"
                                 " allocation size without"
                                 " upper-bounds checking");
          break;
        }
    if (warned)
      {
        location_t loc = rich_loc->get_loc ();
        switch (m_mem_space)
          {
          default:
            break;
          case MEMSPACE_STACK:
            inform (loc, "stack-based allocation");
            break;
          case MEMSPACE_HEAP:
            inform (loc, "heap-based allocation");
            break;
          }
      }
    return warned;
  }

  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
  {
    if (m_arg)
      switch (m_has_bounds)
        {
        default:
          gcc_unreachable ();
        case BOUNDS_NONE:
          return ev.formatted_print
            ("use of attacker-controlled value %qE as allocation size"
             " without bounds checking",
             m_arg);
        case BOUNDS_UPPER:
          return ev.formatted_print
            ("use of attacker-controlled value %qE as allocation size"
             " without lower-bounds checking",
             m_arg);
        case BOUNDS_LOWER:
          return ev.formatted_print
            ("use of attacker-controlled value %qE as allocation size"
             " without upper-bounds checking",
             m_arg);
        }
    else
      switch (m_has_bounds)
        {
        default:
          gcc_unreachable ();
        case BOUNDS_NONE:
          return ev.formatted_print
            ("use of attacker-controlled value as allocation size"
             " without bounds checking");
        case BOUNDS_UPPER:
          return ev.formatted_print
            ("use of attacker-controlled value as allocation size"
             " without lower-bounds checking");
        case BOUNDS_LOWER:
          return ev.formatted_print
            ("use of attacker-controlled value as allocation size"
             " without upper-bounds checking");
        }
  }

private:
  enum memory_space m_mem_space;
};
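
/* Illustrative sketch (hypothetical code, not from the GCC testsuite) of
   the pattern check_dynamic_size_for_taint is aimed at: a tainted value
   used as the size of a dynamic allocation, here on the heap:

     void *make_buffer (FILE *f)
     {
       size_t len;
       fread (&len, sizeof (len), 1, f);	// len becomes "tainted"
       return malloc (len);			// attacker-controlled size
     }

   The follow-up note ("heap-based allocation" / "stack-based allocation")
   comes from the MEMSPACE_* switch in emit () above.  */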

/* taint_state_machine's ctor.  */

taint_state_machine::taint_state_machine (logger *logger)
: state_machine ("taint", logger)
{
  m_tainted = add_state ("tainted");
  m_has_lb = add_state ("has_lb");
  m_has_ub = add_state ("has_ub");
  m_stop = add_state ("stop");
}

state_machine::state_t
taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
                                              const svalue *sval,
                                              const extrinsic_state &ext_state)
  const
{
  switch (sval->get_kind ())
    {
    default:
      break;
    case SK_UNARYOP:
      {
        const unaryop_svalue *unaryop_sval
          = as_a <const unaryop_svalue *> (sval);
        enum tree_code op = unaryop_sval->get_op ();
        const svalue *arg = unaryop_sval->get_arg ();
        switch (op)
          {
          case NOP_EXPR:
            {
              state_t arg_state = map.get_state (arg, ext_state);
              return arg_state;
            }
          default:
            break;
          }
      }
      break;
    case SK_BINOP:
      {
        const binop_svalue *binop_sval = as_a <const binop_svalue *> (sval);
        enum tree_code op = binop_sval->get_op ();
        const svalue *arg0 = binop_sval->get_arg0 ();
        const svalue *arg1 = binop_sval->get_arg1 ();
        switch (op)
          {
          default:
            break;
          case PLUS_EXPR:
          case MINUS_EXPR:
          case MULT_EXPR:
          case POINTER_PLUS_EXPR:
          case TRUNC_DIV_EXPR:
          case TRUNC_MOD_EXPR:
            {
              state_t arg0_state = map.get_state (arg0, ext_state);
              state_t arg1_state = map.get_state (arg1, ext_state);
              return combine_states (arg0_state, arg1_state);
            }
            break;

          case EQ_EXPR:
          case GE_EXPR:
          case LE_EXPR:
          case NE_EXPR:
          case GT_EXPR:
          case LT_EXPR:
          case UNORDERED_EXPR:
          case ORDERED_EXPR:
            /* Comparisons are just booleans.  */
            return m_start;

          case BIT_AND_EXPR:
          case RSHIFT_EXPR:
            return NULL;
          }
      }
      break;
    }
  return NULL;
}
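
/* Illustrative sketch of what the inheritance rules above mean for user
   code (hypothetical example, assuming 'idx' has already been marked
   "tainted" by an earlier fread):

     int scaled = idx * 4;	// MULT_EXPR: inherits "tainted"
     int sum = idx + base;	// PLUS_EXPR: most-tainted of the operands
     int masked = idx & 0xff;	// BIT_AND_EXPR: deliberately not tracked
     _Bool cmp = idx < 10;	// comparison: just a boolean ("start")

   i.e. ordinary arithmetic on a tainted value is still treated as
   tainted, whereas masking/shifting and comparison results are not.  */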

/* Implementation of state_machine::on_stmt vfunc for taint_state_machine.  */

bool
taint_state_machine::on_stmt (sm_context *sm_ctxt,
                              const supernode *node,
                              const gimple *stmt) const
{
  if (const gcall *call = dyn_cast <const gcall *> (stmt))
    if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
      {
        if (is_named_call_p (callee_fndecl, "fread", call, 4))
          {
            tree arg = gimple_call_arg (call, 0);

            sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);

            /* Dereference an ADDR_EXPR.  */
            // TODO: should the engine do this?
            if (TREE_CODE (arg) == ADDR_EXPR)
              sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
                                      m_start, m_tainted);
            return true;
          }

        /* External function with "access" attribute.  */
        if (sm_ctxt->unknown_side_effects_p ())
          check_for_tainted_size_arg (sm_ctxt, node, call, callee_fndecl);
      }
  // TODO: ...etc; many other sources of untrusted data

  if (const gassign *assign = dyn_cast <const gassign *> (stmt))
    {
      enum tree_code op = gimple_assign_rhs_code (assign);

      switch (op)
        {
        default:
          break;
        case TRUNC_DIV_EXPR:
        case CEIL_DIV_EXPR:
        case FLOOR_DIV_EXPR:
        case ROUND_DIV_EXPR:
        case TRUNC_MOD_EXPR:
        case CEIL_MOD_EXPR:
        case FLOOR_MOD_EXPR:
        case ROUND_MOD_EXPR:
        case RDIV_EXPR:
        case EXACT_DIV_EXPR:
          check_for_tainted_divisor (sm_ctxt, node, assign);
          break;
        }
    }

  return false;
}

/* Implementation of state_machine::on_condition vfunc for taint_state_machine.
   Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
   and states 'has_ub' and 'has_lb' to 'stop'.  */

void
taint_state_machine::on_condition (sm_context *sm_ctxt,
                                   const supernode *node,
                                   const gimple *stmt,
                                   const svalue *lhs,
                                   enum tree_code op,
                                   const svalue *rhs ATTRIBUTE_UNUSED) const
{
  if (stmt == NULL)
    return;

  // TODO: this doesn't use the RHS; should we make it symmetric?

  // TODO
  switch (op)
    {
    //case NE_EXPR:
    //case EQ_EXPR:
    case GE_EXPR:
    case GT_EXPR:
      {
        sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
                                m_has_lb);
        sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
                                m_stop);
      }
      break;
    case LE_EXPR:
    case LT_EXPR:
      {
        sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
                                m_has_ub);
        sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
                                m_stop);
      }
      break;
    default:
      break;
    }
}
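
/* Illustrative sketch (hypothetical code) of how these transitions are
   intended to model bounds checking, assuming 'idx' is already in the
   "tainted" state:

     if (idx >= 0)		// GE_EXPR: tainted -> has_lb
       {
         if (idx < LIMIT)	// LT_EXPR: has_lb -> stop (both bounds seen)
           use (idx);		// fully sanitized: no warning
       }

   Each check on its own only moves the value to has_lb or has_ub; a value
   that has had both kinds of check reaches the "stop" state and is no
   longer reported.  */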

bool
taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
{
  return true;
}

/* If STATE is a tainted state, write the bounds to *OUT and return true.
   Otherwise return false.
   Use the signedness of TYPE to determine if "has_ub" is tainted.  */

bool
taint_state_machine::get_taint (state_t state, tree type,
                                enum bounds *out) const
{
  /* Unsigned types have an implicit lower bound.  */
  bool is_unsigned = false;
  if (type)
    if (INTEGRAL_TYPE_P (type))
      is_unsigned = TYPE_UNSIGNED (type);

  /* Can't use a switch as the states are non-const.  */
  if (state == m_tainted)
    {
      *out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE;
      return true;
    }
  else if (state == m_has_lb)
    {
      *out = BOUNDS_LOWER;
      return true;
    }
  else if (state == m_has_ub && !is_unsigned)
    {
      /* Missing lower bound.  */
      *out = BOUNDS_UPPER;
      return true;
    }
  return false;
}
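
/* Worked example of the signedness handling above (illustrative):

     size_t n;
     fread (&n, sizeof (n), 1, f);	// n: tainted
     // get_taint (tainted, size_t) -> BOUNDS_LOWER: only a missing
     // upper-bound check is reported, since size_t can't be negative.
     if (n <= 64)			// tainted -> has_ub
       ;				// get_taint (has_ub, size_t) -> false
*/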

/* Find the most tainted state of S0 and S1.  */

state_machine::state_t
taint_state_machine::combine_states (state_t s0, state_t s1) const
{
  gcc_assert (s0);
  gcc_assert (s1);
  if (s0 == s1)
    return s0;
  if (s0 == m_tainted || s1 == m_tainted)
    return m_tainted;
  if (s0 == m_start)
    return s1;
  if (s1 == m_start)
    return s0;
  if (s0 == m_stop)
    return s1;
  if (s1 == m_stop)
    return s0;
  /* The only remaining combinations are one of has_ub and has_lb
     (in either order).  */
  gcc_assert ((s0 == m_has_lb && s1 == m_has_ub)
              || (s0 == m_has_ub && s1 == m_has_lb));
  return m_tainted;
}

/* Check for calls to external functions marked with
   __attribute__((access)) with a size-index: complain about
   tainted values passed as a size to such a function.  */

void
taint_state_machine::check_for_tainted_size_arg (sm_context *sm_ctxt,
                                                 const supernode *node,
                                                 const gcall *call,
                                                 tree callee_fndecl) const
{
  tree fntype = TREE_TYPE (callee_fndecl);
  if (!fntype)
    return;

  if (!TYPE_ATTRIBUTES (fntype))
    return;

  /* Initialize a map of attribute access specifications for arguments
     to the function call.  */
  rdwr_map rdwr_idx;
  init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));

  unsigned argno = 0;

  for (tree iter = TYPE_ARG_TYPES (fntype); iter;
       iter = TREE_CHAIN (iter), ++argno)
    {
      const attr_access* access = rdwr_idx.get (argno);
      if (!access)
        continue;

      /* Ignore any duplicate entry in the map for the size argument.  */
      if (access->ptrarg != argno)
        continue;

      if (access->sizarg == UINT_MAX)
        continue;

      tree size_arg = gimple_call_arg (call, access->sizarg);

      state_t state = sm_ctxt->get_state (call, size_arg);
      enum bounds b;
      if (get_taint (state, TREE_TYPE (size_arg), &b))
        {
          const char* const access_str =
            TREE_STRING_POINTER (access->to_external_string ());
          tree diag_size = sm_ctxt->get_diagnostic_tree (size_arg);
          sm_ctxt->warn (node, call, size_arg,
                         new tainted_access_attrib_size (*this, diag_size, b,
                                                         callee_fndecl,
                                                         access->sizarg,
                                                         access_str));
        }
    }
}

/* Complain if ASSIGN (a division operation) has a tainted divisor
   that could be zero.  */

void
taint_state_machine::check_for_tainted_divisor (sm_context *sm_ctxt,
                                                const supernode *node,
                                                const gassign *assign) const
{
  const region_model *old_model = sm_ctxt->get_old_region_model ();
  if (!old_model)
    return;

  tree divisor_expr = gimple_assign_rhs2 (assign);
  const svalue *divisor_sval = old_model->get_rvalue (divisor_expr, NULL);

  state_t state = sm_ctxt->get_state (assign, divisor_sval);
  enum bounds b;
  if (get_taint (state, TREE_TYPE (divisor_expr), &b))
    {
      const svalue *zero_sval
        = old_model->get_manager ()->get_or_create_int_cst
            (TREE_TYPE (divisor_expr), 0);
      tristate ts
        = old_model->eval_condition (divisor_sval, NE_EXPR, zero_sval);
      if (ts.is_true ())
        /* The divisor is known to not equal 0: don't warn.  */
        return;

      tree diag_divisor = sm_ctxt->get_diagnostic_tree (divisor_expr);
      sm_ctxt->warn (node, assign, divisor_expr,
                     new tainted_divisor (*this, diag_divisor, b));
      sm_ctxt->set_next_state (assign, divisor_sval, m_stop);
    }
}

} // anonymous namespace

/* Internal interface to this file.  */

state_machine *
make_taint_state_machine (logger *logger)
{
  return new taint_state_machine (logger);
}

/* Complain to CTXT if accessing REG could lead to arbitrary
   memory access under an attacker's control (due to taint).  */

void
region_model::check_region_for_taint (const region *reg,
                                      enum access_direction,
                                      region_model_context *ctxt) const
{
  gcc_assert (reg);
  gcc_assert (ctxt);

  LOG_SCOPE (ctxt->get_logger ());

  sm_state_map *smap;
  const state_machine *sm;
  unsigned sm_idx;
  if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
    return;

  gcc_assert (smap);
  gcc_assert (sm);

  const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;

  const extrinsic_state *ext_state = ctxt->get_ext_state ();
  if (!ext_state)
    return;

  const region *iter_region = reg;
  while (iter_region)
    {
      switch (iter_region->get_kind ())
        {
        default:
          break;

        case RK_ELEMENT:
          {
            const element_region *element_reg
              = (const element_region *)iter_region;
            const svalue *index = element_reg->get_index ();
            const state_machine::state_t
              state = smap->get_state (index, *ext_state);
            gcc_assert (state);
            enum bounds b;
            if (taint_sm.get_taint (state, index->get_type (), &b))
              {
                tree arg = get_representative_tree (index);
                ctxt->warn (new tainted_array_index (taint_sm, arg, b));
              }
          }
          break;

        case RK_OFFSET:
          {
            const offset_region *offset_reg
              = (const offset_region *)iter_region;
            const svalue *offset = offset_reg->get_byte_offset ();
            const state_machine::state_t
              state = smap->get_state (offset, *ext_state);
            gcc_assert (state);
            /* Handle implicit cast to sizetype.  */
            tree effective_type = offset->get_type ();
            if (const svalue *cast = offset->maybe_undo_cast ())
              if (cast->get_type ())
                effective_type = cast->get_type ();
            enum bounds b;
            if (taint_sm.get_taint (state, effective_type, &b))
              {
                tree arg = get_representative_tree (offset);
                ctxt->warn (new tainted_offset (taint_sm, arg, b));
              }
          }
          break;

        case RK_CAST:
          {
            const cast_region *cast_reg
              = as_a <const cast_region *> (iter_region);
            iter_region = cast_reg->get_original_region ();
            continue;
          }

        case RK_SIZED:
          {
            const sized_region *sized_reg
              = (const sized_region *)iter_region;
            const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
            const state_machine::state_t
              state = smap->get_state (size_sval, *ext_state);
            gcc_assert (state);
            enum bounds b;
            if (taint_sm.get_taint (state, size_sval->get_type (), &b))
              {
                tree arg = get_representative_tree (size_sval);
                ctxt->warn (new tainted_size (taint_sm, arg, b));
              }
          }
          break;
        }

      iter_region = iter_region->get_parent_region ();
    }
}

/* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
   under an attacker's control (due to taint), where the allocation
   is happening within MEM_SPACE.  */

void
region_model::check_dynamic_size_for_taint (enum memory_space mem_space,
                                            const svalue *size_in_bytes,
                                            region_model_context *ctxt) const
{
  gcc_assert (size_in_bytes);
  gcc_assert (ctxt);

  LOG_SCOPE (ctxt->get_logger ());

  sm_state_map *smap;
  const state_machine *sm;
  unsigned sm_idx;
  if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
    return;

  gcc_assert (smap);
  gcc_assert (sm);

  const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;

  const extrinsic_state *ext_state = ctxt->get_ext_state ();
  if (!ext_state)
    return;

  const state_machine::state_t
    state = smap->get_state (size_in_bytes, *ext_state);
  gcc_assert (state);
  enum bounds b;
  if (taint_sm.get_taint (state, size_in_bytes->get_type (), &b))
    {
      tree arg = get_representative_tree (size_in_bytes);
      ctxt->warn (new tainted_allocation_size (taint_sm, arg, b, mem_space));
    }
}

} // namespace ana

#endif /* #if ENABLE_ANALYZER */