xref: /netbsd-src/external/gpl3/gcc/dist/gcc/analyzer/sm-taint.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
/* An experimental state machine, for tracking "taint": unsanitized uses
   of data potentially under an attacker's control.

   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by David Malcolm <dmalcolm@redhat.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tree.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "gimple.h"
30 #include "options.h"
31 #include "diagnostic-path.h"
32 #include "diagnostic-metadata.h"
33 #include "function.h"
34 #include "json.h"
35 #include "analyzer/analyzer.h"
36 #include "analyzer/analyzer-logging.h"
37 #include "gimple-iterator.h"
38 #include "tristate.h"
39 #include "selftest.h"
40 #include "ordered-hash-map.h"
41 #include "cgraph.h"
42 #include "cfg.h"
43 #include "digraph.h"
44 #include "stringpool.h"
45 #include "attribs.h"
46 #include "analyzer/supergraph.h"
47 #include "analyzer/call-string.h"
48 #include "analyzer/program-point.h"
49 #include "analyzer/store.h"
50 #include "analyzer/region-model.h"
51 #include "analyzer/sm.h"
52 #include "analyzer/program-state.h"
53 #include "analyzer/pending-diagnostic.h"
54 
55 #if ENABLE_ANALYZER
56 
57 namespace ana {
58 
59 namespace {
60 
/* An enum for describing the bounds-checking status of a tainted value,
   used when wording diagnostics.  */

enum bounds
{
  /* This tainted value has no upper or lower bound.  */
  BOUNDS_NONE,

  /* This tainted value has an upper bound but no lower bound.  */
  BOUNDS_UPPER,

  /* This tainted value has a lower bound but no upper bound.  */
  BOUNDS_LOWER
};
74 
75 /* An experimental state machine, for tracking "taint": unsanitized uses
76    of data potentially under an attacker's control.  */
77 
78 class taint_state_machine : public state_machine
79 {
80 public:
81   taint_state_machine (logger *logger);
82 
inherited_state_p() const83   bool inherited_state_p () const FINAL OVERRIDE { return true; }
84 
85   state_t alt_get_inherited_state (const sm_state_map &map,
86 				   const svalue *sval,
87 				   const extrinsic_state &ext_state)
88     const FINAL OVERRIDE;
89 
90   bool on_stmt (sm_context *sm_ctxt,
91 		const supernode *node,
92 		const gimple *stmt) const FINAL OVERRIDE;
93 
94   void on_condition (sm_context *sm_ctxt,
95 		     const supernode *node,
96 		     const gimple *stmt,
97 		     const svalue *lhs,
98 		     enum tree_code op,
99 		     const svalue *rhs) const FINAL OVERRIDE;
100 
101   bool can_purge_p (state_t s) const FINAL OVERRIDE;
102 
103   bool get_taint (state_t s, tree type, enum bounds *out) const;
104 
105   state_t combine_states (state_t s0, state_t s1) const;
106 
107 private:
108   void check_for_tainted_size_arg (sm_context *sm_ctxt,
109 				   const supernode *node,
110 				   const gcall *call,
111 				   tree callee_fndecl) const;
112   void check_for_tainted_divisor (sm_context *sm_ctxt,
113 				  const supernode *node,
114 				  const gassign *assign) const;
115 
116 public:
117   /* State for a "tainted" value: unsanitized data potentially under an
118      attacker's control.  */
119   state_t m_tainted;
120 
121   /* State for a "tainted" value that has a lower bound.  */
122   state_t m_has_lb;
123 
124   /* State for a "tainted" value that has an upper bound.  */
125   state_t m_has_ub;
126 
127   /* Stop state, for a value we don't want to track any more.  */
128   state_t m_stop;
129 };
130 
131 /* Class for diagnostics relating to taint_state_machine.  */
132 
133 class taint_diagnostic : public pending_diagnostic
134 {
135 public:
taint_diagnostic(const taint_state_machine & sm,tree arg,enum bounds has_bounds)136   taint_diagnostic (const taint_state_machine &sm, tree arg,
137 		    enum bounds has_bounds)
138   : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
139   {}
140 
subclass_equal_p(const pending_diagnostic & base_other) const141   bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
142   {
143     const taint_diagnostic &other = (const taint_diagnostic &)base_other;
144     return (same_tree_p (m_arg, other.m_arg)
145 	    && m_has_bounds == other.m_has_bounds);
146   }
147 
describe_state_change(const evdesc::state_change & change)148   label_text describe_state_change (const evdesc::state_change &change)
149     FINAL OVERRIDE
150   {
151     if (change.m_new_state == m_sm.m_tainted)
152       {
153 	if (change.m_origin)
154 	  return change.formatted_print ("%qE has an unchecked value here"
155 					 " (from %qE)",
156 					 change.m_expr, change.m_origin);
157 	else
158 	  return change.formatted_print ("%qE gets an unchecked value here",
159 					 change.m_expr);
160       }
161     else if (change.m_new_state == m_sm.m_has_lb)
162       return change.formatted_print ("%qE has its lower bound checked here",
163 				     change.m_expr);
164     else if (change.m_new_state == m_sm.m_has_ub)
165       return change.formatted_print ("%qE has its upper bound checked here",
166 				     change.m_expr);
167     return label_text ();
168   }
169 protected:
170   const taint_state_machine &m_sm;
171   tree m_arg;
172   enum bounds m_has_bounds;
173 };
174 
175 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
176    array index.  */
177 
178 class tainted_array_index : public taint_diagnostic
179 {
180 public:
tainted_array_index(const taint_state_machine & sm,tree arg,enum bounds has_bounds)181   tainted_array_index (const taint_state_machine &sm, tree arg,
182 		       enum bounds has_bounds)
183   : taint_diagnostic (sm, arg, has_bounds)
184   {}
185 
get_kind() const186   const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
187 
get_controlling_option() const188   int get_controlling_option () const FINAL OVERRIDE
189   {
190     return OPT_Wanalyzer_tainted_array_index;
191   }
192 
emit(rich_location * rich_loc)193   bool emit (rich_location *rich_loc) FINAL OVERRIDE
194   {
195     diagnostic_metadata m;
196     /* CWE-129: "Improper Validation of Array Index".  */
197     m.add_cwe (129);
198     switch (m_has_bounds)
199       {
200       default:
201 	gcc_unreachable ();
202       case BOUNDS_NONE:
203 	return warning_meta (rich_loc, m, get_controlling_option (),
204 			     "use of attacker-controlled value %qE"
205 			     " in array lookup without bounds checking",
206 			     m_arg);
207 	break;
208       case BOUNDS_UPPER:
209 	return warning_meta (rich_loc, m, get_controlling_option (),
210 			     "use of attacker-controlled value %qE"
211 			     " in array lookup without checking for negative",
212 			     m_arg);
213 	break;
214       case BOUNDS_LOWER:
215 	return warning_meta (rich_loc, m, get_controlling_option (),
216 			     "use of attacker-controlled value %qE"
217 			     " in array lookup without upper-bounds checking",
218 			     m_arg);
219 	break;
220       }
221   }
222 
describe_final_event(const evdesc::final_event & ev)223   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
224   {
225     switch (m_has_bounds)
226       {
227       default:
228 	gcc_unreachable ();
229       case BOUNDS_NONE:
230 	return ev.formatted_print
231 	  ("use of attacker-controlled value %qE in array lookup"
232 	   " without bounds checking",
233 	   m_arg);
234       case BOUNDS_UPPER:
235 	return ev.formatted_print
236 	  ("use of attacker-controlled value %qE"
237 	   " in array lookup without checking for negative",
238 	   m_arg);
239       case BOUNDS_LOWER:
240 	return ev.formatted_print
241 	  ("use of attacker-controlled value %qE"
242 	   " in array lookup without upper-bounds checking",
243 	   m_arg);
244       }
245   }
246 };
247 
248 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
249    pointer offset.  */
250 
251 class tainted_offset : public taint_diagnostic
252 {
253 public:
tainted_offset(const taint_state_machine & sm,tree arg,enum bounds has_bounds)254   tainted_offset (const taint_state_machine &sm, tree arg,
255 		       enum bounds has_bounds)
256   : taint_diagnostic (sm, arg, has_bounds)
257   {}
258 
get_kind() const259   const char *get_kind () const FINAL OVERRIDE { return "tainted_offset"; }
260 
get_controlling_option() const261   int get_controlling_option () const FINAL OVERRIDE
262   {
263     return OPT_Wanalyzer_tainted_offset;
264   }
265 
emit(rich_location * rich_loc)266   bool emit (rich_location *rich_loc) FINAL OVERRIDE
267   {
268     diagnostic_metadata m;
269     /* CWE-823: "Use of Out-of-range Pointer Offset".  */
270     m.add_cwe (823);
271     if (m_arg)
272       switch (m_has_bounds)
273 	{
274 	default:
275 	  gcc_unreachable ();
276 	case BOUNDS_NONE:
277 	  return warning_meta (rich_loc, m, get_controlling_option (),
278 			       "use of attacker-controlled value %qE as offset"
279 			       " without bounds checking",
280 			       m_arg);
281 	  break;
282 	case BOUNDS_UPPER:
283 	  return warning_meta (rich_loc, m, get_controlling_option (),
284 			       "use of attacker-controlled value %qE as offset"
285 			       " without lower-bounds checking",
286 			       m_arg);
287 	  break;
288 	case BOUNDS_LOWER:
289 	  return warning_meta (rich_loc, m, get_controlling_option (),
290 			       "use of attacker-controlled value %qE as offset"
291 			       " without upper-bounds checking",
292 			       m_arg);
293 	  break;
294 	}
295     else
296       switch (m_has_bounds)
297 	{
298 	default:
299 	  gcc_unreachable ();
300 	case BOUNDS_NONE:
301 	  return warning_meta (rich_loc, m, get_controlling_option (),
302 			       "use of attacker-controlled value as offset"
303 			       " without bounds checking");
304 	  break;
305 	case BOUNDS_UPPER:
306 	  return warning_meta (rich_loc, m, get_controlling_option (),
307 			       "use of attacker-controlled value as offset"
308 			       " without lower-bounds checking");
309 	  break;
310 	case BOUNDS_LOWER:
311 	  return warning_meta (rich_loc, m, get_controlling_option (),
312 			       "use of attacker-controlled value as offset"
313 			       " without upper-bounds checking");
314 	  break;
315 	}
316   }
317 
describe_final_event(const evdesc::final_event & ev)318   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
319   {
320     if (m_arg)
321       switch (m_has_bounds)
322 	{
323 	default:
324 	  gcc_unreachable ();
325 	case BOUNDS_NONE:
326 	  return ev.formatted_print ("use of attacker-controlled value %qE"
327 				     " as offset without bounds checking",
328 				     m_arg);
329 	case BOUNDS_UPPER:
330 	  return ev.formatted_print ("use of attacker-controlled value %qE"
331 				     " as offset without lower-bounds checking",
332 				     m_arg);
333 	case BOUNDS_LOWER:
334 	  return ev.formatted_print ("use of attacker-controlled value %qE"
335 				     " as offset without upper-bounds checking",
336 				     m_arg);
337 	}
338     else
339       switch (m_has_bounds)
340 	{
341 	default:
342 	  gcc_unreachable ();
343 	case BOUNDS_NONE:
344 	  return ev.formatted_print ("use of attacker-controlled value"
345 				     " as offset without bounds checking");
346 	case BOUNDS_UPPER:
347 	  return ev.formatted_print ("use of attacker-controlled value"
348 				     " as offset without lower-bounds"
349 				     " checking");
350 	case BOUNDS_LOWER:
351 	  return ev.formatted_print ("use of attacker-controlled value"
352 				     " as offset without upper-bounds"
353 				     " checking");
354 	}
355   }
356 };
357 
358 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
359    size.  */
360 
361 class tainted_size : public taint_diagnostic
362 {
363 public:
tainted_size(const taint_state_machine & sm,tree arg,enum bounds has_bounds)364   tainted_size (const taint_state_machine &sm, tree arg,
365 		enum bounds has_bounds)
366   : taint_diagnostic (sm, arg, has_bounds)
367   {}
368 
get_kind() const369   const char *get_kind () const OVERRIDE { return "tainted_size"; }
370 
get_controlling_option() const371   int get_controlling_option () const FINAL OVERRIDE
372   {
373     return OPT_Wanalyzer_tainted_size;
374   }
375 
emit(rich_location * rich_loc)376   bool emit (rich_location *rich_loc) OVERRIDE
377   {
378     diagnostic_metadata m;
379     m.add_cwe (129);
380     switch (m_has_bounds)
381       {
382       default:
383 	gcc_unreachable ();
384       case BOUNDS_NONE:
385 	return warning_meta (rich_loc, m, get_controlling_option (),
386 			     "use of attacker-controlled value %qE as size"
387 			     " without bounds checking",
388 			     m_arg);
389 	break;
390       case BOUNDS_UPPER:
391 	return warning_meta (rich_loc, m, get_controlling_option (),
392 			     "use of attacker-controlled value %qE as size"
393 			     " without lower-bounds checking",
394 			     m_arg);
395 	break;
396       case BOUNDS_LOWER:
397 	return warning_meta (rich_loc, m, get_controlling_option (),
398 			     "use of attacker-controlled value %qE as size"
399 			     " without upper-bounds checking",
400 			     m_arg);
401 	break;
402       }
403   }
404 
describe_final_event(const evdesc::final_event & ev)405   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
406   {
407     switch (m_has_bounds)
408       {
409       default:
410 	gcc_unreachable ();
411       case BOUNDS_NONE:
412 	return ev.formatted_print ("use of attacker-controlled value %qE"
413 				   " as size without bounds checking",
414 				   m_arg);
415       case BOUNDS_UPPER:
416 	return ev.formatted_print ("use of attacker-controlled value %qE"
417 				   " as size without lower-bounds checking",
418 				   m_arg);
419       case BOUNDS_LOWER:
420 	return ev.formatted_print ("use of attacker-controlled value %qE"
421 				   " as size without upper-bounds checking",
422 				   m_arg);
423       }
424   }
425 };
426 
427 /* Subclass of tainted_size for reporting on tainted size values
428    passed to an external function annotated with attribute "access".  */
429 
430 class tainted_access_attrib_size : public tainted_size
431 {
432 public:
tainted_access_attrib_size(const taint_state_machine & sm,tree arg,enum bounds has_bounds,tree callee_fndecl,unsigned size_argno,const char * access_str)433   tainted_access_attrib_size (const taint_state_machine &sm, tree arg,
434 			      enum bounds has_bounds, tree callee_fndecl,
435 			      unsigned size_argno, const char *access_str)
436   : tainted_size (sm, arg, has_bounds),
437     m_callee_fndecl (callee_fndecl),
438     m_size_argno (size_argno), m_access_str (access_str)
439   {
440   }
441 
get_kind() const442   const char *get_kind () const OVERRIDE
443   {
444     return "tainted_access_attrib_size";
445   }
446 
emit(rich_location * rich_loc)447   bool emit (rich_location *rich_loc) FINAL OVERRIDE
448   {
449     bool warned = tainted_size::emit (rich_loc);
450     if (warned)
451       {
452 	inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
453 		"parameter %i of %qD marked as a size via attribute %qs",
454 		m_size_argno + 1, m_callee_fndecl, m_access_str);
455       }
456     return warned;
457   }
458 
459 private:
460   tree m_callee_fndecl;
461   unsigned m_size_argno;
462   const char *m_access_str;
463 };
464 
465 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
466    divisor (so that an attacker can trigger a divide by zero).  */
467 
468 class tainted_divisor : public taint_diagnostic
469 {
470 public:
tainted_divisor(const taint_state_machine & sm,tree arg,enum bounds has_bounds)471   tainted_divisor (const taint_state_machine &sm, tree arg,
472 		   enum bounds has_bounds)
473   : taint_diagnostic (sm, arg, has_bounds)
474   {}
475 
get_kind() const476   const char *get_kind () const FINAL OVERRIDE { return "tainted_divisor"; }
477 
get_controlling_option() const478   int get_controlling_option () const FINAL OVERRIDE
479   {
480     return OPT_Wanalyzer_tainted_divisor;
481   }
482 
emit(rich_location * rich_loc)483   bool emit (rich_location *rich_loc) FINAL OVERRIDE
484   {
485     diagnostic_metadata m;
486     /* CWE-369: "Divide By Zero".  */
487     m.add_cwe (369);
488     if (m_arg)
489       return warning_meta (rich_loc, m, get_controlling_option (),
490 			   "use of attacker-controlled value %qE as divisor"
491 			   " without checking for zero",
492 			   m_arg);
493     else
494       return warning_meta (rich_loc, m, get_controlling_option (),
495 			   "use of attacker-controlled value as divisor"
496 			   " without checking for zero");
497   }
498 
describe_final_event(const evdesc::final_event & ev)499   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
500   {
501     if (m_arg)
502       return ev.formatted_print
503 	("use of attacker-controlled value %qE as divisor"
504 	 " without checking for zero",
505 	 m_arg);
506     else
507       return ev.formatted_print
508 	("use of attacker-controlled value as divisor"
509 	 " without checking for zero");
510   }
511 };
512 
513 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
514    size of a dynamic allocation.  */
515 
516 class tainted_allocation_size : public taint_diagnostic
517 {
518 public:
tainted_allocation_size(const taint_state_machine & sm,tree arg,enum bounds has_bounds,enum memory_space mem_space)519   tainted_allocation_size (const taint_state_machine &sm, tree arg,
520 			   enum bounds has_bounds, enum memory_space mem_space)
521   : taint_diagnostic (sm, arg, has_bounds),
522     m_mem_space (mem_space)
523   {
524   }
525 
get_kind() const526   const char *get_kind () const FINAL OVERRIDE
527   {
528     return "tainted_allocation_size";
529   }
530 
subclass_equal_p(const pending_diagnostic & base_other) const531   bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
532   {
533     if (!taint_diagnostic::subclass_equal_p (base_other))
534       return false;
535     const tainted_allocation_size &other
536       = (const tainted_allocation_size &)base_other;
537     return m_mem_space == other.m_mem_space;
538   }
539 
get_controlling_option() const540   int get_controlling_option () const FINAL OVERRIDE
541   {
542     return OPT_Wanalyzer_tainted_allocation_size;
543   }
544 
emit(rich_location * rich_loc)545   bool emit (rich_location *rich_loc) FINAL OVERRIDE
546   {
547     diagnostic_metadata m;
548     /* "CWE-789: Memory Allocation with Excessive Size Value".  */
549     m.add_cwe (789);
550 
551     bool warned;
552     if (m_arg)
553       switch (m_has_bounds)
554 	{
555 	default:
556 	  gcc_unreachable ();
557 	case BOUNDS_NONE:
558 	  warned = warning_meta (rich_loc, m, get_controlling_option (),
559 				 "use of attacker-controlled value %qE as"
560 				 " allocation size without bounds checking",
561 				 m_arg);
562 	  break;
563 	case BOUNDS_UPPER:
564 	  warned = warning_meta (rich_loc, m, get_controlling_option (),
565 				 "use of attacker-controlled value %qE as"
566 				 " allocation size without"
567 				 " lower-bounds checking",
568 				 m_arg);
569 	  break;
570 	case BOUNDS_LOWER:
571 	  warned = warning_meta (rich_loc, m, get_controlling_option (),
572 				 "use of attacker-controlled value %qE as"
573 				 " allocation size without"
574 				 " upper-bounds checking",
575 				 m_arg);
576 	  break;
577 	}
578     else
579       switch (m_has_bounds)
580 	{
581 	default:
582 	  gcc_unreachable ();
583 	case BOUNDS_NONE:
584 	  warned = warning_meta (rich_loc, m, get_controlling_option (),
585 				 "use of attacker-controlled value as"
586 				 " allocation size without bounds"
587 				 " checking");
588 	  break;
589 	case BOUNDS_UPPER:
590 	  warned = warning_meta (rich_loc, m, get_controlling_option (),
591 				 "use of attacker-controlled value as"
592 				 " allocation size without"
593 				 " lower-bounds checking");
594 	  break;
595 	case BOUNDS_LOWER:
596 	  warned = warning_meta (rich_loc, m, get_controlling_option (),
597 				 "use of attacker-controlled value as"
598 				 " allocation size without"
599 				 " upper-bounds checking");
600 	  break;
601 	}
602     if (warned)
603       {
604 	location_t loc = rich_loc->get_loc ();
605 	switch (m_mem_space)
606 	  {
607 	  default:
608 	    break;
609 	  case MEMSPACE_STACK:
610 	    inform (loc, "stack-based allocation");
611 	    break;
612 	  case MEMSPACE_HEAP:
613 	    inform (loc, "heap-based allocation");
614 	    break;
615 	  }
616       }
617     return warned;
618   }
619 
describe_final_event(const evdesc::final_event & ev)620   label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
621   {
622     if (m_arg)
623       switch (m_has_bounds)
624 	{
625 	default:
626 	  gcc_unreachable ();
627 	case BOUNDS_NONE:
628 	  return ev.formatted_print
629 	    ("use of attacker-controlled value %qE as allocation size"
630 	     " without bounds checking",
631 	     m_arg);
632 	case BOUNDS_UPPER:
633 	  return ev.formatted_print
634 	    ("use of attacker-controlled value %qE as allocation size"
635 	     " without lower-bounds checking",
636 	     m_arg);
637 	case BOUNDS_LOWER:
638 	  return ev.formatted_print
639 	    ("use of attacker-controlled value %qE as allocation size"
640 	     " without upper-bounds checking",
641 	     m_arg);
642 	}
643     else
644       switch (m_has_bounds)
645 	{
646 	default:
647 	  gcc_unreachable ();
648 	case BOUNDS_NONE:
649 	  return ev.formatted_print
650 	    ("use of attacker-controlled value as allocation size"
651 	     " without bounds checking");
652 	case BOUNDS_UPPER:
653 	  return ev.formatted_print
654 	    ("use of attacker-controlled value as allocation size"
655 	     " without lower-bounds checking");
656 	case BOUNDS_LOWER:
657 	  return ev.formatted_print
658 	    ("use of attacker-controlled value as allocation size"
659 	     " without upper-bounds checking");
660 	}
661   }
662 
663 private:
664   enum memory_space m_mem_space;
665 };
666 
667 /* taint_state_machine's ctor.  */
668 
taint_state_machine(logger * logger)669 taint_state_machine::taint_state_machine (logger *logger)
670 : state_machine ("taint", logger)
671 {
672   m_tainted = add_state ("tainted");
673   m_has_lb = add_state ("has_lb");
674   m_has_ub = add_state ("has_ub");
675   m_stop = add_state ("stop");
676 }
677 
678 state_machine::state_t
alt_get_inherited_state(const sm_state_map & map,const svalue * sval,const extrinsic_state & ext_state) const679 taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
680 					      const svalue *sval,
681 					      const extrinsic_state &ext_state)
682   const
683 {
684   switch (sval->get_kind ())
685     {
686     default:
687       break;
688     case SK_UNARYOP:
689       {
690 	const unaryop_svalue *unaryop_sval
691 	  = as_a <const unaryop_svalue *> (sval);
692 	enum tree_code op = unaryop_sval->get_op ();
693 	const svalue *arg = unaryop_sval->get_arg ();
694 	switch (op)
695 	  {
696 	  case NOP_EXPR:
697 	    {
698 	      state_t arg_state = map.get_state (arg, ext_state);
699 	      return arg_state;
700 	    }
701 	  default:
702 	    break;
703 	  }
704       }
705       break;
706     case SK_BINOP:
707       {
708 	const binop_svalue *binop_sval = as_a <const binop_svalue *> (sval);
709 	enum tree_code op = binop_sval->get_op ();
710 	const svalue *arg0 = binop_sval->get_arg0 ();
711 	const svalue *arg1 = binop_sval->get_arg1 ();
712 	switch (op)
713 	  {
714 	  default:
715 	    break;
716 	  case PLUS_EXPR:
717 	  case MINUS_EXPR:
718 	  case MULT_EXPR:
719 	  case POINTER_PLUS_EXPR:
720 	  case TRUNC_DIV_EXPR:
721 	  case TRUNC_MOD_EXPR:
722 	    {
723 	      state_t arg0_state = map.get_state (arg0, ext_state);
724 	      state_t arg1_state = map.get_state (arg1, ext_state);
725 	      return combine_states (arg0_state, arg1_state);
726 	    }
727 	    break;
728 
729 	  case EQ_EXPR:
730 	  case GE_EXPR:
731 	  case LE_EXPR:
732 	  case NE_EXPR:
733 	  case GT_EXPR:
734 	  case LT_EXPR:
735 	  case UNORDERED_EXPR:
736 	  case ORDERED_EXPR:
737 	    /* Comparisons are just booleans.  */
738 	    return m_start;
739 
740 	  case BIT_AND_EXPR:
741 	  case RSHIFT_EXPR:
742 	    return NULL;
743 	  }
744       }
745       break;
746     }
747   return NULL;
748 }
749 
750 /* Implementation of state_machine::on_stmt vfunc for taint_state_machine.  */
751 
752 bool
on_stmt(sm_context * sm_ctxt,const supernode * node,const gimple * stmt) const753 taint_state_machine::on_stmt (sm_context *sm_ctxt,
754 			       const supernode *node,
755 			       const gimple *stmt) const
756 {
757   if (const gcall *call = dyn_cast <const gcall *> (stmt))
758     if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
759       {
760 	if (is_named_call_p (callee_fndecl, "fread", call, 4))
761 	  {
762 	    tree arg = gimple_call_arg (call, 0);
763 
764 	    sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
765 
766 	    /* Dereference an ADDR_EXPR.  */
767 	    // TODO: should the engine do this?
768 	    if (TREE_CODE (arg) == ADDR_EXPR)
769 	      sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
770 				      m_start, m_tainted);
771 	    return true;
772 	  }
773 
774 	/* External function with "access" attribute. */
775 	if (sm_ctxt->unknown_side_effects_p ())
776 	  check_for_tainted_size_arg (sm_ctxt, node, call, callee_fndecl);
777       }
778   // TODO: ...etc; many other sources of untrusted data
779 
780   if (const gassign *assign = dyn_cast <const gassign *> (stmt))
781     {
782       enum tree_code op = gimple_assign_rhs_code (assign);
783 
784       switch (op)
785 	{
786 	default:
787 	  break;
788 	case TRUNC_DIV_EXPR:
789 	case CEIL_DIV_EXPR:
790 	case FLOOR_DIV_EXPR:
791 	case ROUND_DIV_EXPR:
792 	case TRUNC_MOD_EXPR:
793 	case CEIL_MOD_EXPR:
794 	case FLOOR_MOD_EXPR:
795 	case ROUND_MOD_EXPR:
796 	case RDIV_EXPR:
797 	case EXACT_DIV_EXPR:
798 	  check_for_tainted_divisor (sm_ctxt, node, assign);
799 	  break;
800 	}
801     }
802 
803   return false;
804 }
805 
806 /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
807    Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
808    and states 'has_ub' and 'has_lb' to 'stop'.  */
809 
810 void
on_condition(sm_context * sm_ctxt,const supernode * node,const gimple * stmt,const svalue * lhs,enum tree_code op,const svalue * rhs ATTRIBUTE_UNUSED) const811 taint_state_machine::on_condition (sm_context *sm_ctxt,
812 				   const supernode *node,
813 				   const gimple *stmt,
814 				   const svalue *lhs,
815 				   enum tree_code op,
816 				   const svalue *rhs ATTRIBUTE_UNUSED) const
817 {
818   if (stmt == NULL)
819     return;
820 
821   // TODO: this doesn't use the RHS; should we make it symmetric?
822 
823   // TODO
824   switch (op)
825     {
826       //case NE_EXPR:
827       //case EQ_EXPR:
828     case GE_EXPR:
829     case GT_EXPR:
830       {
831 	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
832 				m_has_lb);
833 	sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
834 				m_stop);
835       }
836       break;
837     case LE_EXPR:
838     case LT_EXPR:
839       {
840 	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
841 				m_has_ub);
842 	sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
843 				m_stop);
844       }
845       break;
846     default:
847       break;
848     }
849 }
850 
851 bool
can_purge_p(state_t s ATTRIBUTE_UNUSED) const852 taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
853 {
854   return true;
855 }
856 
857 /* If STATE is a tainted state, write the bounds to *OUT and return true.
858    Otherwise return false.
859    Use the signedness of TYPE to determine if "has_ub" is tainted.  */
860 
861 bool
get_taint(state_t state,tree type,enum bounds * out) const862 taint_state_machine::get_taint (state_t state, tree type,
863 				enum bounds *out) const
864 {
865   /* Unsigned types have an implicit lower bound.  */
866   bool is_unsigned = false;
867   if (type)
868     if (INTEGRAL_TYPE_P (type))
869       is_unsigned = TYPE_UNSIGNED (type);
870 
871   /* Can't use a switch as the states are non-const.  */
872   if (state == m_tainted)
873     {
874       *out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE;
875       return true;
876     }
877   else if (state == m_has_lb)
878     {
879       *out = BOUNDS_LOWER;
880       return true;
881     }
882   else if (state == m_has_ub && !is_unsigned)
883     {
884       /* Missing lower bound.  */
885       *out = BOUNDS_UPPER;
886       return true;
887     }
888   return false;
889 }
890 
891 /* Find the most tainted state of S0 and S1.  */
892 
893 state_machine::state_t
combine_states(state_t s0,state_t s1) const894 taint_state_machine::combine_states (state_t s0, state_t s1) const
895 {
896   gcc_assert (s0);
897   gcc_assert (s1);
898   if (s0 == s1)
899     return s0;
900   if (s0 == m_tainted || s1 == m_tainted)
901     return m_tainted;
902   if (s0 == m_start)
903     return s1;
904   if (s1 == m_start)
905     return s0;
906   if (s0 == m_stop)
907     return s1;
908   if (s1 == m_stop)
909     return s0;
910   /* The only remaining combinations are one of has_ub and has_lb
911      (in either order).  */
912   gcc_assert ((s0 == m_has_lb && s1 == m_has_ub)
913 	      || (s0 == m_has_ub && s1 == m_has_lb));
914   return m_tainted;
915 }
916 
917 /* Check for calls to external functions marked with
918    __attribute__((access)) with a size-index: complain about
919    tainted values passed as a size to such a function.  */
920 
921 void
check_for_tainted_size_arg(sm_context * sm_ctxt,const supernode * node,const gcall * call,tree callee_fndecl) const922 taint_state_machine::check_for_tainted_size_arg (sm_context *sm_ctxt,
923 						 const supernode *node,
924 						 const gcall *call,
925 						 tree callee_fndecl) const
926 {
927   tree fntype = TREE_TYPE (callee_fndecl);
928   if (!fntype)
929     return;
930 
931   if (!TYPE_ATTRIBUTES (fntype))
932     return;
933 
934   /* Initialize a map of attribute access specifications for arguments
935      to the function call.  */
936   rdwr_map rdwr_idx;
937   init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));
938 
939   unsigned argno = 0;
940 
941   for (tree iter = TYPE_ARG_TYPES (fntype); iter;
942        iter = TREE_CHAIN (iter), ++argno)
943     {
944       const attr_access* access = rdwr_idx.get (argno);
945       if (!access)
946 	continue;
947 
948       /* Ignore any duplicate entry in the map for the size argument.  */
949       if (access->ptrarg != argno)
950 	continue;
951 
952       if (access->sizarg == UINT_MAX)
953 	continue;
954 
955       tree size_arg = gimple_call_arg (call, access->sizarg);
956 
957       state_t state = sm_ctxt->get_state (call, size_arg);
958       enum bounds b;
959       if (get_taint (state, TREE_TYPE (size_arg), &b))
960 	{
961 	  const char* const access_str =
962 	    TREE_STRING_POINTER (access->to_external_string ());
963 	  tree diag_size = sm_ctxt->get_diagnostic_tree (size_arg);
964 	  sm_ctxt->warn (node, call, size_arg,
965 			 new tainted_access_attrib_size (*this, diag_size, b,
966 							 callee_fndecl,
967 							 access->sizarg,
968 							 access_str));
969 	}
970     }
971 }
972 
973 /* Complain if ASSIGN (a division operation) has a tainted divisor
974    that could be zero.  */
975 
976 void
check_for_tainted_divisor(sm_context * sm_ctxt,const supernode * node,const gassign * assign) const977 taint_state_machine::check_for_tainted_divisor (sm_context *sm_ctxt,
978 						const supernode *node,
979 						const gassign *assign) const
980 {
981   const region_model *old_model = sm_ctxt->get_old_region_model ();
982   if (!old_model)
983     return;
984 
985   tree divisor_expr = gimple_assign_rhs2 (assign);;
986   const svalue *divisor_sval = old_model->get_rvalue (divisor_expr, NULL);
987 
988   state_t state = sm_ctxt->get_state (assign, divisor_sval);
989   enum bounds b;
990   if (get_taint (state, TREE_TYPE (divisor_expr), &b))
991     {
992       const svalue *zero_sval
993 	= old_model->get_manager ()->get_or_create_int_cst
994 	    (TREE_TYPE (divisor_expr), 0);
995       tristate ts
996 	= old_model->eval_condition (divisor_sval, NE_EXPR, zero_sval);
997       if (ts.is_true ())
998 	/* The divisor is known to not equal 0: don't warn.  */
999 	return;
1000 
1001       tree diag_divisor = sm_ctxt->get_diagnostic_tree (divisor_expr);
1002       sm_ctxt->warn (node, assign, divisor_expr,
1003 		     new tainted_divisor (*this, diag_divisor, b));
1004       sm_ctxt->set_next_state (assign, divisor_sval, m_stop);
1005     }
1006 }
1007 
1008 } // anonymous namespace
1009 
1010 /* Internal interface to this file. */
1011 
1012 state_machine *
make_taint_state_machine(logger * logger)1013 make_taint_state_machine (logger *logger)
1014 {
1015   return new taint_state_machine (logger);
1016 }
1017 
1018 /* Complain to CTXT if accessing REG leads could lead to arbitrary
1019    memory access under an attacker's control (due to taint).  */
1020 
1021 void
check_region_for_taint(const region * reg,enum access_direction,region_model_context * ctxt) const1022 region_model::check_region_for_taint (const region *reg,
1023 				      enum access_direction,
1024 				      region_model_context *ctxt) const
1025 {
1026   gcc_assert (reg);
1027   gcc_assert (ctxt);
1028 
1029   LOG_SCOPE (ctxt->get_logger ());
1030 
1031   sm_state_map *smap;
1032   const state_machine *sm;
1033   unsigned sm_idx;
1034   if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
1035     return;
1036 
1037   gcc_assert (smap);
1038   gcc_assert (sm);
1039 
1040   const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1041 
1042   const extrinsic_state *ext_state = ctxt->get_ext_state ();
1043   if (!ext_state)
1044     return;
1045 
1046   const region *iter_region = reg;
1047   while (iter_region)
1048     {
1049       switch (iter_region->get_kind ())
1050 	{
1051 	default:
1052 	  break;
1053 
1054 	case RK_ELEMENT:
1055 	  {
1056 	    const element_region *element_reg
1057 	      = (const element_region *)iter_region;
1058 	    const svalue *index = element_reg->get_index ();
1059 	    const state_machine::state_t
1060 	      state = smap->get_state (index, *ext_state);
1061 	    gcc_assert (state);
1062 	    enum bounds b;
1063 	    if (taint_sm.get_taint (state, index->get_type (), &b))
1064 	    {
1065 	      tree arg = get_representative_tree (index);
1066 	      ctxt->warn (new tainted_array_index (taint_sm, arg, b));
1067 	    }
1068 	  }
1069 	  break;
1070 
1071 	case RK_OFFSET:
1072 	  {
1073 	    const offset_region *offset_reg
1074 	      = (const offset_region *)iter_region;
1075 	    const svalue *offset = offset_reg->get_byte_offset ();
1076 	    const state_machine::state_t
1077 	      state = smap->get_state (offset, *ext_state);
1078 	    gcc_assert (state);
1079 	    /* Handle implicit cast to sizetype.  */
1080 	    tree effective_type = offset->get_type ();
1081 	    if (const svalue *cast = offset->maybe_undo_cast ())
1082 	      if (cast->get_type ())
1083 		effective_type = cast->get_type ();
1084 	    enum bounds b;
1085 	    if (taint_sm.get_taint (state, effective_type, &b))
1086 	      {
1087 		tree arg = get_representative_tree (offset);
1088 		ctxt->warn (new tainted_offset (taint_sm, arg, b));
1089 	      }
1090 	  }
1091 	  break;
1092 
1093 	case RK_CAST:
1094 	  {
1095 	    const cast_region *cast_reg
1096 	      = as_a <const cast_region *> (iter_region);
1097 	    iter_region = cast_reg->get_original_region ();
1098 	    continue;
1099 	  }
1100 
1101 	case RK_SIZED:
1102 	  {
1103 	    const sized_region *sized_reg
1104 	      = (const sized_region *)iter_region;
1105 	    const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
1106 	    const state_machine::state_t
1107 	      state = smap->get_state (size_sval, *ext_state);
1108 	    gcc_assert (state);
1109 	    enum bounds b;
1110 	    if (taint_sm.get_taint (state, size_sval->get_type (), &b))
1111 	      {
1112 		tree arg = get_representative_tree (size_sval);
1113 		ctxt->warn (new tainted_size (taint_sm, arg, b));
1114 	      }
1115 	  }
1116 	  break;
1117 	}
1118 
1119       iter_region = iter_region->get_parent_region ();
1120     }
1121 }
1122 
1123 /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
1124    under an attacker's control (due to taint), where the allocation
1125    is happening within MEM_SPACE.  */
1126 
1127 void
check_dynamic_size_for_taint(enum memory_space mem_space,const svalue * size_in_bytes,region_model_context * ctxt) const1128 region_model::check_dynamic_size_for_taint (enum memory_space mem_space,
1129 					    const svalue *size_in_bytes,
1130 					    region_model_context *ctxt) const
1131 {
1132   gcc_assert (size_in_bytes);
1133   gcc_assert (ctxt);
1134 
1135   LOG_SCOPE (ctxt->get_logger ());
1136 
1137   sm_state_map *smap;
1138   const state_machine *sm;
1139   unsigned sm_idx;
1140   if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
1141     return;
1142 
1143   gcc_assert (smap);
1144   gcc_assert (sm);
1145 
1146   const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1147 
1148   const extrinsic_state *ext_state = ctxt->get_ext_state ();
1149   if (!ext_state)
1150     return;
1151 
1152   const state_machine::state_t
1153     state = smap->get_state (size_in_bytes, *ext_state);
1154   gcc_assert (state);
1155   enum bounds b;
1156   if (taint_sm.get_taint (state, size_in_bytes->get_type (), &b))
1157     {
1158       tree arg = get_representative_tree (size_in_bytes);
1159       ctxt->warn (new tainted_allocation_size (taint_sm, arg, b, mem_space));
1160     }
1161 }
1162 
1163 } // namespace ana
1164 
1165 #endif /* #if ENABLE_ANALYZER */
1166