xref: /netbsd-src/external/gpl3/gcc/dist/gcc/analyzer/region-model-asm.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Handling inline asm in the analyzer.
2    Copyright (C) 2021-2022 Free Software Foundation, Inc.
3    Contributed by David Malcolm <dmalcolm@redhat.com>.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "function.h"
26 #include "basic-block.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "diagnostic-core.h"
30 #include "pretty-print.h"
31 #include "tristate.h"
32 #include "selftest.h"
33 #include "json.h"
34 #include "analyzer/analyzer.h"
35 #include "analyzer/analyzer-logging.h"
36 #include "options.h"
37 #include "analyzer/call-string.h"
38 #include "analyzer/program-point.h"
39 #include "analyzer/store.h"
40 #include "analyzer/region-model.h"
41 #include "analyzer/region-model-reachability.h"
42 #include "stmt.h"
43 
44 #if ENABLE_ANALYZER
45 
46 namespace ana {
47 
48 /* Minimal asm support for the analyzer.
49 
50    The objective of this code is to:
51    - minimize false positives from the analyzer on the Linux kernel
52    (which makes heavy use of inline asm), whilst
53    - avoiding having to "teach" the compiler anything about specific strings
54    in asm statements.
55 
56    Specifically, we want to:
57 
58    (a) mark asm outputs and certain other regions as having been written to,
59        to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
60 
61    (b) identify some of these stmts as "deterministic" so that we can
62        write consistent outputs given consistent inputs, so that we can
63        avoid false positives for paths in which an asm is invoked twice
64        with the same inputs and is expected to emit the same output.
65 
66    This file implements heuristics for achieving the above.  */
67 
68 /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
69 
70    Consider this x86 function taken from the Linux kernel
71    (arch/x86/include/asm/barrier.h):
72 
73      static inline unsigned long array_index_mask_nospec(unsigned long index,
74 							 unsigned long size)
75      {
76        unsigned long mask;
77 
78        asm volatile ("cmp %1,%2; sbb %0,%0;"
79 		     :"=r" (mask)
80 		     :"g"(size),"r" (index)
81 		     :"cc");
82        return mask;
83      }
84 
85    The above is a mitigation for Spectre-variant-1 attacks, for clamping
86    an array access to within the range of [0, size) if the CPU speculates
87    past the array bounds.
88 
89    However, it is ultimately used to implement wdev_to_wvif:
90 
91      static inline struct wfx_vif *
92      wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
93      {
94        vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
95        if (!wdev->vif[vif_id]) {
96 	 return NULL;
97        }
98        return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
99      }
100 
101    which is used by:
102 
103      if (wdev_to_wvif(wvif->wdev, 1))
104        return wdev_to_wvif(wvif->wdev, 1)->vif;
105 
106    The code has been written to assume that wdev_to_wvif is deterministic,
107    and won't change from returning non-NULL at the "if" clause to
108    returning NULL at the "->vif" dereference.
109 
110    By treating the above specific "asm volatile" as deterministic we avoid
111    a false positive from -Wanalyzer-null-dereference.  */
112 
113 static bool
deterministic_p(const gasm * asm_stmt)114 deterministic_p (const gasm *asm_stmt)
115 {
116   /* Assume something volatile with no inputs is querying
117      changeable state e.g. rdtsc.  */
118   if (gimple_asm_ninputs (asm_stmt) == 0
119       && gimple_asm_volatile_p (asm_stmt))
120     return false;
121 
122   /* Otherwise assume it's purely a function of its inputs.  */
123   return true;
124 }
125 
126 /* Update this model for the asm STMT, using CTXT to report any
127    diagnostics.
128 
129    Compare with cfgexpand.cc: expand_asm_stmt.  */
130 
131 void
on_asm_stmt(const gasm * stmt,region_model_context * ctxt)132 region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
133 {
134   logger *logger = ctxt ? ctxt->get_logger () : NULL;
135   LOG_SCOPE (logger);
136 
137   const unsigned noutputs = gimple_asm_noutputs (stmt);
138   const unsigned ninputs = gimple_asm_ninputs (stmt);
139 
140   auto_vec<tree> output_tvec;
141   auto_vec<tree> input_tvec;
142   auto_vec<const char *> constraints;
143 
144   /* Copy the gimple vectors into new vectors that we can manipulate.  */
145   output_tvec.safe_grow (noutputs, true);
146   input_tvec.safe_grow (ninputs, true);
147   constraints.safe_grow (noutputs + ninputs, true);
148 
149   for (unsigned i = 0; i < noutputs; ++i)
150     {
151       tree t = gimple_asm_output_op (stmt, i);
152       output_tvec[i] = TREE_VALUE (t);
153       constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
154     }
155   for (unsigned i = 0; i < ninputs; i++)
156     {
157       tree t = gimple_asm_input_op (stmt, i);
158       input_tvec[i] = TREE_VALUE (t);
159       constraints[i + noutputs]
160 	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
161     }
162 
163   /* Determine which regions are reachable from the inputs
164      to this stmt.  */
165   reachable_regions reachable_regs (this);
166 
167   int num_errors = 0;
168 
169   auto_vec<const region *> output_regions (noutputs);
170   for (unsigned i = 0; i < noutputs; ++i)
171     {
172       tree val = output_tvec[i];
173       const char *constraint;
174       bool is_inout;
175       bool allows_reg;
176       bool allows_mem;
177 
178       const region *dst_reg = get_lvalue (val, ctxt);
179       output_regions.quick_push (dst_reg);
180       reachable_regs.add (dst_reg, true);
181 
182       /* Try to parse the output constraint.  If that fails, there's
183 	 no point in going further.  */
184       constraint = constraints[i];
185       if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
186 				    &allows_mem, &allows_reg, &is_inout))
187 	{
188 	  if (logger)
189 	    logger->log ("error parsing constraint for output %i: %qs",
190 			 i, constraint);
191 	  num_errors++;
192 	  continue;
193 	}
194 
195       if (logger)
196 	{
197 	  logger->log ("output %i: %qs %qE"
198 		       " is_inout: %i allows_reg: %i allows_mem: %i",
199 		       i, constraint, val,
200 		       (int)is_inout, (int)allows_reg, (int)allows_mem);
201 	  logger->start_log_line ();
202 	  logger->log_partial ("  region: ");
203 	  dst_reg->dump_to_pp (logger->get_printer (), true);
204 	  logger->end_log_line ();
205 	}
206 
207     }
208 
209   /* Ideally should combine with inout_svals to determine the
210      "effective inputs" and use this for the asm_output_svalue.  */
211 
212   auto_vec<const svalue *> input_svals (ninputs);
213   for (unsigned i = 0; i < ninputs; i++)
214     {
215       tree val = input_tvec[i];
216       const char *constraint = constraints[i + noutputs];
217       bool allows_reg, allows_mem;
218       if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
219 				    constraints.address (),
220 				    &allows_mem, &allows_reg))
221 	{
222 	  if (logger)
223 	    logger->log ("error parsing constraint for input %i: %qs",
224 			 i, constraint);
225 	  num_errors++;
226 	  continue;
227 	}
228 
229       tree src_expr = input_tvec[i];
230       const svalue *src_sval = get_rvalue (src_expr, ctxt);
231       check_for_poison (src_sval, src_expr, ctxt);
232       input_svals.quick_push (src_sval);
233       reachable_regs.handle_sval (src_sval);
234 
235       if (logger)
236 	{
237 	  logger->log ("input %i: %qs %qE"
238 		       " allows_reg: %i allows_mem: %i",
239 		       i, constraint, val,
240 		       (int)allows_reg, (int)allows_mem);
241 	  logger->start_log_line ();
242 	  logger->log_partial ("  sval: ");
243 	  src_sval->dump_to_pp (logger->get_printer (), true);
244 	  logger->end_log_line ();
245 	}
246     }
247 
248   if (num_errors > 0)
249     gcc_unreachable ();
250 
251   if (logger)
252     {
253       logger->log ("reachability: ");
254       reachable_regs.dump_to_pp (logger->get_printer ());
255       logger->end_log_line ();
256     }
257 
258   /* Given the regions that were reachable from the inputs we
259      want to clobber them.
260      This is similar to region_model::handle_unrecognized_call,
261      but the unknown call policies seems too aggressive (e.g. purging state
262      from anything that's ever escaped).  Instead, clobber any clusters
263      that were reachable in *this* asm stmt, rather than those that
264      escaped, and we don't treat the values as having escaped.
265      We also assume that asm stmts don't affect sm-state.  */
266   for (auto iter = reachable_regs.begin_mutable_base_regs ();
267        iter != reachable_regs.end_mutable_base_regs (); ++iter)
268     {
269       const region *base_reg = *iter;
270       if (base_reg->symbolic_for_unknown_ptr_p ()
271 	  || !base_reg->tracked_p ())
272 	continue;
273 
274       binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
275       cluster->on_asm (stmt, m_mgr->get_store_manager (),
276 		       conjured_purge (this, ctxt));
277     }
278 
279   /* Update the outputs.  */
280   for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
281     {
282       tree dst_expr = output_tvec[output_idx];
283       const region *dst_reg = output_regions[output_idx];
284 
285       const svalue *sval;
286       if (deterministic_p (stmt)
287 	  && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
288 	sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
289 						       stmt,
290 						       output_idx,
291 						       input_svals);
292       else
293 	{
294 	  sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
295 						       stmt,
296 						       dst_reg,
297 						       conjured_purge (this,
298 								       ctxt));
299 	}
300       set_value (dst_reg, sval, ctxt);
301     }
302 }
303 
304 } // namespace ana
305 
306 #endif /* #if ENABLE_ANALYZER */
307