1 /* Handling inline asm in the analyzer.
2 Copyright (C) 2021-2022 Free Software Foundation, Inc.
3 Contributed by David Malcolm <dmalcolm@redhat.com>.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "function.h"
26 #include "basic-block.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "diagnostic-core.h"
30 #include "pretty-print.h"
31 #include "tristate.h"
32 #include "selftest.h"
33 #include "json.h"
34 #include "analyzer/analyzer.h"
35 #include "analyzer/analyzer-logging.h"
36 #include "options.h"
37 #include "analyzer/call-string.h"
38 #include "analyzer/program-point.h"
39 #include "analyzer/store.h"
40 #include "analyzer/region-model.h"
41 #include "analyzer/region-model-reachability.h"
42 #include "stmt.h"
43
44 #if ENABLE_ANALYZER
45
46 namespace ana {
47
48 /* Minimal asm support for the analyzer.
49
50 The objective of this code is to:
51 - minimize false positives from the analyzer on the Linux kernel
52 (which makes heavy use of inline asm), whilst
53 - avoiding having to "teach" the compiler anything about specific strings
54 in asm statements.
55
56 Specifically, we want to:
57
58 (a) mark asm outputs and certain other regions as having been written to,
59 to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
60
61 (b) identify some of these stmts as "deterministic" so that we can
62 write consistent outputs given consistent inputs, so that we can
63 avoid false positives for paths in which an asm is invoked twice
64 with the same inputs and is expected to emit the same output.
65
66 This file implements heuristics for achieving the above. */
67
68 /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
69
70 Consider this x86 function taken from the Linux kernel
71 (arch/x86/include/asm/barrier.h):
72
73 static inline unsigned long array_index_mask_nospec(unsigned long index,
74 unsigned long size)
75 {
76 unsigned long mask;
77
78 asm volatile ("cmp %1,%2; sbb %0,%0;"
79 :"=r" (mask)
80 :"g"(size),"r" (index)
81 :"cc");
82 return mask;
83 }
84
85 The above is a mitigation for Spectre-variant-1 attacks, for clamping
86 an array access to within the range of [0, size) if the CPU speculates
87 past the array bounds.
88
89 However, it is ultimately used to implement wdev_to_wvif:
90
91 static inline struct wfx_vif *
92 wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
93 {
94 vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
95 if (!wdev->vif[vif_id]) {
96 return NULL;
97 }
98 return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
99 }
100
101 which is used by:
102
103 if (wdev_to_wvif(wvif->wdev, 1))
104 return wdev_to_wvif(wvif->wdev, 1)->vif;
105
106 The code has been written to assume that wdev_to_wvif is deterministic,
107 and won't change from returning non-NULL at the "if" clause to
108 returning NULL at the "->vif" dereference.
109
110 By treating the above specific "asm volatile" as deterministic we avoid
111 a false positive from -Wanalyzer-null-dereference. */
112
113 static bool
deterministic_p(const gasm * asm_stmt)114 deterministic_p (const gasm *asm_stmt)
115 {
116 /* Assume something volatile with no inputs is querying
117 changeable state e.g. rdtsc. */
118 if (gimple_asm_ninputs (asm_stmt) == 0
119 && gimple_asm_volatile_p (asm_stmt))
120 return false;
121
122 /* Otherwise assume it's purely a function of its inputs. */
123 return true;
124 }
125
126 /* Update this model for the asm STMT, using CTXT to report any
127 diagnostics.
128
129 Compare with cfgexpand.cc: expand_asm_stmt. */
130
131 void
on_asm_stmt(const gasm * stmt,region_model_context * ctxt)132 region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
133 {
134 logger *logger = ctxt ? ctxt->get_logger () : NULL;
135 LOG_SCOPE (logger);
136
137 const unsigned noutputs = gimple_asm_noutputs (stmt);
138 const unsigned ninputs = gimple_asm_ninputs (stmt);
139
140 auto_vec<tree> output_tvec;
141 auto_vec<tree> input_tvec;
142 auto_vec<const char *> constraints;
143
144 /* Copy the gimple vectors into new vectors that we can manipulate. */
145 output_tvec.safe_grow (noutputs, true);
146 input_tvec.safe_grow (ninputs, true);
147 constraints.safe_grow (noutputs + ninputs, true);
148
149 for (unsigned i = 0; i < noutputs; ++i)
150 {
151 tree t = gimple_asm_output_op (stmt, i);
152 output_tvec[i] = TREE_VALUE (t);
153 constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
154 }
155 for (unsigned i = 0; i < ninputs; i++)
156 {
157 tree t = gimple_asm_input_op (stmt, i);
158 input_tvec[i] = TREE_VALUE (t);
159 constraints[i + noutputs]
160 = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
161 }
162
163 /* Determine which regions are reachable from the inputs
164 to this stmt. */
165 reachable_regions reachable_regs (this);
166
167 int num_errors = 0;
168
169 auto_vec<const region *> output_regions (noutputs);
170 for (unsigned i = 0; i < noutputs; ++i)
171 {
172 tree val = output_tvec[i];
173 const char *constraint;
174 bool is_inout;
175 bool allows_reg;
176 bool allows_mem;
177
178 const region *dst_reg = get_lvalue (val, ctxt);
179 output_regions.quick_push (dst_reg);
180 reachable_regs.add (dst_reg, true);
181
182 /* Try to parse the output constraint. If that fails, there's
183 no point in going further. */
184 constraint = constraints[i];
185 if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
186 &allows_mem, &allows_reg, &is_inout))
187 {
188 if (logger)
189 logger->log ("error parsing constraint for output %i: %qs",
190 i, constraint);
191 num_errors++;
192 continue;
193 }
194
195 if (logger)
196 {
197 logger->log ("output %i: %qs %qE"
198 " is_inout: %i allows_reg: %i allows_mem: %i",
199 i, constraint, val,
200 (int)is_inout, (int)allows_reg, (int)allows_mem);
201 logger->start_log_line ();
202 logger->log_partial (" region: ");
203 dst_reg->dump_to_pp (logger->get_printer (), true);
204 logger->end_log_line ();
205 }
206
207 }
208
209 /* Ideally should combine with inout_svals to determine the
210 "effective inputs" and use this for the asm_output_svalue. */
211
212 auto_vec<const svalue *> input_svals (ninputs);
213 for (unsigned i = 0; i < ninputs; i++)
214 {
215 tree val = input_tvec[i];
216 const char *constraint = constraints[i + noutputs];
217 bool allows_reg, allows_mem;
218 if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
219 constraints.address (),
220 &allows_mem, &allows_reg))
221 {
222 if (logger)
223 logger->log ("error parsing constraint for input %i: %qs",
224 i, constraint);
225 num_errors++;
226 continue;
227 }
228
229 tree src_expr = input_tvec[i];
230 const svalue *src_sval = get_rvalue (src_expr, ctxt);
231 check_for_poison (src_sval, src_expr, ctxt);
232 input_svals.quick_push (src_sval);
233 reachable_regs.handle_sval (src_sval);
234
235 if (logger)
236 {
237 logger->log ("input %i: %qs %qE"
238 " allows_reg: %i allows_mem: %i",
239 i, constraint, val,
240 (int)allows_reg, (int)allows_mem);
241 logger->start_log_line ();
242 logger->log_partial (" sval: ");
243 src_sval->dump_to_pp (logger->get_printer (), true);
244 logger->end_log_line ();
245 }
246 }
247
248 if (num_errors > 0)
249 gcc_unreachable ();
250
251 if (logger)
252 {
253 logger->log ("reachability: ");
254 reachable_regs.dump_to_pp (logger->get_printer ());
255 logger->end_log_line ();
256 }
257
258 /* Given the regions that were reachable from the inputs we
259 want to clobber them.
260 This is similar to region_model::handle_unrecognized_call,
261 but the unknown call policies seems too aggressive (e.g. purging state
262 from anything that's ever escaped). Instead, clobber any clusters
263 that were reachable in *this* asm stmt, rather than those that
264 escaped, and we don't treat the values as having escaped.
265 We also assume that asm stmts don't affect sm-state. */
266 for (auto iter = reachable_regs.begin_mutable_base_regs ();
267 iter != reachable_regs.end_mutable_base_regs (); ++iter)
268 {
269 const region *base_reg = *iter;
270 if (base_reg->symbolic_for_unknown_ptr_p ()
271 || !base_reg->tracked_p ())
272 continue;
273
274 binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
275 cluster->on_asm (stmt, m_mgr->get_store_manager (),
276 conjured_purge (this, ctxt));
277 }
278
279 /* Update the outputs. */
280 for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
281 {
282 tree dst_expr = output_tvec[output_idx];
283 const region *dst_reg = output_regions[output_idx];
284
285 const svalue *sval;
286 if (deterministic_p (stmt)
287 && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
288 sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
289 stmt,
290 output_idx,
291 input_svals);
292 else
293 {
294 sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
295 stmt,
296 dst_reg,
297 conjured_purge (this,
298 ctxt));
299 }
300 set_value (dst_reg, sval, ctxt);
301 }
302 }
303
304 } // namespace ana
305
306 #endif /* #if ENABLE_ANALYZER */
307