/* Linux-dependent part of branch trace support for GDB, and GDBserver.

   Copyright (C) 2013-2015 Free Software Foundation, Inc.

   Contributed by Intel Corp. <markus.t.metzger@intel.com>

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "common-defs.h"
#include "linux-btrace.h"
#include "common-regcache.h"
#include "gdb_wait.h"
#include "x86-cpuid.h"

#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif

#if HAVE_LINUX_PERF_EVENT_H && defined(SYS_perf_event_open)

#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/user.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <signal.h>

/* A branch trace record in perf_event.  */
struct perf_event_bts
{
  /* The linear address of the branch source.  */
  uint64_t from;

  /* The linear address of the branch destination.  */
  uint64_t to;
};

/* A perf_event branch trace sample.  */
struct perf_event_sample
{
  /* The perf_event sample header.  */
  struct perf_event_header header;

  /* The perf_event branch tracing payload.  */
  struct perf_event_bts bts;
};

/* Get the perf_event header.  */

static inline volatile struct perf_event_mmap_page *
perf_event_header (struct btrace_target_info* tinfo)
{
  return tinfo->buffer;
}

/* Get the size of the perf_event mmap buffer.  */

static inline size_t
perf_event_mmap_size (const struct btrace_target_info *tinfo)
{
  /* The branch trace buffer is preceded by a configuration page.  */
  return (tinfo->size + 1) * PAGE_SIZE;
}

/* Get the size of the perf_event buffer.  */

static inline size_t
perf_event_buffer_size (struct btrace_target_info* tinfo)
{
  return tinfo->size * PAGE_SIZE;
}

/* Get the start address of the perf_event buffer.  */

static inline const uint8_t *
perf_event_buffer_begin (struct btrace_target_info* tinfo)
{
  return ((const uint8_t *) tinfo->buffer) + PAGE_SIZE;
}

/* Get the end address of the perf_event buffer.  */

static inline const uint8_t *
perf_event_buffer_end (struct btrace_target_info* tinfo)
{
  return perf_event_buffer_begin (tinfo) + perf_event_buffer_size (tinfo);
}

/* Check whether an address is in the kernel.  */

static inline int
perf_event_is_kernel_addr (const struct btrace_target_info *tinfo,
                           uint64_t addr)
{
  uint64_t mask;

  /* If we don't know the size of a pointer, we can't check.  Let's assume it's
     not a kernel address in this case.  */
  if (tinfo->ptr_bits == 0)
    return 0;

  /* A bit mask for the most significant bit in an address.  */
  mask = (uint64_t) 1 << (tinfo->ptr_bits - 1);
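  /* With 64-bit pointers this is bit 63.  On x86-64 Linux, for example,
     kernel addresses such as 0xffffffff81000000 have that bit set while
     user-space addresses do not.  */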

  /* Check whether the most significant bit in the address is set.  */
  return (addr & mask) != 0;
}

/* Check whether a perf event record should be skipped.  */

static inline int
perf_event_skip_record (const struct btrace_target_info *tinfo,
                        const struct perf_event_bts *bts)
{
  /* The hardware may report branches from kernel into user space.  Branches
     from user into kernel space will be suppressed.  We filter the former to
     provide a consistent branch trace excluding kernel.  */
  return perf_event_is_kernel_addr (tinfo, bts->from);
}

/* Perform a few consistency checks on a perf event sample record.  This is
   meant to catch cases when we get out of sync with the perf event stream.  */

static inline int
perf_event_sample_ok (const struct perf_event_sample *sample)
{
  if (sample->header.type != PERF_RECORD_SAMPLE)
    return 0;

  if (sample->header.size != sizeof (*sample))
    return 0;

  return 1;
}

/* Branch trace is collected in a circular buffer [begin; end) as pairs of from
   and to addresses (plus a header).

   Start points into that buffer at the next sample position.
   We read the collected samples backwards from start.

   While reading the samples, we convert the information into a list of blocks.
   For two adjacent samples s1 and s2, we form a block b such that b.begin =
   s1.to and b.end = s2.from.

   In case the buffer overflows during sampling, one sample may have its lower
   part at the end and its upper part at the beginning of the buffer.  */
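
/* A small worked example with hypothetical addresses: suppose the current PC
   is 0x400860 and, reading backwards from START, we find the samples
   s2 = { from = 0x400820, to = 0x400800 } (newer) and
   s1 = { from = 0x4007f0, to = 0x400500 } (older).  The blocks formed are
   [begin = 0x400800, end = 0x400860) and [begin = 0x400500, end = 0x400820),
   followed by a final block with begin = 0 and end = 0x4007f0.  */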

static VEC (btrace_block_s) *
perf_event_read_bts (struct btrace_target_info* tinfo, const uint8_t *begin,
                     const uint8_t *end, const uint8_t *start, size_t size)
{
  VEC (btrace_block_s) *btrace = NULL;
  struct perf_event_sample sample;
  size_t read = 0;
  struct btrace_block block = { 0, 0 };
  struct regcache *regcache;

  gdb_assert (begin <= start);
  gdb_assert (start <= end);

  /* The first block ends at the current pc.  */
  regcache = get_thread_regcache_for_ptid (tinfo->ptid);
  block.end = regcache_read_pc (regcache);

  /* The buffer may contain a partial record as its last entry (i.e. when the
     buffer size is not a multiple of the sample size).  */
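  /* Starting READ just short of one full sample means the loop below reads
     exactly SIZE / sizeof (sample) complete samples and never interprets
     such a trailing partial record as a sample.  */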
  read = sizeof (sample) - 1;

  for (; read < size; read += sizeof (sample))
    {
      const struct perf_event_sample *psample;

      /* Find the next perf_event sample in a backwards traversal.  */
      start -= sizeof (sample);

      /* If we're still inside the buffer, the sample is contiguous and we
         can use it in place.  */
      if (begin <= start)
        psample = (const struct perf_event_sample *) start;
      else
        {
          int missing;

          /* We're to the left of the ring buffer; we wrap around and
             reappear at the very right of the ring buffer.  */

          missing = (begin - start);
          start = (end - missing);

          /* If the entire sample moved to the end of the buffer, it is
             contiguous again and we can use it in place.  */
          if (missing == sizeof (sample))
            psample = (const struct perf_event_sample *) start;
          else
            {
              uint8_t *stack;

              /* The sample wrapped around.  The lower part is at the end and
                 the upper part is at the beginning of the buffer.  */
              stack = (uint8_t *) &sample;

              /* Copy the two parts so we have a contiguous sample.  */
              memcpy (stack, start, missing);
              memcpy (stack + missing, begin, sizeof (sample) - missing);

              psample = &sample;
            }
        }

      if (!perf_event_sample_ok (psample))
        {
          warning (_("Branch trace may be incomplete."));
          break;
        }

      if (perf_event_skip_record (tinfo, &psample->bts))
        continue;

      /* We found a valid sample, so we can complete the current block.  */
      block.begin = psample->bts.to;

      VEC_safe_push (btrace_block_s, btrace, &block);

      /* Start the next block.  */
      block.end = psample->bts.from;
    }

  /* Push the last block (i.e. the first one of inferior execution), as well.
     We don't know where it ends, but we know where it starts.  If we're
     reading delta trace, we can fill in the start address later on.
     Otherwise we will prune it.  */
  block.begin = 0;
  VEC_safe_push (btrace_block_s, btrace, &block);

  return btrace;
}

/* Check whether the kernel supports branch tracing.  */

static int
kernel_supports_btrace (void)
{
  struct perf_event_attr attr;
  pid_t child, pid;
  int status, file;

  errno = 0;
  child = fork ();
  switch (child)
    {
    case -1:
      warning (_("test branch tracing: cannot fork: %s."), strerror (errno));
      return 0;

    case 0:
      status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
      if (status != 0)
        {
          warning (_("test branch tracing: cannot PTRACE_TRACEME: %s."),
                   strerror (errno));
          _exit (1);
        }

      status = raise (SIGTRAP);
      if (status != 0)
        {
          warning (_("test branch tracing: cannot raise SIGTRAP: %s."),
                   strerror (errno));
          _exit (1);
        }

      _exit (1);

    default:
      pid = waitpid (child, &status, 0);
      if (pid != child)
        {
          warning (_("test branch tracing: bad pid %ld, error: %s."),
                   (long) pid, strerror (errno));
          return 0;
        }

      if (!WIFSTOPPED (status))
        {
          warning (_("test branch tracing: expected stop. status: %d."),
                   status);
          return 0;
        }

      memset (&attr, 0, sizeof (attr));

      attr.type = PERF_TYPE_HARDWARE;
      attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
      attr.sample_period = 1;
      attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;
      attr.exclude_kernel = 1;
      attr.exclude_hv = 1;
      attr.exclude_idle = 1;

      file = syscall (SYS_perf_event_open, &attr, child, -1, -1, 0);
      if (file >= 0)
        close (file);

      kill (child, SIGKILL);
      ptrace (PTRACE_KILL, child, NULL, NULL);

      pid = waitpid (child, &status, 0);
      if (pid != child)
        {
          warning (_("test branch tracing: bad pid %ld, error: %s."),
                   (long) pid, strerror (errno));
          if (!WIFSIGNALED (status))
            warning (_("test branch tracing: expected killed. status: %d."),
                     status);
        }

      return (file >= 0);
    }
}

/* Check whether an Intel cpu supports branch tracing.  */

static int
intel_supports_btrace (void)
{
  unsigned int cpuid, model, family;

  if (!x86_cpuid (1, &cpuid, NULL, NULL, NULL))
    return 0;

  family = (cpuid >> 8) & 0xf;
  model = (cpuid >> 4) & 0xf;

  switch (family)
    {
    case 0x6:
      model += (cpuid >> 12) & 0xf0;
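      /* CPUID leaf 1 returns the family in EAX bits 11:8, the model in bits
         7:4, and the extended model in bits 19:16; for family 0x6 the
         extended model forms the high nibble of the model.  For example, an
         EAX of 0x000306a9 decodes to family 0x6, model 0x3a (Ivy Bridge).  */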

      switch (model)
        {
        case 0x1a: /* Nehalem */
        case 0x1f:
        case 0x1e:
        case 0x2e:
        case 0x25: /* Westmere */
        case 0x2c:
        case 0x2f:
        case 0x2a: /* Sandy Bridge */
        case 0x2d:
        case 0x3a: /* Ivy Bridge */

          /* AAJ122: LBR, BTM, or BTS records may have incorrect branch
             "from" information after an EIST transition, T-states, C1E, or
             Adaptive Thermal Throttling.  */
          return 0;
        }
    }

  return 1;
}

/* Check whether the cpu supports branch tracing.  */

static int
cpu_supports_btrace (void)
{
  unsigned int ebx, ecx, edx;

  if (!x86_cpuid (0, NULL, &ebx, &ecx, &edx))
    return 0;

  if (ebx == signature_INTEL_ebx && ecx == signature_INTEL_ecx
      && edx == signature_INTEL_edx)
    return intel_supports_btrace ();

  /* Don't know about others.  Let's assume they do.  */
  return 1;
}

/* See linux-btrace.h.  */

int
linux_supports_btrace (struct target_ops *ops)
{
  static int cached;

  if (cached == 0)
    {
      if (!kernel_supports_btrace ())
        cached = -1;
      else if (!cpu_supports_btrace ())
        cached = -1;
      else
        cached = 1;
    }

  return cached > 0;
}

/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid)
{
  struct btrace_target_info *tinfo;
  int pid, pg;

  tinfo = xzalloc (sizeof (*tinfo));
  tinfo->ptid = ptid;

  tinfo->attr.size = sizeof (tinfo->attr);
  tinfo->attr.type = PERF_TYPE_HARDWARE;
  tinfo->attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
  tinfo->attr.sample_period = 1;

  /* We sample the from and to addresses.  */
  tinfo->attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;

  tinfo->attr.exclude_kernel = 1;
  tinfo->attr.exclude_hv = 1;
  tinfo->attr.exclude_idle = 1;

  tinfo->ptr_bits = 0;

  pid = ptid_get_lwp (ptid);
  if (pid == 0)
    pid = ptid_get_pid (ptid);

  errno = 0;
  tinfo->file = syscall (SYS_perf_event_open, &tinfo->attr, pid, -1, -1, 0);
  if (tinfo->file < 0)
    goto err;

  /* We try to allocate as big a buffer as we can get.
     We could allow the user to specify the size of the buffer, but then
     we'd leave this search for the maximum buffer size to them.  */
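  /* PG counts down from four, so with the usual 4 KiB pages this tries trace
     buffers of 64 KiB, 32 KiB, 16 KiB, 8 KiB, and finally 4 KiB, each plus
     one configuration page.  */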
  for (pg = 4; pg >= 0; --pg)
    {
      /* The number of pages we request needs to be a power of two.  */
      tinfo->size = 1 << pg;
      tinfo->buffer = mmap (NULL, perf_event_mmap_size (tinfo),
                            PROT_READ, MAP_SHARED, tinfo->file, 0);
      if (tinfo->buffer == MAP_FAILED)
        continue;

      return tinfo;
    }

  /* We were not able to allocate any buffer.  */
  close (tinfo->file);

 err:
  xfree (tinfo);
  return NULL;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  int errcode;

  errno = 0;
  errcode = munmap (tinfo->buffer, perf_event_mmap_size (tinfo));
  if (errcode != 0)
    return BTRACE_ERR_UNKNOWN;

  close (tinfo->file);
  xfree (tinfo);

  return BTRACE_ERR_NONE;
}

/* Check whether the branch trace has changed.  */

static int
linux_btrace_has_changed (struct btrace_target_info *tinfo)
{
  volatile struct perf_event_mmap_page *header = perf_event_header (tinfo);

  return header->data_head != tinfo->data_head;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_read_btrace (VEC (btrace_block_s) **btrace,
                   struct btrace_target_info *tinfo,
                   enum btrace_read_type type)
{
  volatile struct perf_event_mmap_page *header;
  const uint8_t *begin, *end, *start;
  unsigned long data_head, data_tail, retries = 5;
  size_t buffer_size, size;

  /* For delta reads, we return at least the partial last block containing
     the current PC.  */
  if (type == BTRACE_READ_NEW && !linux_btrace_has_changed (tinfo))
    return BTRACE_ERR_NONE;

  header = perf_event_header (tinfo);
  buffer_size = perf_event_buffer_size (tinfo);
  data_tail = tinfo->data_head;

  /* We may need to retry reading the trace.  See below.  */
  while (retries--)
    {
      data_head = header->data_head;

      /* Delete any leftover trace from the previous iteration.  */
      VEC_free (btrace_block_s, *btrace);

      if (type == BTRACE_READ_DELTA)
        {
          /* Determine the number of bytes to read and check for buffer
             overflows.  */

          /* Check for data head overflows.  We might be able to recover from
             those but they are very unlikely and it's not really worth the
             effort, I think.  */
          if (data_head < data_tail)
            return BTRACE_ERR_OVERFLOW;

          /* If the buffer is smaller than the trace delta, we overflowed.  */
          size = data_head - data_tail;
          if (buffer_size < size)
            return BTRACE_ERR_OVERFLOW;
        }
      else
        {
          /* Read the entire buffer.  */
          size = buffer_size;

          /* Adjust the size if the buffer has not overflowed yet.  */
          if (data_head < size)
            size = data_head;
        }

      /* Data_head keeps growing; the buffer itself is circular.  */
      begin = perf_event_buffer_begin (tinfo);
      start = begin + data_head % buffer_size;
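      /* For example, with a 4 KiB buffer and DATA_HEAD at 5000, START points
         904 bytes past BEGIN and the backwards read wraps from BEGIN around
         to END.  */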

      if (data_head <= buffer_size)
        end = start;
      else
        end = perf_event_buffer_end (tinfo);

      *btrace = perf_event_read_bts (tinfo, begin, end, start, size);

      /* The stopping thread notifies its ptracer before it is scheduled out.
         On multi-core systems, the debugger might therefore run while the
         kernel is still writing the last branch trace records.

         Let's check whether the data head moved while we read the trace.  */
      if (data_head == header->data_head)
        break;
    }

  tinfo->data_head = data_head;

  /* Prune the incomplete last block (i.e. the first one of inferior execution)
     if we're not doing a delta read.  There is no way of filling in its zeroed
     BEGIN element.  */
  if (!VEC_empty (btrace_block_s, *btrace) && type != BTRACE_READ_DELTA)
    VEC_pop (btrace_block_s, *btrace);

  return BTRACE_ERR_NONE;
}

#else /* !HAVE_LINUX_PERF_EVENT_H */

/* See linux-btrace.h.  */

int
linux_supports_btrace (struct target_ops *ops)
{
  return 0;
}

/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid)
{
  return NULL;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  return BTRACE_ERR_NOT_SUPPORTED;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_read_btrace (VEC (btrace_block_s) **btrace,
                   struct btrace_target_info *tinfo,
                   enum btrace_read_type type)
{
  return BTRACE_ERR_NOT_SUPPORTED;
}

#endif /* !HAVE_LINUX_PERF_EVENT_H */