/* Linux-dependent part of branch trace support for GDB, and GDBserver.

   Copyright (C) 2013-2017 Free Software Foundation, Inc.

   Contributed by Intel Corp. <markus.t.metzger@intel.com>

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "common-defs.h"
#include "linux-btrace.h"
#include "common-regcache.h"
#include "gdb_wait.h"
#include "x86-cpuid.h"
#include "filestuff.h"

#include <inttypes.h>

#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif

#if HAVE_LINUX_PERF_EVENT_H && defined(SYS_perf_event_open)
#include <unistd.h>
#include <sys/mman.h>
#include <sys/user.h>
#include "nat/gdb_ptrace.h"
#include <sys/types.h>
#include <signal.h>

/* A branch trace record in perf_event.  */
struct perf_event_bts
{
  /* The linear address of the branch source.  */
  uint64_t from;

  /* The linear address of the branch destination.  */
  uint64_t to;
};

/* A perf_event branch trace sample.  */
struct perf_event_sample
{
  /* The perf_event sample header.  */
  struct perf_event_header header;

  /* The perf_event branch tracing payload.  */
  struct perf_event_bts bts;
};
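
/* With attr.sample_type == (PERF_SAMPLE_IP | PERF_SAMPLE_ADDR), as used for
   BTS below, the kernel writes the instruction pointer (the branch source)
   followed by the sampled address (the branch destination) into each record,
   matching the layout of struct perf_event_bts.  */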

/* Identify the cpu we're running on.  */
static struct btrace_cpu
btrace_this_cpu (void)
{
  struct btrace_cpu cpu;
  unsigned int eax, ebx, ecx, edx;
  int ok;

  memset (&cpu, 0, sizeof (cpu));

  ok = x86_cpuid (0, &eax, &ebx, &ecx, &edx);
  if (ok != 0)
    {
      if (ebx == signature_INTEL_ebx && ecx == signature_INTEL_ecx
	  && edx == signature_INTEL_edx)
	{
	  unsigned int cpuid, ignore;

	  ok = x86_cpuid (1, &cpuid, &ignore, &ignore, &ignore);
	  if (ok != 0)
	    {
	      cpu.vendor = CV_INTEL;

	      cpu.family = (cpuid >> 8) & 0xf;
	      cpu.model = (cpuid >> 4) & 0xf;

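	      /* On family 0x6, the extended model bits (EAX[19:16]) form
		 the high nibble of the model number.  For example, a
		 CPUID.1 signature of 0x000306a9 decodes to family 0x6,
		 model 0x3a (Ivy Bridge).  */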
	      if (cpu.family == 0x6)
		cpu.model += (cpuid >> 12) & 0xf0;
	    }
	}
    }

  return cpu;
}

/* Return non-zero if there is new data in PEV; zero otherwise.  */

static int
perf_event_new_data (const struct perf_event_buffer *pev)
{
  return *pev->data_head != pev->last_head;
}

/* Copy the last SIZE bytes from PEV ending at DATA_HEAD and return a pointer
   to the memory holding the copy.
   The caller is responsible for freeing the memory.  */
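
/* For example, with PEV->SIZE == 8, DATA_HEAD == 10 and SIZE == 4, we get
   DATA_TAIL == 6; START then points at offset 6 and STOP at offset 2, so
   the copy is stitched together from the buffer's tail [6, 8) and its
   head [0, 2).  */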

static gdb_byte *
perf_event_read (const struct perf_event_buffer *pev, __u64 data_head,
		 size_t size)
{
  const gdb_byte *begin, *end, *start, *stop;
  gdb_byte *buffer;
  size_t buffer_size;
  __u64 data_tail;

  if (size == 0)
    return NULL;

  /* We should never ask for more data than the buffer can hold.  */
  buffer_size = pev->size;
  gdb_assert (size <= buffer_size);

  /* If we ask for more data than we seem to have, we wrap around and read
     data from the end of the buffer.  This is already handled by the %
     BUFFER_SIZE operation, below.  Here, we just need to make sure that we
     don't underflow.

     Note that this is perfectly OK for perf event buffers where data_head
     doesn't grow indefinitely and instead wraps around to remain within the
     buffer's boundaries.  */
  if (data_head < size)
    data_head += buffer_size;

  gdb_assert (size <= data_head);
  data_tail = data_head - size;

  begin = pev->mem;
  start = begin + data_tail % buffer_size;
  stop = begin + data_head % buffer_size;

  buffer = (gdb_byte *) xmalloc (size);

  if (start < stop)
    memcpy (buffer, start, stop - start);
  else
    {
      end = begin + buffer_size;

      memcpy (buffer, start, end - start);
      memcpy (buffer + (end - start), begin, stop - begin);
    }

  return buffer;
}

/* Copy the perf event buffer data from PEV.
   Store a pointer to the copy into DATA and its size in PSIZE.  */

static void
perf_event_read_all (struct perf_event_buffer *pev, gdb_byte **data,
		     size_t *psize)
{
  size_t size;
  __u64 data_head;

  data_head = *pev->data_head;
  size = pev->size;

  *data = perf_event_read (pev, data_head, size);
  *psize = size;

  pev->last_head = data_head;
}

/* Determine the intel_pt perf_event type as registered in sysfs.
   Returns zero on success and fills in TYPE; returns -1 otherwise.  */

static int
perf_event_pt_event_type (int *type)
{
  FILE *file;
  int found;

  file = fopen ("/sys/bus/event_source/devices/intel_pt/type", "r");
  if (file == NULL)
    return -1;

  found = fscanf (file, "%d", type);

  fclose (file);

  if (found == 1)
    return 0;
  return -1;
}

/* Try to determine the start address of the Linux kernel.  */

static uint64_t
linux_determine_kernel_start (void)
{
  static uint64_t kernel_start;
  static int cached;
  FILE *file;

  if (cached != 0)
    return kernel_start;

  cached = 1;

  file = gdb_fopen_cloexec ("/proc/kallsyms", "r");
  if (file == NULL)
    return kernel_start;

  while (!feof (file))
    {
      char buffer[1024], symbol[8], *line;
      uint64_t addr;
      int match;

      line = fgets (buffer, sizeof (buffer), file);
      if (line == NULL)
	break;

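      /* A matching line in /proc/kallsyms looks like
	 "ffffffff81000000 T _text".  */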
      match = sscanf (line, "%" SCNx64 " %*[tT] %7s", &addr, symbol);
      if (match != 2)
	continue;

      if (strcmp (symbol, "_text") == 0)
	{
	  kernel_start = addr;
	  break;
	}
    }

  fclose (file);

  return kernel_start;
}

/* Check whether an address is in the kernel.  */

static inline int
perf_event_is_kernel_addr (uint64_t addr)
{
  uint64_t kernel_start;

  kernel_start = linux_determine_kernel_start ();
  if (kernel_start != 0ull)
    return (addr >= kernel_start);

  /* If we don't know the kernel's start address, let's check the most
     significant bit.  This will work at least for 64-bit kernels, which
     map the kernel into the upper canonical half of the address space.  */
  return ((addr & (1ull << 63)) != 0);
}

/* Check whether a perf event record should be skipped.  */

static inline int
perf_event_skip_bts_record (const struct perf_event_bts *bts)
{
  /* The hardware may report branches from kernel into user space.  Branches
     from user into kernel space will be suppressed.  We filter the former to
     provide a consistent branch trace excluding kernel.  */
  return perf_event_is_kernel_addr (bts->from);
}

/* Perform a few consistency checks on a perf event sample record.  This is
   meant to catch cases when we get out of sync with the perf event stream.  */

static inline int
perf_event_sample_ok (const struct perf_event_sample *sample)
{
  if (sample->header.type != PERF_RECORD_SAMPLE)
    return 0;

  if (sample->header.size != sizeof (*sample))
    return 0;

  return 1;
}

/* Branch trace is collected in a circular buffer [begin; end) as pairs of from
   and to addresses (plus a header).

   Start points into that buffer at the next sample position.
   We read the collected samples backwards from start.

   While reading the samples, we convert the information into a list of blocks.
   For two adjacent samples s1 and s2, we form a block b such that b.begin =
   s1.to and b.end = s2.from.

   In case the buffer overflows during sampling, one sample may have its lower
   part at the end and its upper part at the beginning of the buffer.  */

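/* For example, adjacent samples s1 = { from = 0x100, to = 0x200 } and
   s2 = { from = 0x250, to = 0x300 } form a block with begin == 0x200 and
   end == 0x250: the inferior entered the block at s1.to and branched away
   again at s2.from.  */
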
static VEC (btrace_block_s) *
perf_event_read_bts (struct btrace_target_info* tinfo, const uint8_t *begin,
		     const uint8_t *end, const uint8_t *start, size_t size)
{
  VEC (btrace_block_s) *btrace = NULL;
  struct perf_event_sample sample;
  size_t read = 0;
  struct btrace_block block = { 0, 0 };
  struct regcache *regcache;

  gdb_assert (begin <= start);
  gdb_assert (start <= end);

  /* The first block ends at the current pc.  */
  regcache = get_thread_regcache_for_ptid (tinfo->ptid);
  block.end = regcache_read_pc (regcache);

  /* The buffer may contain a partial record as its last entry (i.e. when the
     buffer size is not a multiple of the sample size).  */
  read = sizeof (sample) - 1;

  for (; read < size; read += sizeof (sample))
    {
      const struct perf_event_sample *psample;

      /* Find the next perf_event sample in a backwards traversal.  */
      start -= sizeof (sample);

      /* If we're still inside the buffer, the sample is contiguous and we
	 can read it in place.  */
      if (begin <= start)
	psample = (const struct perf_event_sample *) start;
      else
	{
	  int missing;

	  /* We're to the left of the ring buffer; we will wrap around and
	     reappear at the very right of the ring buffer.  */

	  missing = (begin - start);
	  start = (end - missing);

	  /* If the entire sample is missing, we're done.  */
	  if (missing == sizeof (sample))
	    psample = (const struct perf_event_sample *) start;
	  else
	    {
	      uint8_t *stack;

	      /* The sample wrapped around.  The lower part is at the end and
		 the upper part is at the beginning of the buffer.  */
	      stack = (uint8_t *) &sample;

	      /* Copy the two parts so we have a contiguous sample.  */
	      memcpy (stack, start, missing);
	      memcpy (stack + missing, begin, sizeof (sample) - missing);

	      psample = &sample;
	    }
	}

      if (!perf_event_sample_ok (psample))
	{
	  warning (_("Branch trace may be incomplete."));
	  break;
	}

      if (perf_event_skip_bts_record (&psample->bts))
	continue;

      /* We found a valid sample, so we can complete the current block.  */
      block.begin = psample->bts.to;

      VEC_safe_push (btrace_block_s, btrace, &block);

      /* Start the next block.  */
      block.end = psample->bts.from;
    }

  /* Push the last block (i.e. the first one of inferior execution), as well.
     We don't know where it ends, but we know where it starts.  If we're
     reading delta trace, we can fill in the start address later on.
     Otherwise we will prune it.  */
  block.begin = 0;
  VEC_safe_push (btrace_block_s, btrace, &block);

  return btrace;
}

/* Check whether the kernel supports BTS.  */

static int
kernel_supports_bts (void)
{
  struct perf_event_attr attr;
  pid_t child, pid;
  int status, file;

  errno = 0;
  child = fork ();
  switch (child)
    {
    case -1:
      warning (_("test bts: cannot fork: %s."), safe_strerror (errno));
      return 0;

    case 0:
      status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
      if (status != 0)
	{
	  warning (_("test bts: cannot PTRACE_TRACEME: %s."),
		   safe_strerror (errno));
	  _exit (1);
	}

      status = raise (SIGTRAP);
      if (status != 0)
	{
	  warning (_("test bts: cannot raise SIGTRAP: %s."),
		   safe_strerror (errno));
	  _exit (1);
	}

      _exit (1);

    default:
      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test bts: bad pid %ld, error: %s."),
		   (long) pid, safe_strerror (errno));
	  return 0;
	}

      if (!WIFSTOPPED (status))
	{
	  warning (_("test bts: expected stop. status: %d."),
		   status);
	  return 0;
	}

      memset (&attr, 0, sizeof (attr));

      attr.type = PERF_TYPE_HARDWARE;
      attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
      attr.sample_period = 1;
      attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;
      attr.exclude_kernel = 1;
      attr.exclude_hv = 1;
      attr.exclude_idle = 1;

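      /* perf_event_open arguments: attr, pid, cpu (-1 for any), group_fd
	 (-1 for none), and flags.  */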
      file = syscall (SYS_perf_event_open, &attr, child, -1, -1, 0);
      if (file >= 0)
	close (file);

      kill (child, SIGKILL);
      ptrace (PTRACE_KILL, child, NULL, NULL);

      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test bts: bad pid %ld, error: %s."),
		   (long) pid, safe_strerror (errno));
	  if (!WIFSIGNALED (status))
	    warning (_("test bts: expected killed. status: %d."),
		     status);
	}

      return (file >= 0);
    }
}

/* Check whether the kernel supports Intel Processor Trace.  */

static int
kernel_supports_pt (void)
{
  struct perf_event_attr attr;
  pid_t child, pid;
  int status, file, type;

  errno = 0;
  child = fork ();
  switch (child)
    {
    case -1:
      warning (_("test pt: cannot fork: %s."), safe_strerror (errno));
      return 0;

    case 0:
      status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
      if (status != 0)
	{
	  warning (_("test pt: cannot PTRACE_TRACEME: %s."),
		   safe_strerror (errno));
	  _exit (1);
	}

      status = raise (SIGTRAP);
      if (status != 0)
	{
	  warning (_("test pt: cannot raise SIGTRAP: %s."),
		   safe_strerror (errno));
	  _exit (1);
	}

      _exit (1);

    default:
      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test pt: bad pid %ld, error: %s."),
		   (long) pid, safe_strerror (errno));
	  return 0;
	}

      if (!WIFSTOPPED (status))
	{
	  warning (_("test pt: expected stop. status: %d."),
		   status);
	  return 0;
	}

      status = perf_event_pt_event_type (&type);
      if (status != 0)
	file = -1;
      else
	{
	  memset (&attr, 0, sizeof (attr));

	  attr.size = sizeof (attr);
	  attr.type = type;
	  attr.exclude_kernel = 1;
	  attr.exclude_hv = 1;
	  attr.exclude_idle = 1;

	  file = syscall (SYS_perf_event_open, &attr, child, -1, -1, 0);
	  if (file >= 0)
	    close (file);
	}

      kill (child, SIGKILL);
      ptrace (PTRACE_KILL, child, NULL, NULL);

      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test pt: bad pid %ld, error: %s."),
		   (long) pid, safe_strerror (errno));
	  if (!WIFSIGNALED (status))
	    warning (_("test pt: expected killed. status: %d."),
		     status);
	}

      return (file >= 0);
    }
}

/* Check whether an Intel cpu supports BTS.  */

static int
intel_supports_bts (const struct btrace_cpu *cpu)
{
  switch (cpu->family)
    {
    case 0x6:
      switch (cpu->model)
	{
	case 0x1a: /* Nehalem */
	case 0x1f:
	case 0x1e:
	case 0x2e:
	case 0x25: /* Westmere */
	case 0x2c:
	case 0x2f:
	case 0x2a: /* Sandy Bridge */
	case 0x2d:
	case 0x3a: /* Ivy Bridge */

	  /* AAJ122: LBR, BTM, or BTS records may have incorrect branch
	     "from" information after an EIST transition, T-states, C1E, or
	     Adaptive Thermal Throttling.  */
	  return 0;
	}
    }

  return 1;
}

/* Check whether the cpu supports BTS.  */

static int
cpu_supports_bts (void)
{
  struct btrace_cpu cpu;

  cpu = btrace_this_cpu ();
  switch (cpu.vendor)
    {
    default:
      /* Don't know about others.  Let's assume they do.  */
      return 1;

    case CV_INTEL:
      return intel_supports_bts (&cpu);
    }
}

/* Check whether the linux target supports BTS.  */

static int
linux_supports_bts (void)
{
  static int cached;

  if (cached == 0)
    {
      if (!kernel_supports_bts ())
	cached = -1;
      else if (!cpu_supports_bts ())
	cached = -1;
      else
	cached = 1;
    }

  return cached > 0;
}

/* Check whether the linux target supports Intel Processor Trace.  */

static int
linux_supports_pt (void)
{
  static int cached;

  if (cached == 0)
    {
      if (!kernel_supports_pt ())
	cached = -1;
      else
	cached = 1;
    }

  return cached > 0;
}

/* See linux-btrace.h.  */

int
linux_supports_btrace (struct target_ops *ops, enum btrace_format format)
{
  switch (format)
    {
    case BTRACE_FORMAT_NONE:
      return 0;

    case BTRACE_FORMAT_BTS:
      return linux_supports_bts ();

    case BTRACE_FORMAT_PT:
      return linux_supports_pt ();
    }

  internal_error (__FILE__, __LINE__, _("Unknown branch trace format"));
}

/* Enable branch tracing in BTS format.  */

static struct btrace_target_info *
linux_enable_bts (ptid_t ptid, const struct btrace_config_bts *conf)
{
  struct perf_event_mmap_page *header;
  struct btrace_target_info *tinfo;
  struct btrace_tinfo_bts *bts;
  size_t size, pages;
  __u64 data_offset;
  int pid, pg;

  tinfo = XCNEW (struct btrace_target_info);
  tinfo->ptid = ptid;

  tinfo->conf.format = BTRACE_FORMAT_BTS;
  bts = &tinfo->variant.bts;

  bts->attr.size = sizeof (bts->attr);
  bts->attr.type = PERF_TYPE_HARDWARE;
  bts->attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
  bts->attr.sample_period = 1;

  /* We sample the branch source and destination addresses.  */
  bts->attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;

  bts->attr.exclude_kernel = 1;
  bts->attr.exclude_hv = 1;
  bts->attr.exclude_idle = 1;

  pid = ptid_get_lwp (ptid);
  if (pid == 0)
    pid = ptid_get_pid (ptid);

  errno = 0;
  bts->file = syscall (SYS_perf_event_open, &bts->attr, pid, -1, -1, 0);
  if (bts->file < 0)
    goto err_out;

  /* Convert the requested size in bytes to pages (rounding up).  */
  pages = ((size_t) conf->size / PAGE_SIZE
	   + ((conf->size % PAGE_SIZE) == 0 ? 0 : 1));
  /* We need at least one page.  */
  if (pages == 0)
    pages = 1;

  /* The buffer size can be requested in powers of two pages.  Adjust PAGES
     to the next power of two.  */
  for (pg = 0; pages != ((size_t) 1 << pg); ++pg)
    if ((pages & ((size_t) 1 << pg)) != 0)
      pages += ((size_t) 1 << pg);
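  /* For example, PAGES == 5 (binary 101) becomes 6 after the carry out of
     bit 0, then 8 after the carry out of bit 1, leaving a single set bit
     and terminating the loop.  */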

  /* We try to allocate the requested size.
     If that fails, try to get as much as we can.  */
  for (; pages > 0; pages >>= 1)
    {
      size_t length;
      __u64 data_size;

      data_size = (__u64) pages * PAGE_SIZE;

      /* Don't ask for more than we can represent in the configuration.  */
      if ((__u64) UINT_MAX < data_size)
	continue;

      size = (size_t) data_size;
      length = size + PAGE_SIZE;

      /* Check for overflows.  */
      if ((__u64) length != data_size + PAGE_SIZE)
	continue;

      /* The number of pages we request needs to be a power of two.  */
      header = ((struct perf_event_mmap_page *)
		mmap (NULL, length, PROT_READ, MAP_SHARED, bts->file, 0));
      if (header != MAP_FAILED)
	break;
    }

  if (pages == 0)
    goto err_file;

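  /* The first page of the mapping is the perf_event configuration page;
     by default, the trace data starts directly behind it.  */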
  data_offset = PAGE_SIZE;

#if defined (PERF_ATTR_SIZE_VER5)
  if (offsetof (struct perf_event_mmap_page, data_size) <= header->size)
    {
      __u64 data_size;

      data_offset = header->data_offset;
      data_size = header->data_size;

      size = (unsigned int) data_size;

      /* Check for overflows.  */
      if ((__u64) size != data_size)
	{
	  munmap ((void *) header, size + PAGE_SIZE);
	  goto err_file;
	}
    }
#endif /* defined (PERF_ATTR_SIZE_VER5) */

  bts->header = header;
  bts->bts.mem = ((const uint8_t *) header) + data_offset;
  bts->bts.size = size;
  bts->bts.data_head = &header->data_head;
  bts->bts.last_head = 0ull;

  tinfo->conf.bts.size = (unsigned int) size;
  return tinfo;

 err_file:
  /* We were not able to allocate any buffer.  */
  close (bts->file);

 err_out:
  xfree (tinfo);
  return NULL;
}

#if defined (PERF_ATTR_SIZE_VER5)

/* Enable branch tracing in Intel Processor Trace format.  */

static struct btrace_target_info *
linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
{
  struct perf_event_mmap_page *header;
  struct btrace_target_info *tinfo;
  struct btrace_tinfo_pt *pt;
  size_t pages, size;
  int pid, pg, errcode, type;

  if (conf->size == 0)
    return NULL;

  errcode = perf_event_pt_event_type (&type);
  if (errcode != 0)
    return NULL;

  pid = ptid_get_lwp (ptid);
  if (pid == 0)
    pid = ptid_get_pid (ptid);

  tinfo = XCNEW (struct btrace_target_info);
  tinfo->ptid = ptid;

  tinfo->conf.format = BTRACE_FORMAT_PT;
  pt = &tinfo->variant.pt;

  pt->attr.size = sizeof (pt->attr);
  pt->attr.type = type;

  pt->attr.exclude_kernel = 1;
  pt->attr.exclude_hv = 1;
  pt->attr.exclude_idle = 1;

  errno = 0;
  pt->file = syscall (SYS_perf_event_open, &pt->attr, pid, -1, -1, 0);
  if (pt->file < 0)
    goto err;

  /* Allocate the configuration page.  */
  header = ((struct perf_event_mmap_page *)
	    mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		  pt->file, 0));
  if (header == MAP_FAILED)
    goto err_file;

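  /* The location and size of the AUX area are communicated to the kernel
     through fields in the configuration page; place it directly behind the
     perf_event data area.  */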
  header->aux_offset = header->data_offset + header->data_size;

  /* Convert the requested size in bytes to pages (rounding up).  */
  pages = ((size_t) conf->size / PAGE_SIZE
	   + ((conf->size % PAGE_SIZE) == 0 ? 0 : 1));
  /* We need at least one page.  */
  if (pages == 0)
    pages = 1;

  /* The buffer size can be requested in powers of two pages.  Adjust PAGES
     to the next power of two.  */
  for (pg = 0; pages != ((size_t) 1 << pg); ++pg)
    if ((pages & ((size_t) 1 << pg)) != 0)
      pages += ((size_t) 1 << pg);

  /* We try to allocate the requested size.
     If that fails, try to get as much as we can.  */
  for (; pages > 0; pages >>= 1)
    {
      size_t length;
      __u64 data_size;

      data_size = (__u64) pages * PAGE_SIZE;

      /* Don't ask for more than we can represent in the configuration.  */
      if ((__u64) UINT_MAX < data_size)
	continue;

      size = (size_t) data_size;

      /* Check for overflows.  */
      if ((__u64) size != data_size)
	continue;

      header->aux_size = data_size;
      length = size;

      pt->pt.mem = ((const uint8_t *)
		    mmap (NULL, length, PROT_READ, MAP_SHARED, pt->file,
			  header->aux_offset));
      if (pt->pt.mem != MAP_FAILED)
	break;
    }

  if (pages == 0)
    goto err_conf;

  pt->header = header;
  pt->pt.size = size;
  pt->pt.data_head = &header->aux_head;

  tinfo->conf.pt.size = (unsigned int) size;
  return tinfo;

 err_conf:
  munmap ((void *) header, PAGE_SIZE);

 err_file:
  close (pt->file);

 err:
  xfree (tinfo);
  return NULL;
}

#else /* !defined (PERF_ATTR_SIZE_VER5) */

static struct btrace_target_info *
linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
{
  errno = EOPNOTSUPP;
  return NULL;
}

#endif /* !defined (PERF_ATTR_SIZE_VER5) */

/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
{
  struct btrace_target_info *tinfo;

  tinfo = NULL;
  switch (conf->format)
    {
    case BTRACE_FORMAT_NONE:
      break;

    case BTRACE_FORMAT_BTS:
      tinfo = linux_enable_bts (ptid, &conf->bts);
      break;

    case BTRACE_FORMAT_PT:
      tinfo = linux_enable_pt (ptid, &conf->pt);
      break;
    }

  return tinfo;
}

/* Disable BTS tracing.  */

static enum btrace_error
linux_disable_bts (struct btrace_tinfo_bts *tinfo)
{
  munmap ((void *) tinfo->header, tinfo->bts.size + PAGE_SIZE);
  close (tinfo->file);

  return BTRACE_ERR_NONE;
}

/* Disable Intel Processor Trace tracing.  */

static enum btrace_error
linux_disable_pt (struct btrace_tinfo_pt *tinfo)
{
  munmap ((void *) tinfo->pt.mem, tinfo->pt.size);
  munmap ((void *) tinfo->header, PAGE_SIZE);
  close (tinfo->file);

  return BTRACE_ERR_NONE;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  enum btrace_error errcode;

  errcode = BTRACE_ERR_NOT_SUPPORTED;
  switch (tinfo->conf.format)
    {
    case BTRACE_FORMAT_NONE:
      break;

    case BTRACE_FORMAT_BTS:
      errcode = linux_disable_bts (&tinfo->variant.bts);
      break;

    case BTRACE_FORMAT_PT:
      errcode = linux_disable_pt (&tinfo->variant.pt);
      break;
    }

  if (errcode == BTRACE_ERR_NONE)
    xfree (tinfo);

  return errcode;
}

/* Read branch trace data in BTS format for the thread given by TINFO into
   BTRACE using the TYPE reading method.  */

static enum btrace_error
linux_read_bts (struct btrace_data_bts *btrace,
		struct btrace_target_info *tinfo,
		enum btrace_read_type type)
{
  struct perf_event_buffer *pevent;
  const uint8_t *begin, *end, *start;
  size_t buffer_size, size;
  __u64 data_head, data_tail;
  unsigned int retries = 5;

  pevent = &tinfo->variant.bts.bts;

  /* For delta reads, we return at least the partial last block containing
     the current PC.  */
  if (type == BTRACE_READ_NEW && !perf_event_new_data (pevent))
    return BTRACE_ERR_NONE;

  buffer_size = pevent->size;
  data_tail = pevent->last_head;

  /* We may need to retry reading the trace.  See below.  */
  while (retries--)
    {
      data_head = *pevent->data_head;

      /* Delete any leftover trace from the previous iteration.  */
      VEC_free (btrace_block_s, btrace->blocks);

      if (type == BTRACE_READ_DELTA)
	{
	  __u64 data_size;

	  /* Determine the number of bytes to read and check for buffer
	     overflows.  */

	  /* Check for data head overflows.  We might be able to recover from
	     those but they are very unlikely and it's not really worth the
	     effort, I think.  */
	  if (data_head < data_tail)
	    return BTRACE_ERR_OVERFLOW;

	  /* If the buffer is smaller than the trace delta, we overflowed.  */
	  data_size = data_head - data_tail;
	  if (buffer_size < data_size)
	    return BTRACE_ERR_OVERFLOW;

	  /* DATA_SIZE <= BUFFER_SIZE and therefore fits into a size_t.  */
	  size = (size_t) data_size;
	}
      else
	{
	  /* Read the entire buffer.  */
	  size = buffer_size;

	  /* Adjust the size if the buffer has not overflowed, yet.  */
	  if (data_head < size)
	    size = (size_t) data_head;
	}
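
      /* For example, with an 8 KiB buffer and DATA_HEAD == 5120, the buffer
	 has not wrapped yet and we read just those first 5120 bytes.  */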

      /* Data_head keeps growing; the buffer itself is circular.  */
      begin = pevent->mem;
      start = begin + data_head % buffer_size;

      if (data_head <= buffer_size)
	end = start;
      else
	end = begin + pevent->size;

      btrace->blocks = perf_event_read_bts (tinfo, begin, end, start, size);

      /* The stopping thread notifies its ptracer before it is scheduled out.
	 On multi-core systems, the debugger might therefore run while the
	 kernel is still writing the last branch trace records.

	 Let's check whether the data head moved while we read the trace.  */
      if (data_head == *pevent->data_head)
	break;
    }

  pevent->last_head = data_head;

  /* Prune the incomplete last block (i.e. the first one of inferior execution)
     if we're not doing a delta read.  There is no way of filling in its zeroed
     BEGIN element.  */
  if (!VEC_empty (btrace_block_s, btrace->blocks)
      && type != BTRACE_READ_DELTA)
    VEC_pop (btrace_block_s, btrace->blocks);

  return BTRACE_ERR_NONE;
}

/* Fill in the Intel Processor Trace configuration information.  */

static void
linux_fill_btrace_pt_config (struct btrace_data_pt_config *conf)
{
  conf->cpu = btrace_this_cpu ();
}

/* Read branch trace data in Intel Processor Trace format for the thread
   given by TINFO into BTRACE using the TYPE reading method.  */

static enum btrace_error
linux_read_pt (struct btrace_data_pt *btrace,
	       struct btrace_target_info *tinfo,
	       enum btrace_read_type type)
{
  struct perf_event_buffer *pt;

  pt = &tinfo->variant.pt.pt;

  linux_fill_btrace_pt_config (&btrace->config);

  switch (type)
    {
    case BTRACE_READ_DELTA:
      /* We don't support delta reads.  The data head (i.e. aux_head) wraps
	 around to stay inside the aux buffer.  */
      return BTRACE_ERR_NOT_SUPPORTED;

    case BTRACE_READ_NEW:
      if (!perf_event_new_data (pt))
	return BTRACE_ERR_NONE;

      /* Fall through.  */
    case BTRACE_READ_ALL:
      perf_event_read_all (pt, &btrace->data, &btrace->size);
      return BTRACE_ERR_NONE;
    }

  internal_error (__FILE__, __LINE__, _("Unknown btrace read type."));
}

/* See linux-btrace.h.  */

enum btrace_error
linux_read_btrace (struct btrace_data *btrace,
		   struct btrace_target_info *tinfo,
		   enum btrace_read_type type)
{
  switch (tinfo->conf.format)
    {
    case BTRACE_FORMAT_NONE:
      return BTRACE_ERR_NOT_SUPPORTED;

    case BTRACE_FORMAT_BTS:
      /* We read btrace in BTS format.  */
      btrace->format = BTRACE_FORMAT_BTS;
      btrace->variant.bts.blocks = NULL;

      return linux_read_bts (&btrace->variant.bts, tinfo, type);

    case BTRACE_FORMAT_PT:
      /* We read btrace in Intel Processor Trace format.  */
      btrace->format = BTRACE_FORMAT_PT;
      btrace->variant.pt.data = NULL;
      btrace->variant.pt.size = 0;

      return linux_read_pt (&btrace->variant.pt, tinfo, type);
    }

  internal_error (__FILE__, __LINE__, _("Unknown branch trace format."));
}

/* See linux-btrace.h.  */

const struct btrace_config *
linux_btrace_conf (const struct btrace_target_info *tinfo)
{
  return &tinfo->conf;
}

#else /* !HAVE_LINUX_PERF_EVENT_H */

/* See linux-btrace.h.  */

int
linux_supports_btrace (struct target_ops *ops, enum btrace_format format)
{
  return 0;
}

/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
{
  return NULL;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  return BTRACE_ERR_NOT_SUPPORTED;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_read_btrace (struct btrace_data *btrace,
		   struct btrace_target_info *tinfo,
		   enum btrace_read_type type)
{
  return BTRACE_ERR_NOT_SUPPORTED;
}

/* See linux-btrace.h.  */

const struct btrace_config *
linux_btrace_conf (const struct btrace_target_info *tinfo)
{
  return NULL;
}

#endif /* !HAVE_LINUX_PERF_EVENT_H */