/* Linux-dependent part of branch trace support for GDB, and GDBserver.

   Copyright (C) 2013-2016 Free Software Foundation, Inc.

   Contributed by Intel Corp. <markus.t.metzger@intel.com>

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "common-defs.h"
#include "linux-btrace.h"
#include "common-regcache.h"
#include "gdb_wait.h"
#include "x86-cpuid.h"
#include "filestuff.h"

#include <inttypes.h>

#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif

#if HAVE_LINUX_PERF_EVENT_H && defined(SYS_perf_event_open)
#include <unistd.h>
#include <sys/mman.h>
#include <sys/user.h>
#include "nat/gdb_ptrace.h"
#include <sys/types.h>
#include <signal.h>

/* A branch trace record in perf_event.  */
struct perf_event_bts
{
  /* The linear address of the branch source.  */
  uint64_t from;

  /* The linear address of the branch destination.  */
  uint64_t to;
};

/* A perf_event branch trace sample.  */
struct perf_event_sample
{
  /* The perf_event sample header.  */
  struct perf_event_header header;

  /* The perf_event branch tracing payload.  */
  struct perf_event_bts bts;
};
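
/* With attr.sample_type set to PERF_SAMPLE_IP | PERF_SAMPLE_ADDR, as done
   for BTS below, each sample carries the IP first and the ADDR second; for
   branch samples these are the branch source and destination, matching the
   layout of struct perf_event_bts above.  */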

/* Identify the cpu we're running on.  */
static struct btrace_cpu
btrace_this_cpu (void)
{
  struct btrace_cpu cpu;
  unsigned int eax, ebx, ecx, edx;
  int ok;

  memset (&cpu, 0, sizeof (cpu));

  ok = x86_cpuid (0, &eax, &ebx, &ecx, &edx);
  if (ok != 0)
    {
      if (ebx == signature_INTEL_ebx && ecx == signature_INTEL_ecx
	  && edx == signature_INTEL_edx)
	{
	  unsigned int cpuid, ignore;

	  ok = x86_cpuid (1, &cpuid, &ignore, &ignore, &ignore);
	  if (ok != 0)
	    {
	      cpu.vendor = CV_INTEL;

	      cpu.family = (cpuid >> 8) & 0xf;
	      cpu.model = (cpuid >> 4) & 0xf;

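	      /* For family 0x6, CPUID leaf 1 EAX bits 16-19 hold the
		 extended model; (cpuid >> 12) & 0xf0 moves those bits
		 into the high nibble of the model number.  */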
	      if (cpu.family == 0x6)
		cpu.model += (cpuid >> 12) & 0xf0;
	    }
	}
    }

  return cpu;
}

/* Return non-zero if there is new data in PEV; zero otherwise.  */

static int
perf_event_new_data (const struct perf_event_buffer *pev)
{
  return *pev->data_head != pev->last_head;
}

/* Copy the last SIZE bytes from PEV ending at DATA_HEAD and return a pointer
   to the memory holding the copy.
   The caller is responsible for freeing the memory.  */

static gdb_byte *
perf_event_read (const struct perf_event_buffer *pev, __u64 data_head,
		 size_t size)
{
  const gdb_byte *begin, *end, *start, *stop;
  gdb_byte *buffer;
  size_t buffer_size;
  __u64 data_tail;

  if (size == 0)
    return NULL;

  gdb_assert (size <= data_head);
  data_tail = data_head - size;

  buffer_size = pev->size;
  begin = pev->mem;
  start = begin + data_tail % buffer_size;
  stop = begin + data_head % buffer_size;

  buffer = (gdb_byte *) xmalloc (size);

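  /* Illustration: with BUFFER_SIZE == 8, DATA_HEAD == 10, and SIZE == 4,
     DATA_TAIL is 6, so START points at offset 6 and STOP at offset 2.
     Since START >= STOP, the copy below wraps: bytes [6, 8) come first,
     then bytes [0, 2).  */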
  if (start < stop)
    memcpy (buffer, start, stop - start);
  else
    {
      end = begin + buffer_size;

      memcpy (buffer, start, end - start);
      memcpy (buffer + (end - start), begin, stop - begin);
    }

  return buffer;
}

/* Copy the perf event buffer data from PEV.
   Store a pointer to the copy into DATA and its size in PSIZE.  */

static void
perf_event_read_all (struct perf_event_buffer *pev, gdb_byte **data,
		     size_t *psize)
{
  size_t size;
  __u64 data_head;

  data_head = *pev->data_head;

  size = pev->size;
  if (data_head < size)
    size = (size_t) data_head;

  *data = perf_event_read (pev, data_head, size);
  *psize = size;

  pev->last_head = data_head;
}

/* Determine the event type.
   Returns zero on success and fills in TYPE; returns -1 otherwise.  */
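/* Dynamically registered PMUs such as "intel_pt" export their type number
   via sysfs; the file read below contains a single integer, e.g. "8".  */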

static int
perf_event_pt_event_type (int *type)
{
  FILE *file;
  int found;

  file = fopen ("/sys/bus/event_source/devices/intel_pt/type", "r");
  if (file == NULL)
    return -1;

  found = fscanf (file, "%d", type);

  fclose (file);

  if (found == 1)
    return 0;
  return -1;
}

/* Try to determine the start address of the Linux kernel.  */
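/* We scan /proc/kallsyms for the "_text" symbol, which marks the beginning
   of the kernel text section; a matching line looks like
   "ffffffff81000000 T _text", for example.  */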

static uint64_t
linux_determine_kernel_start (void)
{
  static uint64_t kernel_start;
  static int cached;
  FILE *file;

  if (cached != 0)
    return kernel_start;

  cached = 1;

  file = gdb_fopen_cloexec ("/proc/kallsyms", "r");
  if (file == NULL)
    return kernel_start;

  while (!feof (file))
    {
      char buffer[1024], symbol[8], *line;
      uint64_t addr;
      int match;

      line = fgets (buffer, sizeof (buffer), file);
      if (line == NULL)
	break;

      match = sscanf (line, "%" SCNx64 " %*[tT] %7s", &addr, symbol);
      if (match != 2)
	continue;

      if (strcmp (symbol, "_text") == 0)
	{
	  kernel_start = addr;
	  break;
	}
    }

  fclose (file);

  return kernel_start;
}

/* Check whether an address is in the kernel.  */

static inline int
perf_event_is_kernel_addr (uint64_t addr)
{
  uint64_t kernel_start;

  kernel_start = linux_determine_kernel_start ();
  if (kernel_start != 0ull)
    return (addr >= kernel_start);

  /* If we don't know the kernel's start address, let's check the most
     significant bit.  This will work at least for 64-bit kernels.  */
  return ((addr & (1ull << 63)) != 0);
}

/* Check whether a perf event record should be skipped.  */

static inline int
perf_event_skip_bts_record (const struct perf_event_bts *bts)
{
  /* The hardware may report branches from kernel into user space.  Branches
     from user into kernel space will be suppressed.  We filter out the
     former to provide a consistent branch trace that excludes the
     kernel.  */
  return perf_event_is_kernel_addr (bts->from);
}

/* Perform a few consistency checks on a perf event sample record.  This is
   meant to catch cases when we get out of sync with the perf event stream.  */

static inline int
perf_event_sample_ok (const struct perf_event_sample *sample)
{
  if (sample->header.type != PERF_RECORD_SAMPLE)
    return 0;

  if (sample->header.size != sizeof (*sample))
    return 0;

  return 1;
}

/* Branch trace is collected in a circular buffer [begin; end) as pairs of from
   and to addresses (plus a header).

   Start points into that buffer at the next sample position.
   We read the collected samples backwards from start.

   While reading the samples, we convert the information into a list of blocks.
   For two adjacent samples s1 and s2, we form a block b such that b.begin =
   s1.to and b.end = s2.from.

   In case the buffer overflows during sampling, one sample may have its lower
   part at the end and its upper part at the beginning of the buffer.  */
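/* For example, if s1 = (from: A, to: B) is immediately followed in time by
   s2 = (from: C, to: D), execution ran sequentially from B to C, which
   yields the block { begin = B, end = C }.  */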

static VEC (btrace_block_s) *
perf_event_read_bts (struct btrace_target_info* tinfo, const uint8_t *begin,
		     const uint8_t *end, const uint8_t *start, size_t size)
{
  VEC (btrace_block_s) *btrace = NULL;
  struct perf_event_sample sample;
  size_t read = 0;
  struct btrace_block block = { 0, 0 };
  struct regcache *regcache;

  gdb_assert (begin <= start);
  gdb_assert (start <= end);

  /* The first block ends at the current pc.  */
  regcache = get_thread_regcache_for_ptid (tinfo->ptid);
  block.end = regcache_read_pc (regcache);

  /* The buffer may contain a partial record as its last entry (i.e. when the
     buffer size is not a multiple of the sample size).  */
  read = sizeof (sample) - 1;

  for (; read < size; read += sizeof (sample))
    {
      const struct perf_event_sample *psample;

      /* Find the next perf_event sample in a backwards traversal.  */
      start -= sizeof (sample);

      /* If we're still inside the buffer, the sample is contiguous and we
	 can read it in place.  */
      if (begin <= start)
	psample = (const struct perf_event_sample *) start;
      else
	{
	  int missing;

	  /* We're to the left of the ring buffer; we wrap around and
	     reappear at the very right of the ring buffer.  */

	  missing = (begin - start);
	  start = (end - missing);

	  /* If the entire sample wrapped around, it is contiguous at the
	     end of the buffer and we can read it in place.  */
	  if (missing == sizeof (sample))
	    psample = (const struct perf_event_sample *) start;
	  else
	    {
	      uint8_t *stack;

	      /* The sample wrapped around.  The lower part is at the end and
		 the upper part is at the beginning of the buffer.  */
	      stack = (uint8_t *) &sample;

	      /* Copy the two parts so we have a contiguous sample.  */
	      memcpy (stack, start, missing);
	      memcpy (stack + missing, begin, sizeof (sample) - missing);

	      psample = &sample;
	    }
	}

      if (!perf_event_sample_ok (psample))
	{
	  warning (_("Branch trace may be incomplete."));
	  break;
	}

      if (perf_event_skip_bts_record (&psample->bts))
	continue;

      /* We found a valid sample, so we can complete the current block.  */
      block.begin = psample->bts.to;

      VEC_safe_push (btrace_block_s, btrace, &block);

      /* Start the next block.  */
      block.end = psample->bts.from;
    }

  /* Push the last block (i.e. the first one of inferior execution), as well.
     We don't know where it starts, but we know where it ends.  If we're
     reading delta trace, we can fill in the start address later on.
     Otherwise we will prune it.  */
  block.begin = 0;
  VEC_safe_push (btrace_block_s, btrace, &block);

  return btrace;
}

/* Check whether the kernel supports BTS.  */
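/* We probe by forking a child that ptrace-stops itself and then trying to
   open a branch-sampling perf event for it.  The file descriptor only
   serves as a yes/no answer and is closed right away.  */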

static int
kernel_supports_bts (void)
{
  struct perf_event_attr attr;
  pid_t child, pid;
  int status, file;

  errno = 0;
  child = fork ();
  switch (child)
    {
    case -1:
      warning (_("test bts: cannot fork: %s."), safe_strerror (errno));
      return 0;

    case 0:
      status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
      if (status != 0)
	{
	  warning (_("test bts: cannot PTRACE_TRACEME: %s."),
		   safe_strerror (errno));
	  _exit (1);
	}

      status = raise (SIGTRAP);
      if (status != 0)
	{
	  warning (_("test bts: cannot raise SIGTRAP: %s."),
		   safe_strerror (errno));
	  _exit (1);
	}

      _exit (1);

    default:
      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test bts: bad pid %ld, error: %s."),
		   (long) pid, safe_strerror (errno));
	  return 0;
	}

      if (!WIFSTOPPED (status))
	{
	  warning (_("test bts: expected stop. status: %d."),
		   status);
	  return 0;
	}

      memset (&attr, 0, sizeof (attr));

      attr.type = PERF_TYPE_HARDWARE;
      attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
      attr.sample_period = 1;
      attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;
      attr.exclude_kernel = 1;
      attr.exclude_hv = 1;
      attr.exclude_idle = 1;

      file = syscall (SYS_perf_event_open, &attr, child, -1, -1, 0);
      if (file >= 0)
	close (file);

      kill (child, SIGKILL);
      ptrace (PTRACE_KILL, child, NULL, NULL);

      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test bts: bad pid %ld, error: %s."),
		   (long) pid, safe_strerror (errno));
	  if (!WIFSIGNALED (status))
	    warning (_("test bts: expected killed. status: %d."),
		     status);
	}

      return (file >= 0);
    }
}

/* Check whether the kernel supports Intel Processor Trace.  */

static int
kernel_supports_pt (void)
{
  struct perf_event_attr attr;
  pid_t child, pid;
  int status, file, type;

  errno = 0;
  child = fork ();
  switch (child)
    {
    case -1:
      warning (_("test pt: cannot fork: %s."), safe_strerror (errno));
      return 0;

    case 0:
      status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
      if (status != 0)
	{
	  warning (_("test pt: cannot PTRACE_TRACEME: %s."),
		   safe_strerror (errno));
	  _exit (1);
	}

      status = raise (SIGTRAP);
      if (status != 0)
	{
	  warning (_("test pt: cannot raise SIGTRAP: %s."),
		   safe_strerror (errno));
	  _exit (1);
	}

      _exit (1);

    default:
      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test pt: bad pid %ld, error: %s."),
		   (long) pid, safe_strerror (errno));
	  return 0;
	}

      if (!WIFSTOPPED (status))
	{
	  warning (_("test pt: expected stop. status: %d."),
		   status);
	  return 0;
	}

      status = perf_event_pt_event_type (&type);
      if (status != 0)
	file = -1;
      else
	{
	  memset (&attr, 0, sizeof (attr));

	  attr.size = sizeof (attr);
	  attr.type = type;
	  attr.exclude_kernel = 1;
	  attr.exclude_hv = 1;
	  attr.exclude_idle = 1;

	  file = syscall (SYS_perf_event_open, &attr, child, -1, -1, 0);
	  if (file >= 0)
	    close (file);
	}

      kill (child, SIGKILL);
      ptrace (PTRACE_KILL, child, NULL, NULL);

      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test pt: bad pid %ld, error: %s."),
		   (long) pid, safe_strerror (errno));
	  if (!WIFSIGNALED (status))
	    warning (_("test pt: expected killed. status: %d."),
		     status);
	}

      return (file >= 0);
    }
}

/* Check whether an Intel cpu supports BTS.  */

static int
intel_supports_bts (const struct btrace_cpu *cpu)
{
  switch (cpu->family)
    {
    case 0x6:
      switch (cpu->model)
	{
	case 0x1a: /* Nehalem */
	case 0x1f:
	case 0x1e:
	case 0x2e:
	case 0x25: /* Westmere */
	case 0x2c:
	case 0x2f:
	case 0x2a: /* Sandy Bridge */
	case 0x2d:
	case 0x3a: /* Ivy Bridge */

	  /* AAJ122: LBR, BTM, or BTS records may have incorrect branch
	     "from" information after an EIST transition, T-states, C1E, or
	     Adaptive Thermal Throttling.  */
	  return 0;
	}
    }

  return 1;
}

/* Check whether the cpu supports BTS.  */

static int
cpu_supports_bts (void)
{
  struct btrace_cpu cpu;

  cpu = btrace_this_cpu ();
  switch (cpu.vendor)
    {
    default:
      /* Don't know about others.  Let's assume they do.  */
      return 1;

    case CV_INTEL:
      return intel_supports_bts (&cpu);
    }
}

/* Check whether the linux target supports BTS.  */

static int
linux_supports_bts (void)
{
  static int cached;

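  /* CACHED is tri-state: zero until the first query, then -1 for
     unsupported or 1 for supported.  */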
  if (cached == 0)
    {
      if (!kernel_supports_bts ())
	cached = -1;
      else if (!cpu_supports_bts ())
	cached = -1;
      else
	cached = 1;
    }

  return cached > 0;
}

/* Check whether the linux target supports Intel Processor Trace.  */

static int
linux_supports_pt (void)
{
  static int cached;

  if (cached == 0)
    {
      if (!kernel_supports_pt ())
	cached = -1;
      else
	cached = 1;
    }

  return cached > 0;
}

/* See linux-btrace.h.  */

int
linux_supports_btrace (struct target_ops *ops, enum btrace_format format)
{
  switch (format)
    {
    case BTRACE_FORMAT_NONE:
      return 0;

    case BTRACE_FORMAT_BTS:
      return linux_supports_bts ();

    case BTRACE_FORMAT_PT:
      return linux_supports_pt ();
    }

  internal_error (__FILE__, __LINE__, _("Unknown branch trace format"));
}

/* Enable branch tracing in BTS format.  */

static struct btrace_target_info *
linux_enable_bts (ptid_t ptid, const struct btrace_config_bts *conf)
{
  struct perf_event_mmap_page *header;
  struct btrace_target_info *tinfo;
  struct btrace_tinfo_bts *bts;
  size_t size, pages;
  __u64 data_offset;
  int pid, pg;

  tinfo = XCNEW (struct btrace_target_info);
  tinfo->ptid = ptid;

  tinfo->conf.format = BTRACE_FORMAT_BTS;
  bts = &tinfo->variant.bts;

  bts->attr.size = sizeof (bts->attr);
  bts->attr.type = PERF_TYPE_HARDWARE;
  bts->attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
  bts->attr.sample_period = 1;

  /* We sample the branch source and destination addresses.  */
  bts->attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;

  bts->attr.exclude_kernel = 1;
  bts->attr.exclude_hv = 1;
  bts->attr.exclude_idle = 1;

  pid = ptid_get_lwp (ptid);
  if (pid == 0)
    pid = ptid_get_pid (ptid);

  errno = 0;
  bts->file = syscall (SYS_perf_event_open, &bts->attr, pid, -1, -1, 0);
  if (bts->file < 0)
    goto err_out;

  /* Convert the requested size in bytes to pages (rounding up).  */
  pages = ((size_t) conf->size / PAGE_SIZE
	   + ((conf->size % PAGE_SIZE) == 0 ? 0 : 1));
  /* We need at least one page.  */
  if (pages == 0)
    pages = 1;

  /* The buffer size can be requested in powers of two pages.  Adjust PAGES
     to the next power of two.  */
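  /* For example, a request for 5 pages (101b) becomes 6 via bit 0 and then
     8 via bit 1; 8 is the terminating power of two.  */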
  for (pg = 0; pages != ((size_t) 1 << pg); ++pg)
    if ((pages & ((size_t) 1 << pg)) != 0)
      pages += ((size_t) 1 << pg);

  /* We try to allocate the requested size.
     If that fails, try to get as much as we can.  */
  for (; pages > 0; pages >>= 1)
    {
      size_t length;
      __u64 data_size;

      data_size = (__u64) pages * PAGE_SIZE;

      /* Don't ask for more than we can represent in the configuration.  */
      if ((__u64) UINT_MAX < data_size)
	continue;

      size = (size_t) data_size;
      length = size + PAGE_SIZE;

      /* Check for overflows.  */
      if ((__u64) length != data_size + PAGE_SIZE)
	continue;

      /* The number of pages we request needs to be a power of two.  */
      header = ((struct perf_event_mmap_page *)
		mmap (NULL, length, PROT_READ, MAP_SHARED, bts->file, 0));
      if (header != MAP_FAILED)
	break;
    }

  if (pages == 0)
    goto err_file;

  data_offset = PAGE_SIZE;

#if defined (PERF_ATTR_SIZE_VER5)
  if (offsetof (struct perf_event_mmap_page, data_size) <= header->size)
    {
      __u64 data_size;

      data_offset = header->data_offset;
      data_size = header->data_size;

      size = (unsigned int) data_size;

      /* Check for overflows.  */
      if ((__u64) size != data_size)
	{
	  munmap ((void *) header, size + PAGE_SIZE);
	  goto err_file;
	}
    }
#endif /* defined (PERF_ATTR_SIZE_VER5) */

  bts->header = header;
  bts->bts.mem = ((const uint8_t *) header) + data_offset;
  bts->bts.size = size;
  bts->bts.data_head = &header->data_head;
  bts->bts.last_head = 0ull;

  tinfo->conf.bts.size = (unsigned int) size;
  return tinfo;

 err_file:
  /* We were not able to allocate any buffer.  */
  close (bts->file);

 err_out:
  xfree (tinfo);
  return NULL;
}

#if defined (PERF_ATTR_SIZE_VER5)

/* Enable branch tracing in Intel Processor Trace format.  */

static struct btrace_target_info *
linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
{
  struct perf_event_mmap_page *header;
  struct btrace_target_info *tinfo;
  struct btrace_tinfo_pt *pt;
  size_t pages, size;
  int pid, pg, errcode, type;

  if (conf->size == 0)
    return NULL;

  errcode = perf_event_pt_event_type (&type);
  if (errcode != 0)
    return NULL;

  pid = ptid_get_lwp (ptid);
  if (pid == 0)
    pid = ptid_get_pid (ptid);

  tinfo = XCNEW (struct btrace_target_info);
  tinfo->ptid = ptid;

  tinfo->conf.format = BTRACE_FORMAT_PT;
  pt = &tinfo->variant.pt;

  pt->attr.size = sizeof (pt->attr);
  pt->attr.type = type;

  pt->attr.exclude_kernel = 1;
  pt->attr.exclude_hv = 1;
  pt->attr.exclude_idle = 1;

  errno = 0;
  pt->file = syscall (SYS_perf_event_open, &pt->attr, pid, -1, -1, 0);
  if (pt->file < 0)
    goto err;

  /* Allocate the configuration page.  */
  header = ((struct perf_event_mmap_page *)
	    mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		  pt->file, 0));
  if (header == MAP_FAILED)
    goto err_file;

  header->aux_offset = header->data_offset + header->data_size;
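
  /* The AUX area that receives the trace data is mapped separately from
     the DATA area; we place it directly behind the DATA area and map it
     below, once we have settled on a buffer size.  */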

  /* Convert the requested size in bytes to pages (rounding up).  */
  pages = ((size_t) conf->size / PAGE_SIZE
	   + ((conf->size % PAGE_SIZE) == 0 ? 0 : 1));
  /* We need at least one page.  */
  if (pages == 0)
    pages = 1;

  /* The buffer size can be requested in powers of two pages.  Adjust PAGES
     to the next power of two.  */
  for (pg = 0; pages != ((size_t) 1 << pg); ++pg)
    if ((pages & ((size_t) 1 << pg)) != 0)
      pages += ((size_t) 1 << pg);

  /* We try to allocate the requested size.
     If that fails, try to get as much as we can.  */
  for (; pages > 0; pages >>= 1)
    {
      size_t length;
      __u64 data_size;

      data_size = (__u64) pages * PAGE_SIZE;

      /* Don't ask for more than we can represent in the configuration.  */
      if ((__u64) UINT_MAX < data_size)
	continue;

      size = (size_t) data_size;

      /* Check for overflows.  */
      if ((__u64) size != data_size)
	continue;

      header->aux_size = data_size;
      length = size;

      pt->pt.mem = ((const uint8_t *)
		    mmap (NULL, length, PROT_READ, MAP_SHARED, pt->file,
			  header->aux_offset));
      if (pt->pt.mem != MAP_FAILED)
	break;
    }

  if (pages == 0)
    goto err_conf;

  pt->header = header;
  pt->pt.size = size;
  pt->pt.data_head = &header->aux_head;

  tinfo->conf.pt.size = (unsigned int) size;
  return tinfo;

 err_conf:
  munmap ((void *) header, PAGE_SIZE);

 err_file:
  close (pt->file);

 err:
  xfree (tinfo);
  return NULL;
}

#else /* !defined (PERF_ATTR_SIZE_VER5) */

static struct btrace_target_info *
linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
{
  errno = EOPNOTSUPP;
  return NULL;
}

#endif /* !defined (PERF_ATTR_SIZE_VER5) */

/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
{
  struct btrace_target_info *tinfo;

  tinfo = NULL;
  switch (conf->format)
    {
    case BTRACE_FORMAT_NONE:
      break;

    case BTRACE_FORMAT_BTS:
      tinfo = linux_enable_bts (ptid, &conf->bts);
      break;

    case BTRACE_FORMAT_PT:
      tinfo = linux_enable_pt (ptid, &conf->pt);
      break;
    }

  return tinfo;
}

/* Disable BTS tracing.  */

static enum btrace_error
linux_disable_bts (struct btrace_tinfo_bts *tinfo)
{
  munmap ((void *) tinfo->header, tinfo->bts.size + PAGE_SIZE);
  close (tinfo->file);

  return BTRACE_ERR_NONE;
}

/* Disable Intel Processor Trace tracing.  */

static enum btrace_error
linux_disable_pt (struct btrace_tinfo_pt *tinfo)
{
  munmap ((void *) tinfo->pt.mem, tinfo->pt.size);
  munmap ((void *) tinfo->header, PAGE_SIZE);
  close (tinfo->file);

  return BTRACE_ERR_NONE;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  enum btrace_error errcode;

  errcode = BTRACE_ERR_NOT_SUPPORTED;
  switch (tinfo->conf.format)
    {
    case BTRACE_FORMAT_NONE:
      break;

    case BTRACE_FORMAT_BTS:
      errcode = linux_disable_bts (&tinfo->variant.bts);
      break;

    case BTRACE_FORMAT_PT:
      errcode = linux_disable_pt (&tinfo->variant.pt);
      break;
    }

  if (errcode == BTRACE_ERR_NONE)
    xfree (tinfo);

  return errcode;
}

/* Read branch trace data in BTS format for the thread given by TINFO into
   BTRACE using the TYPE reading method.  */

static enum btrace_error
linux_read_bts (struct btrace_data_bts *btrace,
		struct btrace_target_info *tinfo,
		enum btrace_read_type type)
{
  struct perf_event_buffer *pevent;
  const uint8_t *begin, *end, *start;
  size_t buffer_size, size;
  __u64 data_head, data_tail;
  unsigned int retries = 5;

  pevent = &tinfo->variant.bts.bts;

  /* For delta reads, we return at least the partial last block containing
     the current PC.  */
  if (type == BTRACE_READ_NEW && !perf_event_new_data (pevent))
    return BTRACE_ERR_NONE;

  buffer_size = pevent->size;
  data_tail = pevent->last_head;

  /* We may need to retry reading the trace.  See below.  */
  while (retries--)
    {
      data_head = *pevent->data_head;

      /* Delete any leftover trace from the previous iteration.  */
      VEC_free (btrace_block_s, btrace->blocks);

      if (type == BTRACE_READ_DELTA)
	{
	  __u64 data_size;

	  /* Determine the number of bytes to read and check for buffer
	     overflows.  */

	  /* Check for data head overflows.  We might be able to recover from
	     those but they are very unlikely and it's not really worth the
	     effort, I think.  */
	  if (data_head < data_tail)
	    return BTRACE_ERR_OVERFLOW;

	  /* If the buffer is smaller than the trace delta, we overflowed.  */
	  data_size = data_head - data_tail;
	  if (buffer_size < data_size)
	    return BTRACE_ERR_OVERFLOW;

	  /* DATA_SIZE <= BUFFER_SIZE and therefore fits into a size_t.  */
	  size = (size_t) data_size;
	}
      else
	{
	  /* Read the entire buffer.  */
	  size = buffer_size;

	  /* Adjust the size if the buffer has not overflowed, yet.  */
	  if (data_head < size)
	    size = (size_t) data_head;
	}

      /* Data_head keeps growing; the buffer itself is circular.  */
      begin = pevent->mem;
      start = begin + data_head % buffer_size;

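      /* If the buffer has not filled up, yet, the backwards traversal in
	 perf_event_read_bts never needs to wrap, so any END at or beyond
	 START will do.  */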
      if (data_head <= buffer_size)
	end = start;
      else
	end = begin + pevent->size;

      btrace->blocks = perf_event_read_bts (tinfo, begin, end, start, size);

      /* The stopping thread notifies its ptracer before it is scheduled out.
	 On multi-core systems, the debugger might therefore run while the
	 kernel is still writing the last branch trace records.

	 Let's check whether the data head moved while we read the trace.  */
      if (data_head == *pevent->data_head)
	break;
    }

  pevent->last_head = data_head;

  /* Prune the incomplete last block (i.e. the first one of inferior execution)
     if we're not doing a delta read.  There is no way of filling in its zeroed
     BEGIN element.  */
  if (!VEC_empty (btrace_block_s, btrace->blocks)
      && type != BTRACE_READ_DELTA)
    VEC_pop (btrace_block_s, btrace->blocks);

  return BTRACE_ERR_NONE;
}

/* Fill in the Intel Processor Trace configuration information.  */

static void
linux_fill_btrace_pt_config (struct btrace_data_pt_config *conf)
{
  conf->cpu = btrace_this_cpu ();
}

/* Read branch trace data in Intel Processor Trace format for the thread
   given by TINFO into BTRACE using the TYPE reading method.  */

static enum btrace_error
linux_read_pt (struct btrace_data_pt *btrace,
	       struct btrace_target_info *tinfo,
	       enum btrace_read_type type)
{
  struct perf_event_buffer *pt;

  pt = &tinfo->variant.pt.pt;

  linux_fill_btrace_pt_config (&btrace->config);

  switch (type)
    {
    case BTRACE_READ_DELTA:
      /* We don't support delta reads.  The data head (i.e. aux_head) wraps
	 around to stay inside the aux buffer.  */
      return BTRACE_ERR_NOT_SUPPORTED;

    case BTRACE_READ_NEW:
      if (!perf_event_new_data (pt))
	return BTRACE_ERR_NONE;

      /* Fall through.  */
    case BTRACE_READ_ALL:
      perf_event_read_all (pt, &btrace->data, &btrace->size);
      return BTRACE_ERR_NONE;
    }

  internal_error (__FILE__, __LINE__, _("Unknown btrace read type."));
}

/* See linux-btrace.h.  */

enum btrace_error
linux_read_btrace (struct btrace_data *btrace,
		   struct btrace_target_info *tinfo,
		   enum btrace_read_type type)
{
  switch (tinfo->conf.format)
    {
    case BTRACE_FORMAT_NONE:
      return BTRACE_ERR_NOT_SUPPORTED;

    case BTRACE_FORMAT_BTS:
      /* We read btrace in BTS format.  */
      btrace->format = BTRACE_FORMAT_BTS;
      btrace->variant.bts.blocks = NULL;

      return linux_read_bts (&btrace->variant.bts, tinfo, type);

    case BTRACE_FORMAT_PT:
      /* We read btrace in Intel Processor Trace format.  */
      btrace->format = BTRACE_FORMAT_PT;
      btrace->variant.pt.data = NULL;
      btrace->variant.pt.size = 0;

      return linux_read_pt (&btrace->variant.pt, tinfo, type);
    }

  internal_error (__FILE__, __LINE__, _("Unknown branch trace format."));
}

/* See linux-btrace.h.  */

const struct btrace_config *
linux_btrace_conf (const struct btrace_target_info *tinfo)
{
  return &tinfo->conf;
}

#else /* !HAVE_LINUX_PERF_EVENT_H */

/* See linux-btrace.h.  */

int
linux_supports_btrace (struct target_ops *ops, enum btrace_format format)
{
  return 0;
}

/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
{
  return NULL;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  return BTRACE_ERR_NOT_SUPPORTED;
}

/* See linux-btrace.h.  */

enum btrace_error
linux_read_btrace (struct btrace_data *btrace,
		   struct btrace_target_info *tinfo,
		   enum btrace_read_type type)
{
  return BTRACE_ERR_NOT_SUPPORTED;
}

/* See linux-btrace.h.  */

const struct btrace_config *
linux_btrace_conf (const struct btrace_target_info *tinfo)
{
  return NULL;
}

#endif /* !HAVE_LINUX_PERF_EVENT_H */