xref: /netbsd-src/external/gpl3/gdb/dist/gdb/nat/linux-btrace.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1 /* Linux-dependent part of branch trace support for GDB, and GDBserver.
2 
3    Copyright (C) 2013-2024 Free Software Foundation, Inc.
4 
5    Contributed by Intel Corp. <markus.t.metzger@intel.com>
6 
7    This file is part of GDB.
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
21 
22 #include "linux-btrace.h"
23 #include "gdbsupport/common-regcache.h"
24 #include "gdbsupport/gdb_wait.h"
25 #include "x86-cpuid.h"
26 #include "gdbsupport/filestuff.h"
27 #include "gdbsupport/scoped_fd.h"
28 #include "gdbsupport/scoped_mmap.h"
29 
30 #include <inttypes.h>
31 
32 #include <sys/syscall.h>
33 
34 #if HAVE_LINUX_PERF_EVENT_H && defined(SYS_perf_event_open)
35 #include <unistd.h>
36 #include <sys/mman.h>
37 #include <sys/user.h>
38 #include "nat/gdb_ptrace.h"
39 #include <sys/types.h>
40 #include <signal.h>
41 
42 /* A branch trace record in perf_event.  */
43 struct perf_event_bts
44 {
45   /* The linear address of the branch source.  */
46   uint64_t from;
47 
48   /* The linear address of the branch destination.  */
49   uint64_t to;
50 };
51 
52 /* A perf_event branch trace sample.  */
53 struct perf_event_sample
54 {
55   /* The perf_event sample header.  */
56   struct perf_event_header header;
57 
58   /* The perf_event branch tracing payload.  */
59   struct perf_event_bts bts;
60 };
61 
62 /* Identify the cpu we're running on.  */
63 static struct btrace_cpu
64 btrace_this_cpu (void)
65 {
66   struct btrace_cpu cpu;
67   unsigned int eax, ebx, ecx, edx;
68   int ok;
69 
70   memset (&cpu, 0, sizeof (cpu));
71 
72   ok = x86_cpuid (0, &eax, &ebx, &ecx, &edx);
73   if (ok != 0)
74     {
75       if (ebx == signature_INTEL_ebx && ecx == signature_INTEL_ecx
76 	  && edx == signature_INTEL_edx)
77 	{
78 	  unsigned int cpuid, ignore;
79 
80 	  ok = x86_cpuid (1, &cpuid, &ignore, &ignore, &ignore);
81 	  if (ok != 0)
82 	    {
83 	      cpu.vendor = CV_INTEL;
84 
85 	      cpu.family = (cpuid >> 8) & 0xf;
86 	      if (cpu.family == 0xf)
87 		cpu.family += (cpuid >> 20) & 0xff;
88 
89 	      cpu.model = (cpuid >> 4) & 0xf;
90 	      if ((cpu.family == 0x6) || ((cpu.family & 0xf) == 0xf))
91 		cpu.model += (cpuid >> 12) & 0xf0;
92 	    }
93 	}
94       else if (ebx == signature_AMD_ebx && ecx == signature_AMD_ecx
95 	       && edx == signature_AMD_edx)
96 	cpu.vendor = CV_AMD;
97     }
98 
99   return cpu;
100 }
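
/* Worked example of the decoding above, for an Intel CPUID.1 EAX value of
   0x000306a9 (an Ivy Bridge signature; the value is only illustrative):

     family = (0x000306a9 >> 8) & 0xf   = 0x6
     model  = (0x000306a9 >> 4) & 0xf   = 0xa
     family is 0x6, so the extended model bits are added in:
     model += (0x000306a9 >> 12) & 0xf0 = 0xa + 0x30 = 0x3a

   which matches the 0x3a (Ivy Bridge) case in intel_supports_bts below.  */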
101 
102 /* Return non-zero if there is new data in PEV; zero otherwise.  */
103 
104 static int
105 perf_event_new_data (const struct perf_event_buffer *pev)
106 {
107   return *pev->data_head != pev->last_head;
108 }
109 
110 /* Copy the last SIZE bytes from PEV ending at DATA_HEAD and return a pointer
111    to the memory holding the copy.
112    The caller is responsible for freeing the memory.  */
113 
114 static gdb_byte *
115 perf_event_read (const struct perf_event_buffer *pev, __u64 data_head,
116 		 size_t size)
117 {
118   const gdb_byte *begin, *end, *start, *stop;
119   gdb_byte *buffer;
120   size_t buffer_size;
121   __u64 data_tail;
122 
123   if (size == 0)
124     return NULL;
125 
126   /* We should never ask for more data than the buffer can hold.  */
127   buffer_size = pev->size;
128   gdb_assert (size <= buffer_size);
129 
130   /* If we ask for more data than we seem to have, we wrap around and read
131      data from the end of the buffer.  This is already handled by the %
132      BUFFER_SIZE operation, below.  Here, we just need to make sure that we
133      don't underflow.
134 
135      Note that this is perfectly OK for perf event buffers where data_head
136      doesn't grow indefinitely and instead wraps around to remain within the
137      buffer's boundaries.  */
138   if (data_head < size)
139     data_head += buffer_size;
140 
141   gdb_assert (size <= data_head);
142   data_tail = data_head - size;
143 
144   begin = pev->mem;
145   start = begin + data_tail % buffer_size;
146   stop = begin + data_head % buffer_size;
147 
148   buffer = (gdb_byte *) xmalloc (size);
149 
150   if (start < stop)
151     memcpy (buffer, start, stop - start);
152   else
153     {
154       end = begin + buffer_size;
155 
156       memcpy (buffer, start, end - start);
157       memcpy (buffer + (end - start), begin, stop - begin);
158     }
159 
160   return buffer;
161 }
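
/* An illustrative walk-through of the wrap-around case above, with made-up
   numbers: for BUFFER_SIZE = 8, DATA_HEAD = 10 and SIZE = 6 we get
   DATA_TAIL = 4, START = BEGIN + 4 and STOP = BEGIN + 2.  Since START is
   past STOP, the copy is split: bytes [4, 8) are taken from the end of the
   buffer and bytes [0, 2) from its beginning, yielding the 6 most recent
   bytes in chronological order.  */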
162 
163 /* Copy the perf event buffer data from PEV.
164    Store a pointer to the copy into DATA and its size in SIZE.  */
165 
166 static void
167 perf_event_read_all (struct perf_event_buffer *pev, gdb_byte **data,
168 		     size_t *psize)
169 {
170   size_t size;
171   __u64 data_head;
172 
173   data_head = *pev->data_head;
174   size = pev->size;
175 
176   *data = perf_event_read (pev, data_head, size);
177   *psize = size;
178 
179   pev->last_head = data_head;
180 }
181 
182 /* Try to determine the start address of the Linux kernel.  */
183 
184 static uint64_t
185 linux_determine_kernel_start (void)
186 {
187   static uint64_t kernel_start;
188   static int cached;
189 
190   if (cached != 0)
191     return kernel_start;
192 
193   cached = 1;
194 
195   gdb_file_up file = gdb_fopen_cloexec ("/proc/kallsyms", "r");
196   if (file == NULL)
197     return kernel_start;
198 
199   while (!feof (file.get ()))
200     {
201       char buffer[1024], symbol[8], *line;
202       uint64_t addr;
203       int match;
204 
205       line = fgets (buffer, sizeof (buffer), file.get ());
206       if (line == NULL)
207 	break;
208 
209       match = sscanf (line, "%" SCNx64 " %*[tT] %7s", &addr, symbol);
210       if (match != 2)
211 	continue;
212 
213       if (strcmp (symbol, "_text") == 0)
214 	{
215 	  kernel_start = addr;
216 	  break;
217 	}
218     }
219 
220   return kernel_start;
221 }
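
/* The sscanf pattern above matches /proc/kallsyms lines of the form

     ffffffff81000000 T _text

   i.e. a hexadecimal address, a code symbol type of 't' or 'T', and the
   symbol name.  The address shown is only an example; with KASLR it varies
   from boot to boot, and it may read as all zeroes when kernel pointers
   are hidden via kernel.kptr_restrict.  */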
222 
223 /* Check whether an address is in the kernel.  */
224 
225 static inline int
226 perf_event_is_kernel_addr (uint64_t addr)
227 {
228   uint64_t kernel_start;
229 
230   kernel_start = linux_determine_kernel_start ();
231   if (kernel_start != 0ull)
232     return (addr >= kernel_start);
233 
234   /* If we don't know the kernel's start address, let's check the most
235      significant bit.  This will work at least for 64-bit kernels.  */
236   return ((addr & (1ull << 63)) != 0);
237 }
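
/* For example, on a typical 64-bit x86 kernel the text symbols live in the
   upper half of the address space (e.g. 0xffffffff81000000), so bit 63 is
   set, while user-space addresses are in the lower half and have it
   clear.  */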
238 
239 /* Check whether a perf event record should be skipped.  */
240 
241 static inline int
242 perf_event_skip_bts_record (const struct perf_event_bts *bts)
243 {
244   /* The hardware may report branches from kernel into user space.  Branches
245      from user into kernel space will be suppressed.  We filter the former to
246      provide a consistent branch trace that excludes the kernel.  */
247   return perf_event_is_kernel_addr (bts->from);
248 }
249 
250 /* Perform a few consistency checks on a perf event sample record.  This is
251    meant to catch cases when we get out of sync with the perf event stream.  */
252 
253 static inline int
254 perf_event_sample_ok (const struct perf_event_sample *sample)
255 {
256   if (sample->header.type != PERF_RECORD_SAMPLE)
257     return 0;
258 
259   if (sample->header.size != sizeof (*sample))
260     return 0;
261 
262   return 1;
263 }
264 
265 /* Branch trace is collected in a circular buffer [begin; end) as pairs of from
266    and to addresses (plus a header).
267 
268    Start points into that buffer at the next sample position.
269    We read the collected samples backwards from start.
270 
271    While reading the samples, we convert the information into a list of blocks.
272    For two adjacent samples s1 and s2, we form a block b such that b.begin =
273    s1.to and b.end = s2.from.
274 
275    In case the buffer overflows during sampling, one sample may have its lower
276    part at the end and its upper part at the beginning of the buffer.  */
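
/* For instance, with two adjacent samples (the addresses are made up)

     s1 = { from = 0x400f80, to = 0x401000 }
     s2 = { from = 0x401050, to = 0x4012a0 }

   we form the block b = { begin = 0x401000, end = 0x401050 }: the
   instructions that executed sequentially from the destination of the
   older branch to the source of the newer one.  */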
277 
278 static std::vector<btrace_block> *
279 perf_event_read_bts (btrace_target_info *tinfo, const uint8_t *begin,
280 		     const uint8_t *end, const uint8_t *start, size_t size)
281 {
282   std::vector<btrace_block> *btrace = new std::vector<btrace_block>;
283   struct perf_event_sample sample;
284   size_t read = 0;
285   struct btrace_block block = { 0, 0 };
286 
287   gdb_assert (begin <= start);
288   gdb_assert (start <= end);
289 
290   /* The first block ends at the current pc.  */
291   reg_buffer_common *regcache = get_thread_regcache_for_ptid (tinfo->ptid);
292   block.end = regcache_read_pc (regcache);
293 
294   /* The buffer may contain a partial record as its last entry (i.e. when the
295      buffer size is not a multiple of the sample size).  */
296   read = sizeof (sample) - 1;
297 
298   for (; read < size; read += sizeof (sample))
299     {
300       const struct perf_event_sample *psample;
301 
302       /* Find the next perf_event sample in a backwards traversal.  */
303       start -= sizeof (sample);
304 
305       /* If we're still inside the buffer, we're done.  */
306       if (begin <= start)
307 	psample = (const struct perf_event_sample *) start;
308       else
309 	{
310 	  int missing;
311 
312 	  /* We're to the left of the ring buffer; we will wrap around and
313 	     reappear at the very right of the ring buffer.  */
314 
315 	  missing = (begin - start);
316 	  start = (end - missing);
317 
318 	  /* If the entire sample is missing, we're done.  */
319 	  if (missing == sizeof (sample))
320 	    psample = (const struct perf_event_sample *) start;
321 	  else
322 	    {
323 	      uint8_t *stack;
324 
325 	      /* The sample wrapped around.  The lower part is at the end and
326 		 the upper part is at the beginning of the buffer.  */
327 	      stack = (uint8_t *) &sample;
328 
329 	      /* Copy the two parts so we have a contiguous sample.  */
330 	      memcpy (stack, start, missing);
331 	      memcpy (stack + missing, begin, sizeof (sample) - missing);
332 
333 	      psample = &sample;
334 	    }
335 	}
336 
337       if (!perf_event_sample_ok (psample))
338 	{
339 	  warning (_("Branch trace may be incomplete."));
340 	  break;
341 	}
342 
343       if (perf_event_skip_bts_record (&psample->bts))
344 	continue;
345 
346       /* We found a valid sample, so we can complete the current block.  */
347       block.begin = psample->bts.to;
348 
349       btrace->push_back (block);
350 
351       /* Start the next block.  */
352       block.end = psample->bts.from;
353     }
354 
355   /* Push the last block (i.e. the first one of inferior execution), as well.
356      We don't know where it starts, but we know where it ends.  If we're
357      reading delta trace, we can fill in the start address later on.
358      Otherwise we will prune it.  */
359   block.begin = 0;
360   btrace->push_back (block);
361 
362   return btrace;
363 }
364 
365 /* Check whether an Intel cpu supports BTS.  */
366 
367 static int
368 intel_supports_bts (const struct btrace_cpu *cpu)
369 {
370   switch (cpu->family)
371     {
372     case 0x6:
373       switch (cpu->model)
374 	{
375 	case 0x1a: /* Nehalem */
376 	case 0x1f:
377 	case 0x1e:
378 	case 0x2e:
379 	case 0x25: /* Westmere */
380 	case 0x2c:
381 	case 0x2f:
382 	case 0x2a: /* Sandy Bridge */
383 	case 0x2d:
384 	case 0x3a: /* Ivy Bridge */
385 
386 	  /* AAJ122: LBR, BTM, or BTS records may have incorrect branch
387 	     "from" information after an EIST transition, T-states, C1E, or
388 	     Adaptive Thermal Throttling.  */
389 	  return 0;
390 	}
391     }
392 
393   return 1;
394 }
395 
396 /* Check whether the cpu supports BTS.  */
397 
398 static int
399 cpu_supports_bts (void)
400 {
401   struct btrace_cpu cpu;
402 
403   cpu = btrace_this_cpu ();
404   switch (cpu.vendor)
405     {
406     default:
407       /* Don't know about others.  Let's assume they do.  */
408       return 1;
409 
410     case CV_INTEL:
411       return intel_supports_bts (&cpu);
412 
413     case CV_AMD:
414       return 0;
415     }
416 }
417 
418 /* The perf_event_open syscall failed.  Try to print a helpful error
419    message.  */
420 
421 static void
422 diagnose_perf_event_open_fail ()
423 {
424   int orig_errno = errno;
425   switch (orig_errno)
426     {
427     case EPERM:
428     case EACCES:
429       {
430 	static const char filename[] = "/proc/sys/kernel/perf_event_paranoid";
431 	errno = 0;
432 	gdb_file_up file = gdb_fopen_cloexec (filename, "r");
433 	if (file.get () == nullptr)
434 	  error (_("Failed to open %s (%s).  Your system does not support "
435 		   "process recording."), filename, safe_strerror (errno));
436 
437 	int level, found = fscanf (file.get (), "%d", &level);
438 	if (found == 1 && level > 2)
439 	  error (_("You do not have permission to record the process.  "
440 		   "Try setting %s to 2 or less."), filename);
441       }
442 
443       break;
444     }
445 
446   error (_("Failed to start recording: %s"), safe_strerror (orig_errno));
447 }
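
/* The perf_event_paranoid file read above contains a single integer.
   Values above 2 (a common distribution hardening setting) deny
   perf_event_open to unprivileged users altogether; an administrator can
   relax the setting, for example with

     # echo 2 > /proc/sys/kernel/perf_event_paranoid

   The exact meaning of each level is kernel-dependent.  */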
448 
449 /* Get the linux version of a btrace_target_info.  */
450 
451 static linux_btrace_target_info *
452 get_linux_btrace_target_info (btrace_target_info *gtinfo)
453 {
454   return gdb::checked_static_cast<linux_btrace_target_info *> (gtinfo);
455 }
456 
457 /* Enable branch tracing in BTS format.  */
458 
459 static struct btrace_target_info *
460 linux_enable_bts (ptid_t ptid, const struct btrace_config_bts *conf)
461 {
462   size_t size, pages;
463   __u64 data_offset;
464   int pid, pg;
465 
466   if (!cpu_supports_bts ())
467     error (_("BTS support has been disabled for the target cpu."));
468 
469   std::unique_ptr<linux_btrace_target_info> tinfo
470     { std::make_unique<linux_btrace_target_info> (ptid) };
471 
472   tinfo->conf.format = BTRACE_FORMAT_BTS;
473 
474   tinfo->attr.size = sizeof (tinfo->attr);
475   tinfo->attr.type = PERF_TYPE_HARDWARE;
476   tinfo->attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
477   tinfo->attr.sample_period = 1;
478 
479   /* We sample the branch from and to addresses.  */
480   tinfo->attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;
481 
482   tinfo->attr.exclude_kernel = 1;
483   tinfo->attr.exclude_hv = 1;
484   tinfo->attr.exclude_idle = 1;
485 
486   pid = ptid.lwp ();
487   if (pid == 0)
488     pid = ptid.pid ();
489 
490   errno = 0;
491   scoped_fd fd (syscall (SYS_perf_event_open, &tinfo->attr, pid, -1, -1, 0));
492   if (fd.get () < 0)
493     diagnose_perf_event_open_fail ();
494 
495   /* Convert the requested size in bytes to pages (rounding up).  */
496   pages = ((size_t) conf->size / PAGE_SIZE
497 	   + ((conf->size % PAGE_SIZE) == 0 ? 0 : 1));
498   /* We need at least one page.  */
499   if (pages == 0)
500     pages = 1;
501 
502   /* The buffer size can be requested in powers of two pages.  Adjust PAGES
503      to the next power of two.  */
504   for (pg = 0; pages != ((size_t) 1 << pg); ++pg)
505     if ((pages & ((size_t) 1 << pg)) != 0)
506       pages += ((size_t) 1 << pg);
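
  /* For example (illustrative numbers only): a requested size of 80 KiB
     with 4 KiB pages gives PAGES = 20 (binary 10100); the loop above first
     adds bit 2 (giving 24) and then bit 3 (giving 32), so we end up
     requesting 32 pages, i.e. 128 KiB.  */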
507 
508   /* We try to allocate the requested size.
509      If that fails, try to get as much as we can.  */
510   scoped_mmap data;
511   for (; pages > 0; pages >>= 1)
512     {
513       size_t length;
514       __u64 data_size;
515 
516       data_size = (__u64) pages * PAGE_SIZE;
517 
518       /* Don't ask for more than we can represent in the configuration.  */
519       if ((__u64) UINT_MAX < data_size)
520 	continue;
521 
522       size = (size_t) data_size;
523       length = size + PAGE_SIZE;
524 
525       /* Check for overflows.  */
526       if ((__u64) length != data_size + PAGE_SIZE)
527 	continue;
528 
529       errno = 0;
530       /* The number of pages we request needs to be a power of two.  */
531       data.reset (nullptr, length, PROT_READ, MAP_SHARED, fd.get (), 0);
532       if (data.get () != MAP_FAILED)
533 	break;
534     }
535 
536   if (pages == 0)
537     error (_("Failed to map trace buffer: %s."), safe_strerror (errno));
538 
539   struct perf_event_mmap_page *header = (struct perf_event_mmap_page *)
540     data.get ();
541   data_offset = PAGE_SIZE;
542 
543 #if defined (PERF_ATTR_SIZE_VER5)
544   if (offsetof (struct perf_event_mmap_page, data_size) <= header->size)
545     {
546       __u64 data_size;
547 
548       data_offset = header->data_offset;
549       data_size = header->data_size;
550 
551       size = (unsigned int) data_size;
552 
553       /* Check for overflows.  */
554       if ((__u64) size != data_size)
555 	error (_("Failed to determine trace buffer size."));
556     }
557 #endif /* defined (PERF_ATTR_SIZE_VER5) */
558 
559   tinfo->pev.size = size;
560   tinfo->pev.data_head = &header->data_head;
561   tinfo->pev.mem = (const uint8_t *) data.release () + data_offset;
562   tinfo->pev.last_head = 0ull;
563   tinfo->header = header;
564   tinfo->file = fd.release ();
565 
566   tinfo->conf.bts.size = (unsigned int) size;
567   return tinfo.release ();
568 }
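
/* A sketch of the mapping set up above, assuming the default layout in
   which the data area directly follows the metadata page (data_offset is
   PAGE_SIZE unless the kernel reports a different offset in
   perf_event_mmap_page):

     +---------------------------+  <- tinfo->header
     | perf_event_mmap_page      |     (one page, contains data_head)
     +---------------------------+  <- tinfo->pev.mem
     | BTS ring buffer of        |     (SIZE bytes, a power-of-two number
     | perf_event_sample records |      of pages)
     +---------------------------+  */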
569 
570 #if defined (PERF_ATTR_SIZE_VER5)
571 
572 /* Determine the event type.  */
573 
574 static int
575 perf_event_pt_event_type ()
576 {
577   static const char filename[] = "/sys/bus/event_source/devices/intel_pt/type";
578 
579   errno = 0;
580   gdb_file_up file = gdb_fopen_cloexec (filename, "r");
581   if (file.get () == nullptr)
582     switch (errno)
583       {
584       case EACCES:
585       case EFAULT:
586       case EPERM:
587 	error (_("Failed to open %s (%s).  You do not have permission "
588 		 "to use Intel PT."), filename, safe_strerror (errno));
589 
590       case ENOTDIR:
591       case ENOENT:
592 	error (_("Failed to open %s (%s).  Your system does not support "
593 		 "Intel PT."), filename, safe_strerror (errno));
594 
595       default:
596 	error (_("Failed to open %s: %s."), filename, safe_strerror (errno));
597       }
598 
599   int type, found = fscanf (file.get (), "%d", &type);
600   if (found != 1)
601     error (_("Failed to read the PT event type from %s."), filename);
602 
603   return type;
604 }
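
/* The sysfs file read above holds a single decimal integer, e.g. a file
   containing just "8"; the value is the dynamic PMU type id the kernel
   assigned to intel_pt and may differ between systems.  It is used
   directly as perf_event_attr.type below.  */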
605 
606 /* Enable branch tracing in Intel Processor Trace format.  */
607 
608 static struct btrace_target_info *
609 linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
610 {
611   size_t pages;
612   int pid, pg;
613 
614   pid = ptid.lwp ();
615   if (pid == 0)
616     pid = ptid.pid ();
617 
618   std::unique_ptr<linux_btrace_target_info> tinfo
619     { std::make_unique<linux_btrace_target_info> (ptid) };
620 
621   tinfo->conf.format = BTRACE_FORMAT_PT;
622 
623   tinfo->attr.size = sizeof (tinfo->attr);
624   tinfo->attr.type = perf_event_pt_event_type ();
625 
626   tinfo->attr.exclude_kernel = 1;
627   tinfo->attr.exclude_hv = 1;
628   tinfo->attr.exclude_idle = 1;
629 
630   errno = 0;
631   scoped_fd fd (syscall (SYS_perf_event_open, &tinfo->attr, pid, -1, -1, 0));
632   if (fd.get () < 0)
633     diagnose_perf_event_open_fail ();
634 
635   /* Allocate the configuration page. */
636   scoped_mmap data (nullptr, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
637 		    fd.get (), 0);
638   if (data.get () == MAP_FAILED)
639     error (_("Failed to map trace user page: %s."), safe_strerror (errno));
640 
641   struct perf_event_mmap_page *header = (struct perf_event_mmap_page *)
642     data.get ();
643 
644   header->aux_offset = header->data_offset + header->data_size;
645 
646   /* Convert the requested size in bytes to pages (rounding up).  */
647   pages = ((size_t) conf->size / PAGE_SIZE
648 	   + ((conf->size % PAGE_SIZE) == 0 ? 0 : 1));
649   /* We need at least one page.  */
650   if (pages == 0)
651     pages = 1;
652 
653   /* The buffer size can be requested in powers of two pages.  Adjust PAGES
654      to the next power of two.  */
655   for (pg = 0; pages != ((size_t) 1 << pg); ++pg)
656     if ((pages & ((size_t) 1 << pg)) != 0)
657       pages += ((size_t) 1 << pg);
658 
659   /* We try to allocate the requested size.
660      If that fails, try to get as much as we can.  */
661   scoped_mmap aux;
662   for (; pages > 0; pages >>= 1)
663     {
664       size_t length;
665       __u64 data_size;
666 
667       data_size = (__u64) pages * PAGE_SIZE;
668 
669       /* Don't ask for more than we can represent in the configuration.  */
670       if ((__u64) UINT_MAX < data_size)
671 	continue;
672 
673       length = (size_t) data_size;
674 
675       /* Check for overflows.  */
676       if ((__u64) length != data_size)
677 	continue;
678 
679       header->aux_size = data_size;
680 
681       errno = 0;
682       aux.reset (nullptr, length, PROT_READ, MAP_SHARED, fd.get (),
683 		 header->aux_offset);
684       if (aux.get () != MAP_FAILED)
685 	break;
686     }
687 
688   if (pages == 0)
689     error (_("Failed to map trace buffer: %s."), safe_strerror (errno));
690 
691   tinfo->pev.size = aux.size ();
692   tinfo->pev.mem = (const uint8_t *) aux.release ();
693   tinfo->pev.data_head = &header->aux_head;
694   tinfo->header = (struct perf_event_mmap_page *) data.release ();
695   gdb_assert (tinfo->header == header);
696   tinfo->file = fd.release ();
697 
698   tinfo->conf.pt.size = (unsigned int) tinfo->pev.size;
699   return tinfo.release ();
700 }
701 
702 #else /* !defined (PERF_ATTR_SIZE_VER5) */
703 
704 static struct btrace_target_info *
705 linux_enable_pt (ptid_t ptid, const struct btrace_config_pt *conf)
706 {
707   error (_("Intel Processor Trace support was disabled at compile time."));
708 }
709 
710 #endif /* !defined (PERF_ATTR_SIZE_VER5) */
711 
712 /* See linux-btrace.h.  */
713 
714 struct btrace_target_info *
715 linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
716 {
717   switch (conf->format)
718     {
719     case BTRACE_FORMAT_NONE:
720       error (_("Bad branch trace format."));
721 
722     default:
723       error (_("Unknown branch trace format."));
724 
725     case BTRACE_FORMAT_BTS:
726       return linux_enable_bts (ptid, &conf->bts);
727 
728     case BTRACE_FORMAT_PT:
729       return linux_enable_pt (ptid, &conf->pt);
730     }
731 }
732 
733 /* Disable BTS tracing.  */
734 
735 static void
736 linux_disable_bts (struct linux_btrace_target_info *tinfo)
737 {
738   munmap ((void *) tinfo->header, tinfo->pev.size + PAGE_SIZE);
739   close (tinfo->file);
740 }
741 
742 /* Disable Intel Processor Trace tracing.  */
743 
744 static void
745 linux_disable_pt (struct linux_btrace_target_info *tinfo)
746 {
747   munmap ((void *) tinfo->pev.mem, tinfo->pev.size);
748   munmap ((void *) tinfo->header, PAGE_SIZE);
749   close (tinfo->file);
750 }
751 
752 /* See linux-btrace.h.  */
753 
754 enum btrace_error
755 linux_disable_btrace (struct btrace_target_info *gtinfo)
756 {
757   linux_btrace_target_info *tinfo
758     = get_linux_btrace_target_info (gtinfo);
759 
760   switch (tinfo->conf.format)
761     {
762     case BTRACE_FORMAT_NONE:
763       return BTRACE_ERR_NOT_SUPPORTED;
764 
765     case BTRACE_FORMAT_BTS:
766       linux_disable_bts (tinfo);
767       delete tinfo;
768       return BTRACE_ERR_NONE;
769 
770     case BTRACE_FORMAT_PT:
771       linux_disable_pt (tinfo);
772       delete tinfo;
773       return BTRACE_ERR_NONE;
774     }
775 
776   return BTRACE_ERR_NOT_SUPPORTED;
777 }
778 
779 /* Read branch trace data in BTS format for the thread given by TINFO into
780    BTRACE using the TYPE reading method.  */
781 
782 static enum btrace_error
783 linux_read_bts (btrace_data_bts *btrace, linux_btrace_target_info *tinfo,
784 		enum btrace_read_type type)
785 {
786   const uint8_t *begin, *end, *start;
787   size_t buffer_size, size;
788   __u64 data_head = 0, data_tail;
789   unsigned int retries = 5;
790 
791   /* For delta reads, we return at least the partial last block containing
792      the current PC.  */
793   if (type == BTRACE_READ_NEW && !perf_event_new_data (&tinfo->pev))
794     return BTRACE_ERR_NONE;
795 
796   buffer_size = tinfo->pev.size;
797   data_tail = tinfo->pev.last_head;
798 
799   /* We may need to retry reading the trace.  See below.  */
800   while (retries--)
801     {
802       data_head = *tinfo->pev.data_head;
803 
804       /* Delete any leftover trace from the previous iteration.  */
805       delete btrace->blocks;
806       btrace->blocks = nullptr;
807 
808       if (type == BTRACE_READ_DELTA)
809 	{
810 	  __u64 data_size;
811 
812 	  /* Determine the number of bytes to read and check for buffer
813 	     overflows.  */
814 
815 	  /* Check for data head overflows.  We might be able to recover from
816 	     those but they are very unlikely and it's not really worth the
817 	     effort, I think.  */
818 	  if (data_head < data_tail)
819 	    return BTRACE_ERR_OVERFLOW;
820 
821 	  /* If the buffer is smaller than the trace delta, we overflowed.  */
822 	  data_size = data_head - data_tail;
823 	  if (buffer_size < data_size)
824 	    return BTRACE_ERR_OVERFLOW;
825 
826 	  /* DATA_SIZE <= BUFFER_SIZE and therefore fits into a size_t.  */
827 	  size = (size_t) data_size;
828 	}
829       else
830 	{
831 	  /* Read the entire buffer.  */
832 	  size = buffer_size;
833 
834 	  /* Adjust the size if the buffer has not overflowed, yet.  */
835 	  if (data_head < size)
836 	    size = (size_t) data_head;
837 	}
838 
839       /* Data_head keeps growing; the buffer itself is circular.  */
840       begin = tinfo->pev.mem;
841       start = begin + data_head % buffer_size;
842 
843       if (data_head <= buffer_size)
844 	end = start;
845       else
846 	end = begin + tinfo->pev.size;
847 
848       btrace->blocks = perf_event_read_bts (tinfo, begin, end, start, size);
849 
850       /* The stopping thread notifies its ptracer before it is scheduled out.
851 	 On multi-core systems, the debugger might therefore run while the
852 	 kernel might be writing the last branch trace records.
853 
854 	 Let's check whether the data head moved while we read the trace.  */
855       if (data_head == *tinfo->pev.data_head)
856 	break;
857     }
858 
859   tinfo->pev.last_head = data_head;
860 
861   /* Prune the incomplete last block (i.e. the first one of inferior execution)
862      if we're not doing a delta read.  There is no way of filling in its zeroed
863      BEGIN element.  */
864   if (!btrace->blocks->empty () && type != BTRACE_READ_DELTA)
865     btrace->blocks->pop_back ();
866 
867   return BTRACE_ERR_NONE;
868 }
869 
870 /* Fill in the Intel Processor Trace configuration information.  */
871 
872 static void
873 linux_fill_btrace_pt_config (struct btrace_data_pt_config *conf)
874 {
875   conf->cpu = btrace_this_cpu ();
876 }
877 
878 /* Read branch trace data in Intel Processor Trace format for the thread
879    given by TINFO into BTRACE using the TYPE reading method.  */
880 
881 static enum btrace_error
882 linux_read_pt (btrace_data_pt *btrace, linux_btrace_target_info *tinfo,
883 	       enum btrace_read_type type)
884 {
885   linux_fill_btrace_pt_config (&btrace->config);
886 
887   switch (type)
888     {
889     case BTRACE_READ_DELTA:
890       /* We don't support delta reads.  The data head (i.e. aux_head) wraps
891 	 around to stay inside the aux buffer.  */
892       return BTRACE_ERR_NOT_SUPPORTED;
893 
894     case BTRACE_READ_NEW:
895       if (!perf_event_new_data (&tinfo->pev))
896 	return BTRACE_ERR_NONE;
897       [[fallthrough]];
898     case BTRACE_READ_ALL:
899       perf_event_read_all (&tinfo->pev, &btrace->data, &btrace->size);
900       return BTRACE_ERR_NONE;
901     }
902 
903   internal_error (_("Unknown btrace read type."));
904 }
905 
906 /* See linux-btrace.h.  */
907 
908 enum btrace_error
909 linux_read_btrace (struct btrace_data *btrace,
910 		   struct btrace_target_info *gtinfo,
911 		   enum btrace_read_type type)
912 {
913   linux_btrace_target_info *tinfo
914     = get_linux_btrace_target_info (gtinfo);
915 
916   switch (tinfo->conf.format)
917     {
918     case BTRACE_FORMAT_NONE:
919       return BTRACE_ERR_NOT_SUPPORTED;
920 
921     case BTRACE_FORMAT_BTS:
922       /* We read btrace in BTS format.  */
923       btrace->format = BTRACE_FORMAT_BTS;
924       btrace->variant.bts.blocks = NULL;
925 
926       return linux_read_bts (&btrace->variant.bts, tinfo, type);
927 
928     case BTRACE_FORMAT_PT:
929       /* We read btrace in Intel Processor Trace format.  */
930       btrace->format = BTRACE_FORMAT_PT;
931       btrace->variant.pt.data = NULL;
932       btrace->variant.pt.size = 0;
933 
934       return linux_read_pt (&btrace->variant.pt, tinfo, type);
935     }
936 
937   internal_error (_("Unknown branch trace format."));
938 }
939 
940 /* See linux-btrace.h.  */
941 
942 const struct btrace_config *
943 linux_btrace_conf (const struct btrace_target_info *tinfo)
944 {
945   return &tinfo->conf;
946 }
947 
948 #else /* !HAVE_LINUX_PERF_EVENT_H */
949 
950 /* See linux-btrace.h.  */
951 
952 struct btrace_target_info *
953 linux_enable_btrace (ptid_t ptid, const struct btrace_config *conf)
954 {
955   return NULL;
956 }
957 
958 /* See linux-btrace.h.  */
959 
960 enum btrace_error
961 linux_disable_btrace (struct btrace_target_info *tinfo)
962 {
963   return BTRACE_ERR_NOT_SUPPORTED;
964 }
965 
966 /* See linux-btrace.h.  */
967 
968 enum btrace_error
969 linux_read_btrace (struct btrace_data *btrace,
970 		   struct btrace_target_info *tinfo,
971 		   enum btrace_read_type type)
972 {
973   return BTRACE_ERR_NOT_SUPPORTED;
974 }
975 
976 /* See linux-btrace.h.  */
977 
978 const struct btrace_config *
979 linux_btrace_conf (const struct btrace_target_info *tinfo)
980 {
981   return NULL;
982 }
983 
984 #endif /* !HAVE_LINUX_PERF_EVENT_H */
985