1 //===-- Perf.cpp ----------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Perf.h" 10 11 #include "Plugins/Process/POSIX/ProcessPOSIXLog.h" 12 #include "lldb/Host/linux/Support.h" 13 #include "llvm/Support/FormatVariadic.h" 14 #include "llvm/Support/MathExtras.h" 15 #include "llvm/Support/MemoryBuffer.h" 16 #include <linux/version.h> 17 #include <sys/ioctl.h> 18 #include <sys/mman.h> 19 #include <sys/syscall.h> 20 #include <unistd.h> 21 22 using namespace lldb_private; 23 using namespace process_linux; 24 using namespace llvm; 25 26 Expected<LinuxPerfZeroTscConversion> 27 lldb_private::process_linux::LoadPerfTscConversionParameters() { 28 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) 29 lldb::pid_t pid = getpid(); 30 perf_event_attr attr; 31 memset(&attr, 0, sizeof(attr)); 32 attr.size = sizeof(attr); 33 attr.type = PERF_TYPE_SOFTWARE; 34 attr.config = PERF_COUNT_SW_DUMMY; 35 36 Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid); 37 if (!perf_event) 38 return perf_event.takeError(); 39 if (Error mmap_err = 40 perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0, 41 /*num_aux_pages=*/0, 42 /*data_buffer_write=*/false)) 43 return std::move(mmap_err); 44 45 perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage(); 46 if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) { 47 return LinuxPerfZeroTscConversion{ 48 mmap_metada.time_mult, mmap_metada.time_shift, {mmap_metada.time_zero}}; 49 } else { 50 auto err_cap = 51 !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero"; 52 std::string err_msg = 53 llvm::formatv("Can't get TSC to real time conversion values. " 54 "perf_event capability '{0}' not supported.", 55 err_cap); 56 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); 57 } 58 #else 59 std::string err_msg = "PERF_COUNT_SW_DUMMY requires Linux 3.12"; 60 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); 61 #endif 62 } 63 64 void resource_handle::MmapDeleter::operator()(void *ptr) { 65 if (m_bytes && ptr != nullptr) 66 munmap(ptr, m_bytes); 67 } 68 69 void resource_handle::FileDescriptorDeleter::operator()(long *ptr) { 70 if (ptr == nullptr) 71 return; 72 if (*ptr == -1) 73 return; 74 close(*ptr); 75 std::default_delete<long>()(ptr); 76 } 77 78 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, 79 std::optional<lldb::pid_t> pid, 80 std::optional<lldb::cpu_id_t> cpu, 81 std::optional<long> group_fd, 82 unsigned long flags) { 83 errno = 0; 84 long fd = syscall(SYS_perf_event_open, &attr, pid.value_or(-1), 85 cpu.value_or(-1), group_fd.value_or(-1), flags); 86 if (fd == -1) { 87 std::string err_msg = 88 llvm::formatv("perf event syscall failed: {0}", std::strerror(errno)); 89 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); 90 } 91 return PerfEvent(fd, !attr.disabled); 92 } 93 94 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, 95 std::optional<lldb::pid_t> pid, 96 std::optional<lldb::cpu_id_t> cpu) { 97 return Init(attr, pid, cpu, -1, 0); 98 } 99 100 llvm::Expected<resource_handle::MmapUP> 101 PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags, 102 long int offset, llvm::StringRef buffer_name) { 103 errno = 0; 104 auto mmap_result = ::mmap(addr, length, prot, flags, GetFd(), offset); 105 106 if (mmap_result == MAP_FAILED) { 107 std::string err_msg = 108 llvm::formatv("perf event mmap allocation failed for {0}: {1}", 109 buffer_name, std::strerror(errno)); 110 return createStringError(inconvertibleErrorCode(), err_msg); 111 } 112 return resource_handle::MmapUP(mmap_result, length); 113 } 114 115 llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages, 116 bool data_buffer_write) { 117 size_t mmap_size = (num_data_pages + 1) * getpagesize(); 118 if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap( 119 nullptr, mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0), 120 MAP_SHARED, 0, "metadata and data buffer")) { 121 m_metadata_data_base = std::move(mmap_metadata_data.get()); 122 return Error::success(); 123 } else 124 return mmap_metadata_data.takeError(); 125 } 126 127 llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) { 128 #ifndef PERF_ATTR_SIZE_VER5 129 return createStringError(inconvertibleErrorCode(), 130 "Intel PT Linux perf event not supported"); 131 #else 132 if (num_aux_pages == 0) 133 return Error::success(); 134 135 perf_event_mmap_page &metadata_page = GetMetadataPage(); 136 137 metadata_page.aux_offset = 138 metadata_page.data_offset + metadata_page.data_size; 139 metadata_page.aux_size = num_aux_pages * getpagesize(); 140 141 if (Expected<resource_handle::MmapUP> mmap_aux = 142 DoMmap(nullptr, metadata_page.aux_size, PROT_READ, MAP_SHARED, 143 metadata_page.aux_offset, "aux buffer")) { 144 m_aux_base = std::move(mmap_aux.get()); 145 return Error::success(); 146 } else 147 return mmap_aux.takeError(); 148 #endif 149 } 150 151 llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages, 152 size_t num_aux_pages, 153 bool data_buffer_write) { 154 if (num_data_pages != 0 && !isPowerOf2_64(num_data_pages)) 155 return llvm::createStringError( 156 llvm::inconvertibleErrorCode(), 157 llvm::formatv("Number of data pages must be a power of 2, got: {0}", 158 num_data_pages)); 159 if (num_aux_pages != 0 && !isPowerOf2_64(num_aux_pages)) 160 return llvm::createStringError( 161 llvm::inconvertibleErrorCode(), 162 llvm::formatv("Number of aux pages must be a power of 2, got: {0}", 163 num_aux_pages)); 164 if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write)) 165 return err; 166 if (Error err = MmapAuxBuffer(num_aux_pages)) 167 return err; 168 return Error::success(); 169 } 170 171 long PerfEvent::GetFd() const { return *(m_fd.get()); } 172 173 perf_event_mmap_page &PerfEvent::GetMetadataPage() const { 174 return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get()); 175 } 176 177 ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const { 178 #ifndef PERF_ATTR_SIZE_VER5 179 llvm_unreachable("Intel PT Linux perf event not supported"); 180 #else 181 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 182 return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) + 183 mmap_metadata.data_offset, 184 static_cast<size_t>(mmap_metadata.data_size)}; 185 #endif 186 } 187 188 ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const { 189 #ifndef PERF_ATTR_SIZE_VER5 190 llvm_unreachable("Intel PT Linux perf event not supported"); 191 #else 192 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 193 return {reinterpret_cast<uint8_t *>(m_aux_base.get()), 194 static_cast<size_t>(mmap_metadata.aux_size)}; 195 #endif 196 } 197 198 Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyDataBuffer() { 199 // The following code assumes that the protection level of the DATA page 200 // is PROT_READ. If PROT_WRITE is used, then reading would require that 201 // this piece of code updates some pointers. See more about data_tail 202 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. 203 204 #ifndef PERF_ATTR_SIZE_VER5 205 return createStringError(inconvertibleErrorCode(), 206 "Intel PT Linux perf event not supported"); 207 #else 208 bool was_enabled = m_enabled; 209 if (Error err = DisableWithIoctl()) 210 return std::move(err); 211 212 /** 213 * The data buffer and aux buffer have different implementations 214 * with respect to their definition of head pointer when using PROD_READ only. 215 * In the case of Aux data buffer the head always wraps around the aux buffer 216 * and we don't need to care about it, whereas the data_head keeps 217 * increasing and needs to be wrapped by modulus operator 218 */ 219 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 220 221 ArrayRef<uint8_t> data = GetDataBuffer(); 222 uint64_t data_head = mmap_metadata.data_head; 223 uint64_t data_size = mmap_metadata.data_size; 224 std::vector<uint8_t> output; 225 output.reserve(data.size()); 226 227 if (data_head > data_size) { 228 uint64_t actual_data_head = data_head % data_size; 229 // The buffer has wrapped, so we first the oldest chunk of data 230 output.insert(output.end(), data.begin() + actual_data_head, data.end()); 231 // And we we read the most recent chunk of data 232 output.insert(output.end(), data.begin(), data.begin() + actual_data_head); 233 } else { 234 // There's been no wrapping, so we just read linearly 235 output.insert(output.end(), data.begin(), data.begin() + data_head); 236 } 237 238 if (was_enabled) { 239 if (Error err = EnableWithIoctl()) 240 return std::move(err); 241 } 242 243 return output; 244 #endif 245 } 246 247 Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() { 248 // The following code assumes that the protection level of the AUX page 249 // is PROT_READ. If PROT_WRITE is used, then reading would require that 250 // this piece of code updates some pointers. See more about aux_tail 251 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. 252 253 #ifndef PERF_ATTR_SIZE_VER5 254 return createStringError(inconvertibleErrorCode(), 255 "Intel PT Linux perf event not supported"); 256 #else 257 bool was_enabled = m_enabled; 258 if (Error err = DisableWithIoctl()) 259 return std::move(err); 260 261 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 262 263 ArrayRef<uint8_t> data = GetAuxBuffer(); 264 uint64_t aux_head = mmap_metadata.aux_head; 265 std::vector<uint8_t> output; 266 output.reserve(data.size()); 267 268 /** 269 * When configured as ring buffer, the aux buffer keeps wrapping around 270 * the buffer and its not possible to detect how many times the buffer 271 * wrapped. Initially the buffer is filled with zeros,as shown below 272 * so in order to get complete buffer we first copy firstpartsize, followed 273 * by any left over part from beginning to aux_head 274 * 275 * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size 276 * aux_head->||<- firstpartsize ->| 277 * 278 * */ 279 280 output.insert(output.end(), data.begin() + aux_head, data.end()); 281 output.insert(output.end(), data.begin(), data.begin() + aux_head); 282 283 if (was_enabled) { 284 if (Error err = EnableWithIoctl()) 285 return std::move(err); 286 } 287 288 return output; 289 #endif 290 } 291 292 Error PerfEvent::DisableWithIoctl() { 293 if (!m_enabled) 294 return Error::success(); 295 296 if (ioctl(*m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0) 297 return createStringError(inconvertibleErrorCode(), 298 "Can't disable perf event. %s", 299 std::strerror(errno)); 300 301 m_enabled = false; 302 return Error::success(); 303 } 304 305 bool PerfEvent::IsEnabled() const { return m_enabled; } 306 307 Error PerfEvent::EnableWithIoctl() { 308 if (m_enabled) 309 return Error::success(); 310 311 if (ioctl(*m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0) 312 return createStringError(inconvertibleErrorCode(), 313 "Can't enable perf event. %s", 314 std::strerror(errno)); 315 316 m_enabled = true; 317 return Error::success(); 318 } 319 320 size_t PerfEvent::GetEffectiveDataBufferSize() const { 321 #ifndef PERF_ATTR_SIZE_VER5 322 llvm_unreachable("Intel PT Linux perf event not supported"); 323 #else 324 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 325 if (mmap_metadata.data_head < mmap_metadata.data_size) 326 return mmap_metadata.data_head; 327 else 328 return mmap_metadata.data_size; // The buffer has wrapped. 329 #endif 330 } 331 332 Expected<PerfEvent> 333 lldb_private::process_linux::CreateContextSwitchTracePerfEvent( 334 lldb::cpu_id_t cpu_id, const PerfEvent *parent_perf_event) { 335 Log *log = GetLog(POSIXLog::Trace); 336 #ifndef PERF_ATTR_SIZE_VER5 337 return createStringError(inconvertibleErrorCode(), 338 "Intel PT Linux perf event not supported"); 339 #else 340 perf_event_attr attr; 341 memset(&attr, 0, sizeof(attr)); 342 attr.size = sizeof(attr); 343 attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME; 344 attr.type = PERF_TYPE_SOFTWARE; 345 attr.context_switch = 1; 346 attr.exclude_kernel = 1; 347 attr.sample_id_all = 1; 348 attr.exclude_hv = 1; 349 attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false; 350 351 // The given perf configuration will produce context switch records of 32 352 // bytes each. Assuming that every context switch will be emitted twice (one 353 // for context switch ins and another one for context switch outs), and that a 354 // context switch will happen at least every half a millisecond per core, we 355 // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more 356 // than what a regular intel pt trace can get. Pessimistically we pick as 357 // 32KiB for the size of our context switch trace. 358 359 uint64_t data_buffer_size = 32768; 360 uint64_t data_buffer_numpages = data_buffer_size / getpagesize(); 361 362 LLDB_LOG(log, "Will create context switch trace buffer of size {0}", 363 data_buffer_size); 364 365 std::optional<long> group_fd; 366 if (parent_perf_event) 367 group_fd = parent_perf_event->GetFd(); 368 369 if (Expected<PerfEvent> perf_event = PerfEvent::Init( 370 attr, /*pid=*/std::nullopt, cpu_id, group_fd, /*flags=*/0)) { 371 if (Error mmap_err = perf_event->MmapMetadataAndBuffers( 372 data_buffer_numpages, 0, /*data_buffer_write=*/false)) { 373 return std::move(mmap_err); 374 } 375 return perf_event; 376 } else { 377 return perf_event.takeError(); 378 } 379 #endif 380 } 381