1 /* Copyright (C) 2021-2024 Free Software Foundation, Inc. 2 Contributed by Oracle. 3 4 This file is part of GNU Binutils. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, 51 Franklin Street - Fifth Floor, Boston, 19 MA 02110-1301, USA. */ 20 21 #include "config.h" 22 #include <new> 23 24 #include "util.h" 25 #include "CacheMap.h" 26 #include "CallStack.h" 27 #include "DbeSession.h" 28 #include "DbeView.h" 29 #include "DbeLinkList.h" 30 #include "Experiment.h" 31 #include "Exp_Layout.h" 32 #include "Function.h" 33 #include "LoadObject.h" 34 #include "Module.h" 35 36 Descendants::Descendants () 37 { 38 count = 0; 39 limit = sizeof (first_data) / sizeof (CallStackNode *); 40 data = first_data; 41 } 42 43 Descendants::~Descendants () 44 { 45 if (data != first_data) 46 free (data); 47 } 48 49 CallStackNode * 50 Descendants::find (Histable *hi, int *index) 51 { 52 int cnt = count; 53 int left = 0; 54 for (int right = cnt - 1; left <= right;) 55 { 56 int ind = (left + right) / 2; 57 CallStackNode *node = data[ind]; 58 Histable *instr = node->get_instr (); 59 if (instr == hi) 60 { 61 if (index) 62 *index = ind; 63 return node; 64 } 65 if (instr->id < hi->id) 66 right = ind - 1; 67 else 68 left = ind + 1; 69 } 70 if (index) 71 *index = left; 72 return NULL; 73 } 74 75 void 76 Descendants::append (CallStackNode* item) 77 { 78 if (count < limit) 79 data[count++] = item; 80 else 81 insert (count, item); 82 } 83 84 void 85 Descendants::insert (int ind, CallStackNode* item) 86 { 87 CallStackNode **old_data = data; 88 int old_cnt = count; 89 if (old_cnt + 1 >= limit) 90 { 91 int new_limit = (limit == 0) ? DELTA : limit * 2; 92 CallStackNode **new_data = (CallStackNode **) malloc (new_limit * sizeof (CallStackNode *)); 93 for (int i = 0; i < ind; i++) 94 new_data[i] = old_data[i]; 95 new_data[ind] = item; 96 for (int i = ind; i < old_cnt; i++) 97 new_data[i + 1] = old_data[i]; 98 limit = new_limit; 99 data = new_data; 100 if (old_data != first_data) 101 free (old_data); 102 } 103 else 104 { 105 for (int i = ind; i < old_cnt; i++) 106 old_data[i + 1] = old_data[i]; 107 old_data[ind] = item; 108 } 109 count++; 110 } 111 112 /* 113 * Private implementation of CallStack interface 114 */ 115 116 // When performing pipeline optimization on resolve_frame_info + add_stack 117 // cstk_ctx structure contains the state (or context) for one iteration to pass on 118 // from Phase 2 to Phase 3 (More details in Experiment.cc) 119 class CallStackP : public CallStack 120 { 121 public: 122 CallStackP (Experiment *exp); 123 124 virtual ~CallStackP (); 125 126 virtual void add_stack (DataDescriptor *dDscr, long idx, FramePacket *frp, cstk_ctx_chunk *cstCtxChunk); 127 virtual void *add_stack (Vector<Histable*> *objs); 128 virtual CallStackNode *get_node (int n); 129 virtual void print (FILE *); 130 131 private: 132 133 static const int CHUNKSZ = 16384; 134 135 Experiment *experiment; 136 CallStackNode *root; 137 CallStackNode *jvm_node; 138 int nodes; 139 int nchunks; 140 CallStackNode **chunks; 141 Map<uint64_t, CallStackNode *> *cstackMap; 142 DbeLock *cstackLock; 143 144 CallStackNode *add_stack (long start, long end, Vector<Histable*> *objs, CallStackNode *myRoot); 145 CallStackNode *new_Node (CallStackNode*, Histable*); 146 CallStackNode *find_preg_stack (uint64_t); 147 // objs are in the root..leaf order 148 void *add_stack_d (Vector<Histable*> *objs); 149 void add_stack_java (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, bool natpc_added, cstk_ctx_chunk *cstCtxChunk); 150 void add_stack_java_epilogue (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, Vector<Histable*>* jpcs, bool natpc_added); 151 152 // Adjust HW counter event to find better trigger PC, etc. 153 DbeInstr *adjustEvent (DbeInstr *leafPC, DbeInstr * candPC, 154 Vaddr &eventEA, int abst_type); 155 Vector<DbeInstr*> *natpcsP; 156 Vector<Histable*> *jpcsP; 157 }; 158 159 CallStackP::CallStackP (Experiment *exp) 160 { 161 experiment = exp; 162 nchunks = 0; 163 chunks = NULL; 164 nodes = 0; 165 cstackMap = new CacheMap<uint64_t, CallStackNode *>; 166 cstackLock = new DbeLock (); 167 Function *total = dbeSession->get_Total_Function (); 168 root = new_Node (0, total->find_dbeinstr (0, 0)); 169 jvm_node = NULL; 170 natpcsP = NULL; 171 jpcsP = NULL; 172 } 173 174 CallStackP::~CallStackP () 175 { 176 delete cstackLock; 177 if (chunks) 178 { 179 for (int i = 0; i < nodes; i++) 180 { 181 CallStackNode *node = get_node (i); 182 node->~CallStackNode (); 183 } 184 for (int i = 0; i < nchunks; i++) 185 free (chunks[i]); 186 free (chunks); 187 } 188 delete natpcsP; 189 delete jpcsP; 190 destroy_map (CallStackNode *, cstackMap); 191 } 192 193 CallStackNode * 194 CallStackP::new_Node (CallStackNode *anc, Histable *pcval) 195 { 196 // cstackLock->aquireLock(); // Caller already locked it 197 if (nodes >= nchunks * CHUNKSZ) 198 { 199 CallStackNode **old_chunks = chunks; 200 nchunks++; 201 202 // Reallocate Node chunk array 203 chunks = (CallStackNode **) malloc (nchunks * sizeof (CallStackNode *)); 204 for (int i = 0; i < nchunks - 1; i++) 205 chunks[i] = old_chunks[i]; 206 free (old_chunks); 207 // Allocate new chunk for nodes. 208 chunks[nchunks - 1] = (CallStackNode *) malloc (CHUNKSZ * sizeof (CallStackNode)); 209 } 210 nodes++; 211 CallStackNode *node = get_node (nodes - 1); 212 new (node) CallStackNode (anc, pcval); 213 // cstackLock->releaseLock(); 214 return node; 215 } 216 217 CallStackNode * 218 CallStackP::find_preg_stack (uint64_t prid) 219 { 220 DataView *dview = experiment->getOpenMPdata (); 221 dview->sort (PROP_CPRID); 222 Datum tval; 223 tval.setUINT64 (prid); 224 long idx = dview->getIdxByVals (&tval, DataView::REL_EQ); 225 if (idx < 0) 226 return root; 227 CallStackNode *node = (CallStackNode*) dview->getObjValue (PROP_USTACK, idx); 228 if (node != NULL) 229 return node; 230 uint64_t pprid = dview->getLongValue (PROP_PPRID, idx); 231 if (pprid == prid) 232 return root; 233 void *nat_stack = dview->getObjValue (PROP_MSTACK, idx); 234 Vector<Histable*> *pcs = getStackPCs (nat_stack); 235 236 // Find the bottom frame 237 int btm; 238 bool inOMP = false; 239 DbeInstr *instr; 240 Histable *hist; 241 for (btm = 0; btm < pcs->size (); btm++) 242 { 243 hist = pcs->fetch (btm); 244 if (hist->get_type () == Histable::INSTR) 245 instr = (DbeInstr *) hist; 246 else // DBELINE 247 instr = (DbeInstr *) hist->convertto (Histable::INSTR); 248 LoadObject *lo = instr->func->module->loadobject; 249 if (!inOMP) 250 { 251 if (lo->flags & SEG_FLAG_OMP) 252 inOMP = true; 253 } 254 else if (!(lo->flags & SEG_FLAG_OMP)) 255 break; 256 } 257 258 // Find the top frame 259 dview->sort (PROP_CPRID); 260 int top; 261 tval.setUINT64 (pprid); 262 long pidx = dview->getIdxByVals (&tval, DataView::REL_EQ); 263 if (pidx < 0) // No parent. Process the entire nat_stack 264 top = pcs->size () - 1; 265 else 266 { 267 uint32_t thrid = (uint32_t) dview->getIntValue (PROP_THRID, idx); 268 uint32_t pthrid = (uint32_t) dview->getIntValue (PROP_THRID, pidx); 269 if (thrid != pthrid) 270 { 271 // Parent is on a different stack. 272 // Process the entire nat_stack. Skip libthread. 273 for (top = pcs->size () - 1; top >= 0; top--) 274 { 275 hist = pcs->fetch (top); 276 if (hist->get_type () == Histable::INSTR) 277 instr = (DbeInstr *) hist; 278 else // DBELINE 279 instr = (DbeInstr *) hist->convertto (Histable::INSTR); 280 if (instr->func->module->loadobject->flags & SEG_FLAG_OMP) 281 break; 282 } 283 if (top < 0) // None found. May be incomplete call stack (x86) 284 top = pcs->size () - 1; 285 } 286 else 287 { 288 // Parent is on the same stack. Find match. 289 top = pcs->size () - 1; 290 void *pnat_stack = dview->getObjValue (PROP_MSTACK, pidx); 291 Vector<Histable*> *ppcs = getStackPCs (pnat_stack); 292 for (int ptop = ppcs->size () - 1; top >= 0 && ptop >= 0; 293 top--, ptop--) 294 { 295 if (pcs->fetch (top) != ppcs->fetch (ptop)) 296 break; 297 } 298 delete ppcs; 299 } 300 } 301 302 // Process the found range 303 Vector<Histable*> *upcs = new Vector<Histable*>(128); 304 for (int i = btm; i <= top; ++i) 305 { 306 hist = (DbeInstr*) pcs->fetch (i); 307 if (hist->get_type () == Histable::INSTR) 308 instr = (DbeInstr *) hist; 309 else // DBELINE 310 instr = (DbeInstr *) hist->convertto (Histable::INSTR); 311 312 if (instr->func->module->loadobject->flags & SEG_FLAG_OMP) 313 // Skip all frames from libmtsk 314 continue; 315 upcs->append (instr); 316 } 317 delete pcs; 318 node = find_preg_stack (pprid); 319 while (node != root) 320 { 321 upcs->append (node->instr); 322 node = node->ancestor; 323 } 324 node = (CallStackNode *) add_stack (upcs); 325 dview->setObjValue (PROP_USTACK, idx, node); 326 delete upcs; 327 return node; 328 } 329 330 #define JNI_MARKER -3 331 332 // This is one iteration if the third stage of 333 // resolve_frame_info + add_stack pipeline. Works on building the java 334 // stacks 335 void 336 CallStackP::add_stack_java (DataDescriptor *dDscr, long idx, FramePacket *frp, 337 hrtime_t tstamp, uint32_t thrid, 338 Vector<DbeInstr*>* natpcs, bool natpc_added, 339 cstk_ctx_chunk *cstCtxChunk) 340 { 341 Vector<Histable*> *jpcs = NULL; 342 cstk_ctx *cstctx = NULL; 343 if (cstCtxChunk != NULL) 344 { 345 cstctx = cstCtxChunk->cstCtxAr[idx % CSTCTX_CHUNK_SZ]; 346 jpcs = cstctx->jpcs; 347 jpcs->reset (); 348 } 349 if (jpcs == NULL) 350 { 351 // this is when we are not doing the pipeline optimization 352 // Temporary array for resolved addresses 353 // [leaf_pc .. root_pc] == [0..stack_size-1] 354 // Leave room for a possible "truncated" frame 355 if (jpcsP == NULL) 356 jpcsP = new Vector<Histable*>; 357 jpcs = jpcsP; 358 jpcs->reset (); 359 } 360 361 // 362 // Construct the user stack 363 // 364 // Construct Java user stack 365 int jstack_size = frp->stackSize (true); 366 if (jstack_size) 367 { 368 // jpcs = new Vector<Histable*>( jstack_size ); 369 if (frp->isTruncatedStack (true)) 370 { 371 Function *truncf = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc); 372 jpcs->append (truncf->find_dbeinstr (0, 0)); 373 } 374 375 int nind = natpcs->size () - 1; // first native frame 376 for (int jind = jstack_size - 1; jind >= 0; jind--) 377 { 378 bool jleaf = (jind == 0); // is current java frame a leaf? 379 Vaddr mid = frp->getMthdFromStack (jind); 380 int bci = frp->getBciFromStack (jind); 381 DbeInstr *cur_instr = experiment->map_jmid_to_PC (mid, bci, tstamp); 382 jpcs->append (cur_instr); 383 if (bci == JNI_MARKER) 384 { 385 JMethod *j_method = (JMethod*) cur_instr->func; 386 // Find matching native function on the native stack 387 bool found = false; 388 for (; nind >= 0; nind--) 389 { 390 DbeInstr *nat_addr = natpcs->fetch (nind); 391 if (0 == nat_addr) 392 continue; 393 Function *nat_func = nat_addr->func; 394 if (!found && j_method->jni_match (nat_func)) 395 found = true; 396 if (found) 397 { 398 // XXX omazur: the following will skip JNI native method 399 // implemented in JVM itself. 400 // If we are back in JVM switch to processing Java 401 // frames if there are any. 402 if ((nat_func->module->loadobject->flags & SEG_FLAG_JVM) && !jleaf) 403 break; 404 jpcs->append (nat_addr); 405 } 406 } 407 } 408 } 409 } 410 add_stack_java_epilogue (dDscr, idx, frp, tstamp, thrid, natpcs, jpcs, natpc_added); 411 } 412 413 // This is one iteration if the fourth stage of 414 // resolve_frame_info + add_stack pipeline. 415 // It adds the native and java stacks to the stackmap 416 417 void 418 CallStackP::add_stack_java_epilogue (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, Vector<Histable*> *jpcs, bool natpc_added) 419 { 420 CallStackNode *node = NULL; 421 if (!natpc_added) 422 { 423 node = (CallStackNode *) add_stack ((Vector<Histable*>*)natpcs); 424 dDscr->setObjValue (PROP_MSTACK, idx, node); 425 dDscr->setObjValue (PROP_XSTACK, idx, node); 426 dDscr->setObjValue (PROP_USTACK, idx, node); 427 } 428 429 int jstack_size = frp->stackSize (true); 430 if (jstack_size) 431 { 432 if (jpcs != NULL) 433 node = (CallStackNode *) add_stack_d (jpcs); 434 if (node == NULL) 435 node = (CallStackNode*) dDscr->getObjValue (PROP_USTACK, idx); 436 dDscr->setObjValue (PROP_USTACK, idx, node); 437 Function *func = (Function*) node->instr->convertto (Histable::FUNCTION); 438 if (func != dbeSession->get_JUnknown_Function ()) 439 dDscr->setObjValue (PROP_XSTACK, idx, node); 440 } 441 442 JThread *jthread = experiment->map_pckt_to_Jthread (thrid, tstamp); 443 if (jthread == JTHREAD_NONE && jstack_size != 0 && node != NULL) 444 { 445 Function *func = (Function*) node->instr->convertto (Histable::FUNCTION); 446 if (func != dbeSession->get_JUnknown_Function ()) 447 jthread = JTHREAD_DEFAULT; 448 } 449 dDscr->setObjValue (PROP_JTHREAD, idx, jthread); 450 if (jthread == JTHREAD_NONE || (jthread != JTHREAD_DEFAULT && jthread->is_system ())) 451 { 452 if (jvm_node == NULL) 453 { 454 Function *jvm = dbeSession->get_jvm_Function (); 455 if (jvm) 456 { 457 jvm_node = new_Node (root, jvm->find_dbeinstr (0, 0)); 458 CommonPacket::jvm_overhead = jvm_node; 459 } 460 } 461 dDscr->setObjValue (PROP_USTACK, idx, jvm_node); 462 } 463 } 464 465 // This is one iteration of the 2nd stage of 466 // resolve_frame_info + add_stack() pipeline. Builds the stack for a given framepacket. 467 // When pipeline optimization is turnd off, cstctxchunk passed is NULL 468 void 469 CallStackP::add_stack (DataDescriptor *dDscr, long idx, FramePacket *frp, 470 cstk_ctx_chunk* cstCtxChunk) 471 { 472 Vector<DbeInstr*> *natpcs = NULL; 473 cstk_ctx *cstctx = NULL; 474 int stack_size = frp->stackSize (); 475 if (cstCtxChunk != NULL) 476 { 477 cstctx = cstCtxChunk->cstCtxAr[idx % CSTCTX_CHUNK_SZ]; 478 natpcs = cstctx->natpcs; 479 natpcs->reset (); 480 } 481 if (natpcs == NULL) 482 { 483 // this is when we are not doing the pipeline optimization 484 // Temporary array for resolved addresses 485 // [leaf_pc .. root_pc] == [0..stack_size-1] 486 // Leave room for a possible "truncated" frame 487 if (natpcsP == NULL) 488 natpcsP = new Vector<DbeInstr*>; 489 natpcs = natpcsP; 490 natpcs->reset (); 491 } 492 493 bool leaf = true; 494 hrtime_t tstamp = (hrtime_t) dDscr->getLongValue (PROP_TSTAMP, idx); 495 uint32_t thrid = (uint32_t) dDscr->getIntValue (PROP_THRID, idx); 496 497 enum 498 { 499 NONE, 500 CHECK_O7, 501 USE_O7, 502 SKIP_O7 503 } state = NONE; 504 505 Vaddr o7_to_skip = 0; 506 for (int index = 0; index < stack_size; index++) 507 { 508 if (frp->isLeafMark (index)) 509 { 510 state = CHECK_O7; 511 continue; 512 } 513 514 if (state == SKIP_O7) 515 { 516 // remember this bad o7 value since OMP might not recognize it 517 o7_to_skip = frp->getFromStack (index); 518 state = NONE; 519 continue; 520 } 521 522 Vaddr va = frp->getFromStack (index); 523 DbeInstr *cur_instr = experiment->map_Vaddr_to_PC (va, tstamp); 524 #if ARCH(Intel)// TBR? FIXUP_XXX_SPARC_LINUX: switch should be on experiment ARCH, not dbe ARCH 525 // We need to adjust return addresses on intel 526 // in order to attribute inclusive metrics to 527 // proper call instructions. 528 if (experiment->exp_maj_version <= 9) 529 if (!leaf && cur_instr->addr != 0) 530 cur_instr = cur_instr->func->find_dbeinstr (0, cur_instr->addr - 1); 531 #endif 532 533 // Skip PC's from PLT, update leaf and state accordingly 534 if ((cur_instr->func->flags & FUNC_FLAG_PLT) 535 && (leaf || state == CHECK_O7)) 536 { 537 if (state == CHECK_O7) 538 state = USE_O7; 539 leaf = false; 540 continue; 541 } 542 if (state == CHECK_O7) 543 { 544 state = USE_O7; 545 uint64_t saddr = cur_instr->func->save_addr; 546 if (cur_instr->func->isOutlineFunction) 547 // outline functions assume 'save' instruction 548 // Note: they accidentally have saddr == FUNC_ROOT 549 state = SKIP_O7; 550 else if (saddr == FUNC_ROOT) 551 { 552 // If a function is statically determined as a root 553 // but dynamically appears not, don't discard o7. 554 // One such case is __misalign_trap_handler on sparcv9. 555 if (stack_size == 3) 556 state = SKIP_O7; 557 } 558 else if (saddr != FUNC_NO_SAVE && cur_instr->addr > saddr) 559 state = SKIP_O7; 560 } 561 else if (state == USE_O7) 562 { 563 state = NONE; 564 if (cur_instr->flags & PCInvlFlag) 565 continue; 566 } 567 if (leaf) 568 { 569 Vaddr evpc = (Vaddr) dDscr->getLongValue (PROP_VIRTPC, idx); 570 if (evpc != 0 571 && !(index > 0 && frp->isLeafMark (index - 1) 572 && evpc == (Vaddr) (-1))) 573 { 574 /* contains hwcprof info */ 575 cur_instr->func->module->read_hwcprof_info (); 576 577 // complete ABS validation of candidate eventPC/eventEA 578 // and correction/adjustment of collected callstack leaf PC 579 DbeInstr *candPC = experiment->map_Vaddr_to_PC (evpc, tstamp); 580 Vaddr vaddr = (Vaddr) dDscr->getLongValue (PROP_VADDR, idx); 581 Vaddr tmp_vaddr = vaddr; 582 int abst_type; 583 uint32_t tag = dDscr->getIntValue (PROP_HWCTAG, idx); 584 if (tag < 0 || tag >= MAX_HWCOUNT) 585 abst_type = ABST_NOPC; 586 else 587 abst_type = experiment->coll_params.hw_tpc[tag]; 588 589 // We need to adjust addresses for ABST_EXACT_PEBS_PLUS1 590 // (Nehalem/SandyBridge PEBS identifies PC+1, not PC) 591 if (abst_type == ABST_EXACT_PEBS_PLUS1 && candPC->addr != 0) 592 candPC = candPC->func->find_dbeinstr (0, candPC->func->find_previous_addr (candPC->addr)); 593 594 cur_instr = adjustEvent (cur_instr, candPC, tmp_vaddr, abst_type); 595 if (vaddr != tmp_vaddr) 596 { 597 if (tmp_vaddr < ABS_CODE_RANGE) 598 { 599 /* post processing backtrack failed */ 600 dDscr->setValue (PROP_VADDR, idx, tmp_vaddr); 601 dDscr->setValue (PROP_PADDR, idx, ABS_NULL); 602 /* hwcp->eventVPC = xxxxx leave eventPC alone, 603 * or can we set it to leafpc? */ 604 dDscr->setValue (PROP_PHYSPC, idx, ABS_NULL); 605 } 606 else 607 { 608 /* internal error: why would post-processing modify vaddr? */ 609 dDscr->setValue (PROP_PADDR, idx, (Vaddr) (-1)); 610 dDscr->setValue (PROP_PHYSPC, idx, (Vaddr) (-1)); 611 } 612 } 613 } 614 } 615 natpcs->append (cur_instr); 616 leaf = false; 617 618 // A hack to deceive the user into believing that outlined code 619 // is called from the base function 620 DbeInstr *drvd = cur_instr->func->derivedNode; 621 if (drvd != NULL) 622 natpcs->append (drvd); 623 } 624 if (frp->isTruncatedStack ()) 625 { 626 Function *truncf = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc); 627 natpcs->append (truncf->find_dbeinstr (0, 0)); 628 } 629 else if (frp->isFailedUnwindStack ()) 630 { 631 Function *funwf = dbeSession->getSpecialFunction (DbeSession::FailedUnwindFunc); 632 natpcs->append (funwf->find_dbeinstr (0, 0)); 633 } 634 635 CallStackNode *node = (CallStackNode*) add_stack ((Vector<Histable*>*)natpcs); 636 dDscr->setObjValue (PROP_MSTACK, idx, node); 637 dDscr->setObjValue (PROP_XSTACK, idx, node); 638 dDscr->setObjValue (PROP_USTACK, idx, node); 639 640 // OpenMP 3.0 stacks 641 stack_size = frp->ompstack->size (); 642 if (stack_size > 0 || frp->omp_state == OMP_IDLE_STATE) 643 { 644 Function *func; 645 Vector<Histable*> *omppcs = new Vector<Histable*>(stack_size); 646 Vector<Histable*> *ompxpcs = new Vector<Histable*>(stack_size); 647 switch (frp->omp_state) 648 { 649 case OMP_IDLE_STATE: 650 case OMP_RDUC_STATE: 651 case OMP_IBAR_STATE: 652 case OMP_EBAR_STATE: 653 case OMP_LKWT_STATE: 654 case OMP_CTWT_STATE: 655 case OMP_ODWT_STATE: 656 case OMP_ATWT_STATE: 657 { 658 func = dbeSession->get_OMP_Function (frp->omp_state); 659 DbeInstr *instr = func->find_dbeinstr (0, 0); 660 omppcs->append (instr); 661 ompxpcs->append (instr); 662 break; 663 } 664 } 665 Vector<Vaddr> *stck = frp->ompstack; 666 leaf = true; 667 for (int index = 0; index < stack_size; index++) 668 { 669 if (stck->fetch (index) == SP_LEAF_CHECK_MARKER) 670 { 671 state = CHECK_O7; 672 continue; 673 } 674 if (state == SKIP_O7) 675 { 676 state = NONE; 677 continue; 678 } 679 680 // The OMP stack might not have enough information to know to discard a bad o7. 681 // So just remember what the native stack skipped. 682 if (o7_to_skip == stck->fetch (index)) 683 { 684 state = NONE; 685 continue; 686 } 687 Vaddr va = stck->fetch (index); 688 DbeInstr *cur_instr = experiment->map_Vaddr_to_PC (va, tstamp); 689 690 // Skip PC's from PLT, update leaf and state accordingly 691 if ((cur_instr->func->flags & FUNC_FLAG_PLT) && 692 (leaf || state == CHECK_O7)) 693 { 694 if (state == CHECK_O7) 695 state = USE_O7; 696 leaf = false; 697 continue; 698 } 699 if (state == CHECK_O7) 700 { 701 state = USE_O7; 702 uint64_t saddr = cur_instr->func->save_addr; 703 if (cur_instr->func->isOutlineFunction) 704 // outline functions assume 'save' instruction 705 // Note: they accidentally have saddr == FUNC_ROOT 706 state = SKIP_O7; 707 else if (saddr == FUNC_ROOT) 708 { 709 // If a function is statically determined as a root 710 // but dynamically appears not, don't discard o7. 711 // One such case is __misalign_trap_handler on sparcv9. 712 if (stack_size == 3) 713 state = SKIP_O7; 714 } 715 else if (saddr != FUNC_NO_SAVE && cur_instr->addr > saddr) 716 state = SKIP_O7; 717 } 718 else if (state == USE_O7) 719 { 720 state = NONE; 721 if (cur_instr->flags & PCInvlFlag) 722 continue; 723 } 724 725 DbeLine *dbeline = (DbeLine*) cur_instr->convertto (Histable::LINE); 726 if (cur_instr->func->usrfunc) 727 { 728 dbeline = dbeline->sourceFile->find_dbeline (cur_instr->func->usrfunc, dbeline->lineno); 729 omppcs->append (dbeline); 730 } 731 else if (dbeline->lineno > 0) 732 omppcs->append (dbeline); 733 else 734 omppcs->append (cur_instr); 735 if (dbeline->is_set (DbeLine::OMPPRAGMA) && 736 frp->omp_state == OMP_WORK_STATE) 737 dDscr->setValue (PROP_OMPSTATE, idx, OMP_OVHD_STATE); 738 ompxpcs->append (cur_instr); 739 leaf = false; 740 } 741 if (frp->omptruncated == SP_TRUNC_STACK_MARKER) 742 { 743 func = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc); 744 DbeInstr *instr = func->find_dbeinstr (0, 0); 745 omppcs->append (instr); 746 ompxpcs->append (instr); 747 } 748 else if (frp->omptruncated == SP_FAILED_UNWIND_MARKER) 749 { 750 func = dbeSession->getSpecialFunction (DbeSession::FailedUnwindFunc); 751 DbeInstr *instr = func->find_dbeinstr (0, 0); 752 omppcs->append (instr); 753 ompxpcs->append (instr); 754 } 755 756 // User model call stack 757 node = (CallStackNode*) add_stack (omppcs); 758 dDscr->setObjValue (PROP_USTACK, idx, node); 759 delete omppcs; 760 761 // Expert call stack 762 node = (CallStackNode*) add_stack (ompxpcs); 763 dDscr->setObjValue (PROP_XSTACK, idx, node); 764 delete ompxpcs; 765 dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT); 766 return; 767 } 768 769 // OpenMP 2.5 stacks 770 if (frp->omp_cprid || frp->omp_state) 771 { 772 DataView *dview = experiment->getOpenMPdata (); 773 if (dview == NULL) 774 { 775 // It appears we may get OMP_SERL_STATE from a passive libmtsk 776 dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT); 777 return; 778 } 779 if (dview->getDataDescriptor () == dDscr) 780 { 781 // Don't process the user stack for OpenMP fork events yet 782 dDscr->setObjValue (PROP_USTACK, idx, (void*) NULL); 783 dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT); 784 return; 785 } 786 Vector<Histable*> *omppcs = new Vector<Histable*>(stack_size); 787 788 // Construct OMP user stack 789 // Find the bottom frame 790 int btm = 0; 791 switch (frp->omp_state) 792 { 793 case OMP_IDLE_STATE: 794 { 795 Function *func = dbeSession->get_OMP_Function (frp->omp_state); 796 omppcs->append (func->find_dbeinstr (0, 0)); 797 // XXX: workaround for inconsistency between OMP_IDLE_STATE 798 // and omp_cprid != 0 799 frp->omp_cprid = 0; 800 btm = natpcs->size (); 801 break; 802 } 803 case OMP_RDUC_STATE: 804 case OMP_IBAR_STATE: 805 case OMP_EBAR_STATE: 806 case OMP_LKWT_STATE: 807 case OMP_CTWT_STATE: 808 case OMP_ODWT_STATE: 809 case OMP_ATWT_STATE: 810 { 811 Function *func = dbeSession->get_OMP_Function (frp->omp_state); 812 omppcs->append (func->find_dbeinstr (0, 0)); 813 bool inOMP = false; 814 for (btm = 0; btm < natpcs->size (); btm++) 815 { 816 LoadObject *lo = natpcs->fetch (btm)->func->module->loadobject; 817 if (!inOMP) 818 { 819 if (lo->flags & SEG_FLAG_OMP) 820 inOMP = true; 821 } 822 else if (!(lo->flags & SEG_FLAG_OMP)) 823 break; 824 } 825 break; 826 } 827 case OMP_NO_STATE: 828 case OMP_WORK_STATE: 829 case OMP_SERL_STATE: 830 default: 831 break; 832 } 833 834 // Find the top frame 835 int top = -1; 836 switch (frp->omp_state) 837 { 838 case OMP_IDLE_STATE: 839 break; 840 default: 841 { 842 dview->sort (PROP_CPRID); 843 Datum tval; 844 tval.setUINT64 (frp->omp_cprid); 845 long pidx = dview->getIdxByVals (&tval, DataView::REL_EQ); 846 if (pidx < 0) // No parent. Process the entire nat_stack 847 top = natpcs->size () - 1; 848 else 849 { 850 uint32_t pthrid = (uint32_t) dview->getIntValue (PROP_THRID, pidx); 851 if (thrid != pthrid) 852 { 853 // Parent is on a different stack. 854 // Process the entire nat_stack. Skip libthread. 855 for (top = natpcs->size () - 1; top >= 0; top--) 856 { 857 DbeInstr *instr = natpcs->fetch (top); 858 if (instr->func->module->loadobject->flags & SEG_FLAG_OMP) 859 break; 860 } 861 if (top < 0) // None found. May be incomplete call stack 862 top = natpcs->size () - 1; 863 } 864 else 865 { 866 // Parent is on the same stack. Find match. 867 top = natpcs->size () - 1; 868 void *pnat_stack = dview->getObjValue (PROP_MSTACK, pidx); 869 Vector<Histable*> *ppcs = getStackPCs (pnat_stack); 870 for (int ptop = ppcs->size () - 1; top >= 0 && ptop >= 0; 871 top--, ptop--) 872 { 873 if (natpcs->fetch (top) != ppcs->fetch (ptop)) 874 break; 875 } 876 delete ppcs; 877 } 878 } 879 // If no frames are found for Barrier/Reduction save at least one 880 if ((frp->omp_state == OMP_RDUC_STATE 881 || frp->omp_state == OMP_IBAR_STATE 882 || frp->omp_state == OMP_EBAR_STATE) 883 && top < btm && btm < natpcs->size ()) 884 top = btm; 885 } 886 } 887 for (int i = btm; i <= top; ++i) 888 { 889 DbeInstr *instr = natpcs->fetch (i); 890 if (instr->func->module->loadobject->flags & SEG_FLAG_OMP) 891 continue; // Skip all frames from libmtsk 892 omppcs->append (instr); 893 } 894 node = find_preg_stack (frp->omp_cprid); 895 while (node != root) 896 { 897 omppcs->append (node->instr); 898 node = node->ancestor; 899 } 900 node = (CallStackNode *) add_stack (omppcs); 901 dDscr->setObjValue (PROP_USTACK, idx, node); 902 delete omppcs; 903 dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT); 904 return; 905 } 906 907 // Construct Java user stack 908 add_stack_java (dDscr, idx, frp, tstamp, thrid, natpcs, true, NULL); 909 } 910 911 // adjustment of leafPC/eventVA for XHWC packets with candidate eventPC 912 // Called from CallStack during initial processing of the events 913 DbeInstr * 914 CallStackP::adjustEvent (DbeInstr *leafPC, DbeInstr *candPC, Vaddr &eventVA, 915 int abst_type) 916 { 917 // increment counter of dataspace events 918 experiment->dsevents++; 919 bool isPrecise; 920 if (abst_type == ABST_EXACT_PEBS_PLUS1) 921 isPrecise = true; 922 else if (abst_type == ABST_EXACT) 923 isPrecise = true; 924 else 925 isPrecise = false; 926 927 if (isPrecise) 928 /* precise backtracking */ 929 /* assume within 1 instruction of leaf (this could be checked here) */ 930 // no change to eventVA or candPC 931 return candPC; 932 933 Function *func = leafPC->func; 934 unsigned int bt_entries = func->module->bTargets.size (); 935 DbeInstr *bestPC = NULL; 936 937 // bt == branch target (potential destination of a branch 938 if (bt_entries == 0) 939 { // no XHWCprof info for this module 940 // increment counter 941 experiment->dsnoxhwcevents++; 942 943 // see if event is to be processed anyway 944 if (!dbeSession->check_ignore_no_xhwcprof ()) 945 { 946 // Don't ignore error 947 // XXX -- set error code in event VA -- replace with other mechanism 948 if (eventVA > ABS_CODE_RANGE) 949 eventVA = ABS_NULL; 950 eventVA |= ABS_NO_CTI_INFO; // => effective address can't be validated 951 bestPC = leafPC; // => no PC correction possible 952 } 953 else 954 bestPC = candPC; // assume the event valid 955 } 956 else 957 { 958 // we have the info to verify the backtracking 959 target_info_t *bt; 960 int bt_entry = bt_entries; 961 uint64_t leafPC_offset = func->img_offset + leafPC->addr; 962 uint64_t candPC_offset = candPC->func->img_offset + candPC->addr; 963 do 964 { 965 bt_entry--; 966 bt = func->module->bTargets.fetch (bt_entry); 967 /* bts seem to be sorted by offset, smallest to largest */ 968 } 969 while (bt_entry > 0 && bt->offset > leafPC_offset); 970 /* if bt_entry == 0, all items have been checked */ 971 972 if (bt->offset > leafPC_offset) 973 { /* XXXX isn't is possible that all bt's are after leafPC_offset? */ 974 bestPC = leafPC; // actual event PC can't be determined 975 if (eventVA > ABS_CODE_RANGE) 976 eventVA = ABS_NULL; 977 eventVA |= ABS_INFO_FAILED; // effective address can't be validated 978 } 979 else if (bt->offset > candPC_offset) 980 { 981 // use synthetic PC corresponding to bTarget 982 bestPC = func->find_dbeinstr (PCTrgtFlag, bt->offset - func->img_offset); 983 if (eventVA > ABS_CODE_RANGE) 984 eventVA = ABS_NULL; 985 eventVA |= ABS_CTI_TARGET; // effective address can't be validated 986 } 987 else 988 bestPC = candPC; // accept provided virtual address as valid 989 } 990 return bestPC; 991 } 992 993 void * 994 CallStackP::add_stack_d (Vector<Histable*> *objs) 995 { 996 // objs: root..leaf 997 // Reverse objs 998 for (int i = 0, j = objs->size () - 1; i < j; ++i, --j) 999 objs->swap (i, j); 1000 return add_stack (objs); 1001 } 1002 1003 CallStackNode::CallStackNode (CallStackNode *_ancestor, Histable *_instr) 1004 { 1005 ancestor = _ancestor; 1006 instr = _instr; 1007 alt_node = NULL; 1008 } 1009 1010 CallStackNode::~CallStackNode () { } 1011 1012 bool 1013 CallStackNode::compare (long start, long end, Vector<Histable*> *objs, CallStackNode *mRoot) 1014 { 1015 CallStackNode *p = this; 1016 for (long i = start; i < end; i++, p = p->get_ancestor ()) 1017 if (p == NULL || p->get_instr () != objs->get (i)) 1018 return false; 1019 return p == mRoot; 1020 } 1021 1022 void 1023 CallStackNode::dump () 1024 { 1025 const char *s = ""; 1026 int sz = 0; 1027 for (CallStackNode *p = this; p; p = p->get_ancestor ()) 1028 { 1029 fprintf (stderr, NTXT ("%.*s 0x%08llx id=0x%08llx %s\n"), sz, s, 1030 (long long) p, (long long) p->get_instr ()->id, 1031 STR (p->get_instr ()->get_name ())); 1032 s = "-"; 1033 sz += 1; 1034 } 1035 } 1036 1037 long total_calls_add_stack, total_stacks, total_nodes, call_stack_size[201]; 1038 1039 void * 1040 CallStackP::add_stack (Vector<Histable*> *objs) 1041 { 1042 // objs: leaf..root 1043 uint64_t hash = objs->size (); 1044 for (long i = objs->size () - 1; i >= 0; --i) 1045 hash ^= (unsigned long long) objs->get (i); 1046 1047 uint64_t key = hash ? hash : 1; 1048 CallStackNode *node = cstackMap->get (key); 1049 #ifdef DEBUG 1050 if (DUMP_CALL_STACK) 1051 { 1052 total_calls_add_stack++; 1053 call_stack_size[objs->size () > 200 ? 200 : objs->size ()]++; 1054 Dprintf (DUMP_CALL_STACK, 1055 "add_stack: %lld size=%lld key=0x%08llx cashNode=0x%08llx\n", 1056 (long long) total_calls_add_stack, (long long) objs->size (), 1057 (long long) key, (long long) node); 1058 for (long i = 0, sz = VecSize (objs); i < sz; i++) 1059 Dprintf (DUMP_CALL_STACK, " add_stack: %.*s 0x%08llx id=0x%08llx %s\n", 1060 (int) i, NTXT (" "), (long long) objs->get (i), 1061 (long long) objs->get (i)->id, STR (objs->get (i)->get_name ())); 1062 } 1063 #endif 1064 if (node && node->compare (0, objs->size (), objs, root)) 1065 { 1066 Dprintf (DUMP_CALL_STACK, NTXT ("STACK FOUND: key=0x%08llx 0x%08llx id=0x%08llx %s\n"), 1067 (long long) key, (long long) node, 1068 (long long) node->get_instr ()->id, 1069 STR (node->get_instr ()->get_name ())); 1070 return node; 1071 } 1072 node = root; 1073 for (long i = objs->size () - 1; i >= 0; i--) 1074 { 1075 Histable *instr = objs->get (i); 1076 int old_count = node->count; 1077 int left; 1078 CallStackNode *nd = node->find (instr, &left); 1079 if (nd) 1080 { 1081 node = nd; 1082 continue; 1083 } 1084 cstackLock->aquireLock (); // Use one lock for all nodes 1085 // node->aquireLock(); 1086 if (old_count != node->count) 1087 { 1088 nd = node->find (instr, &left); 1089 if (nd) 1090 { // the other thread has created this node 1091 cstackLock->releaseLock (); 1092 // node->releaseLock(); 1093 node = nd; 1094 continue; 1095 } 1096 } 1097 // New Call Stack 1098 total_stacks++; 1099 nd = node; 1100 CallStackNode *first = NULL; 1101 do 1102 { 1103 CallStackNode *anc = node; 1104 total_nodes++; 1105 node = new_Node (anc, objs->get (i)); 1106 if (first) 1107 anc->append (node); 1108 else 1109 first = node; 1110 } 1111 while (i-- > 0); 1112 nd->insert (left, first); 1113 cstackLock->releaseLock (); 1114 // nd->releaseLock(); 1115 break; 1116 } 1117 cstackMap->put (key, node); 1118 if (DUMP_CALL_STACK) 1119 node->dump (); 1120 return node; 1121 } 1122 1123 CallStackNode * 1124 CallStackP::get_node (int n) 1125 { 1126 if (n < nodes) 1127 return &chunks[n / CHUNKSZ][n % CHUNKSZ]; 1128 return NULL; 1129 } 1130 1131 /* 1132 * Debugging methods 1133 */ 1134 void 1135 CallStackP::print (FILE *fd) 1136 { 1137 FILE *f = (fd == NULL ? stderr : fd); 1138 fprintf (f, GTXT ("CallStack: nodes = %d\n\n"), nodes); 1139 int maxdepth = 0; 1140 int maxwidth = 0; 1141 const char *t; 1142 char *n; 1143 for (int i = 0; i < nodes; i++) 1144 { 1145 CallStackNode *node = &chunks[i / CHUNKSZ][i % CHUNKSZ]; 1146 Histable *instr = node->instr; 1147 if (instr->get_type () == Histable::LINE) 1148 { 1149 t = "L"; 1150 n = ((DbeLine *) instr)->func->get_name (); 1151 } 1152 else if (instr->get_type () == Histable::INSTR) 1153 { 1154 t = "I"; 1155 n = ((DbeInstr *) instr)->func->get_name (); 1156 } 1157 else 1158 { 1159 t = "O"; 1160 n = instr->get_name (); 1161 } 1162 long long addr = (long long) instr->get_addr (); 1163 fprintf (f, GTXT ("node: 0x%016llx anc: 0x%016llx -- 0x%016llX: %s %s\n"), 1164 (unsigned long long) node, (unsigned long long) node->ancestor, 1165 addr, t, n); 1166 } 1167 fprintf (f, GTXT ("md = %d, mw = %d\n"), maxdepth, maxwidth); 1168 } 1169 1170 /* 1171 * Static CallStack methods 1172 */ 1173 CallStack * 1174 CallStack::getInstance (Experiment *exp) 1175 { 1176 return new CallStackP (exp); 1177 } 1178 1179 int 1180 CallStack::stackSize (void *stack) 1181 { 1182 CallStackNode *node = (CallStackNode *) stack; 1183 int sz = 0; 1184 for (; node; node = node->ancestor) 1185 sz++; 1186 return sz - 1; // don't count the root node 1187 } 1188 1189 Histable * 1190 CallStack::getStackPC (void *stack, int n) 1191 { 1192 CallStackNode *node = (CallStackNode *) stack; 1193 while (n-- && node) 1194 node = node->ancestor; 1195 if (node == NULL) 1196 return dbeSession->get_Unknown_Function ()->find_dbeinstr (PCInvlFlag, 0); 1197 return node->instr; 1198 } 1199 1200 Vector<Histable*> * 1201 CallStack::getStackPCs (void *stack, bool get_hide_stack) 1202 { 1203 Vector<Histable*> *res = new Vector<Histable*>; 1204 CallStackNode *node = (CallStackNode *) stack; 1205 if (get_hide_stack && node->alt_node != NULL) 1206 node = node->alt_node; 1207 while (node && node->ancestor) 1208 { // skip the root node 1209 res->append (node->instr); 1210 node = node->ancestor; 1211 } 1212 return res; 1213 } 1214 1215 int 1216 CallStack::compare (void *stack1, void *stack2) 1217 { 1218 // Quick comparision 1219 if (stack1 == stack2) 1220 return 0; 1221 1222 CallStackNode *node1 = (CallStackNode *) stack1; 1223 CallStackNode *node2 = (CallStackNode *) stack2; 1224 while (node1 != NULL && node2 != NULL) 1225 { 1226 //to keep the result const on different platforms 1227 //we use instr->id instead of instr 1228 if (node1->instr->id < node2->instr->id) 1229 return -1; 1230 else if (node1->instr->id > node2->instr->id) 1231 return 1; 1232 node1 = node1->ancestor; 1233 node2 = node2->ancestor; 1234 } 1235 if (node1 == NULL && node2 != NULL) 1236 return -1; 1237 else if (node1 != NULL && node2 == NULL) 1238 return 1; 1239 else 1240 return 0; 1241 } 1242 1243 // LIBRARY VISIBILITY 1244 1245 void 1246 CallStack::setHideStack (void *stack, void *hideStack) 1247 { 1248 CallStackNode *hNode = (CallStackNode *) stack; 1249 hNode->alt_node = (CallStackNode *) hideStack; 1250 } 1251