1 /* Copyright (C) 2021 Free Software Foundation, Inc. 2 Contributed by Oracle. 3 4 This file is part of GNU Binutils. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, 51 Franklin Street - Fifth Floor, Boston, 19 MA 02110-1301, USA. */ 20 21 #include "config.h" 22 #include <alloca.h> 23 #include <errno.h> 24 #include <signal.h> 25 #include <ucontext.h> 26 #include <stdlib.h> /* exit() */ 27 #include <sys/param.h> 28 #include <sys/utsname.h> /* struct utsname */ 29 #include <sys/resource.h> 30 #include <sys/syscall.h> /* system call fork() */ 31 32 #include "gp-defs.h" 33 #include "collector.h" 34 #include "descendants.h" 35 #include "gp-experiment.h" 36 #include "memmgr.h" 37 #include "cc_libcollector.h" 38 #include "tsd.h" 39 40 /* TprintfT(<level>,...) definitions. Adjust per module as needed */ 41 #define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings 42 #define DBG_LT1 1 // for configuration details, warnings 43 #define DBG_LT2 2 44 #define DBG_LT3 3 45 46 typedef unsigned long ulong_t; 47 48 extern char **environ; 49 extern void __collector_close_experiment (); 50 extern int __collector_set_size_limit (char *par); 51 52 /* ------- internal function prototypes ---------- */ 53 CollectorModule __collector_register_module (ModuleInterface *modint); 54 static void write_sample (char *name); 55 static const char *__collector_get_params (); 56 static const char *__collector_get_expdir (); 57 static FrameInfo __collector_getUserCtx (CollectorModule modl, HiResTime ts, int mode, void *arg); 58 static FrameInfo __collector_getUID1 (CM_Array *arg); 59 static int __collector_writeMetaData (CollectorModule modl, char *format, ...); 60 static int __collector_writeDataRecord (CollectorModule modl, struct Common_packet *pckt); 61 static int __collector_writeDataPacket (CollectorModule modl, struct CM_Packet *pckt); 62 static void *allocCSize (struct Heap*, unsigned, int); 63 static void freeCSize (struct Heap*, void*, unsigned); 64 static void *allocVSize (struct Heap*, unsigned); 65 static void *reallocVSize (struct Heap*, void*, unsigned); 66 67 static int collector_create_expr_dir (const char *new_exp_name); 68 static int collector_create_expr_dir_lineage (const char *parent_exp_name); 69 static int collector_exp_dir_append_x (int linenum, const char *parent_exp_name); 70 static int collector_tail_init (const char *parent_exp_name); 71 static int log_open (); 72 static void log_header_write (sp_origin_t origin); 73 static void log_pause (); 74 static void log_resume (); 75 static void fs_warn (); 76 static void log_close (); 77 static void get_progspec (char *cmdline, int tmp_sz, char *progname, int sz); 78 static void sample_handler (int, siginfo_t*, void*); 79 static int sample_set_interval (char *); 80 static int set_duration (char *); 81 static int sample_set_user_sig (char *); 82 static void pause_handler (int, siginfo_t*, void*); 83 static int pause_set_user_sig (char *); 84 static int set_user_sig_action (char*); 85 static void ovw_open (); 86 static hrtime_t ovw_write (); 87 88 /* ------- global data controlling the collector's behavior -------- */ 89 90 static CollectorInterface collector_interface ={ 91 __collector_register_module, /* registerModule */ 92 __collector_get_params, /* getParams */ 93 __collector_get_expdir, /* getExpDir */ 94 __collector_log_write, /* writeLog */ 95 __collector_getUserCtx, /* getFrameInfo */ 96 __collector_getUID1, /* getUID */ 97 __collector_getUID, /* getUID2 */ 98 __collector_getStackTrace, /* getStackTrace */ 99 __collector_writeMetaData, /* writeMetaData */ 100 __collector_writeDataRecord, /* writeDataRecord */ 101 __collector_writeDataPacket, /* writeDataPacket */ 102 write_sample, /* write_sample */ 103 get_progspec, /* get_progspec */ 104 __collector_open_experiment, /* open_experiment */ 105 NULL, /* getHiResTime */ 106 __collector_newHeap, /* newHeap */ 107 __collector_deleteHeap, /* deleteHeap */ 108 allocCSize, /* allocCSize */ 109 freeCSize, /* freeCSize */ 110 allocVSize, /* allocVSize */ 111 reallocVSize, /* reallocVSize */ 112 __collector_tsd_create_key, /* createKey */ 113 __collector_tsd_get_by_key, /* getKey */ 114 __collector_dlog /* writeDebugInfo */ 115 }; 116 117 #define MAX_MODULES 32 118 static ModuleInterface *modules[MAX_MODULES]; 119 static int modules_st[MAX_MODULES]; 120 static void *modules_hndl[MAX_MODULES]; 121 static volatile int nmodules = 0; 122 123 /* flag set non-zero, if data collected implies a filesystem warning is appropriate */ 124 static int fs_matters = 0; 125 static const char *collector_params = NULL; 126 static const char *project_home = NULL; 127 Heap *__collector_heap = NULL; 128 int __collector_no_threads; 129 int __collector_libthread_T1 = -1; 130 131 static volatile int collector_paused = 0; 132 133 int __collector_tracelevel = -1; 134 static int collector_debug_opt = 0; 135 136 hrtime_t __collector_next_sample = 0; 137 int __collector_sample_period = 0; /* if non-zero, periodic sampling is enabled */ 138 139 hrtime_t __collector_delay_start = 0; /* if non-zero, delay before starting data */ 140 hrtime_t __collector_terminate_time = 0; /* if non-zero, fixed duration run */ 141 142 static collector_mutex_t __collector_glob_lock = COLLECTOR_MUTEX_INITIALIZER; 143 static collector_mutex_t __collector_open_guard = COLLECTOR_MUTEX_INITIALIZER; 144 static collector_mutex_t __collector_close_guard = COLLECTOR_MUTEX_INITIALIZER; 145 static collector_mutex_t __collector_sample_guard = COLLECTOR_MUTEX_INITIALIZER; 146 static collector_mutex_t __collector_suspend_guard = COLLECTOR_MUTEX_INITIALIZER; 147 static collector_mutex_t __collector_resume_guard = COLLECTOR_MUTEX_INITIALIZER; 148 char __collector_exp_dir_name[MAXPATHLEN + 1] = ""; /* experiment directory */ 149 int __collector_size_limit = 0; 150 151 static char *archive_mode = NULL; 152 153 volatile sp_state_t __collector_expstate = EXP_INIT; 154 static int exp_origin = SP_ORIGIN_LIBCOL_INIT; 155 static int exp_open = 0; 156 int __collector_exp_active = 0; 157 static int paused_when_suspended = 0; 158 static int exp_initted = 0; 159 static char exp_progspec[_POSIX_ARG_MAX + 1]; /* program cmdline. includes args */ 160 static char exp_progname[_POSIX_ARG_MAX + 1]; /* program name == argv[0] */ 161 162 hrtime_t __collector_start_time = 0; 163 static time_t start_sec_time = 0; 164 165 /* Sample related data */ 166 static int sample_installed = 0; /* 1 if the sample signal handler installed */ 167 static int sample_mode = 0; /* dynamically turns sample record writing on/off */ 168 static int sample_number = 0; /* index of the current sample record */ 169 static struct sigaction old_sample_handler; 170 int __collector_sample_sig = -1; /* user-specified sample signal */ 171 int __collector_sample_sig_warn = 0; /* non-zero if warning already given */ 172 173 /* Pause/resume related data */ 174 static struct sigaction old_pause_handler; 175 int __collector_pause_sig = -1; /* user-specified pause signal */ 176 int __collector_pause_sig_warn = 0; /* non-zero if warning already given */ 177 178 static struct sigaction old_close_handler; 179 static struct sigaction old_exit_handler; 180 181 /* Experiment files */ 182 static char ovw_name[MAXPATHLEN]; /* Overview data file name */ 183 184 /* macro to convert a timestruc to hrtime_t */ 185 #define ts2hrt(x) ((hrtime_t)(x).tv_sec*NANOSEC + (hrtime_t)(x).tv_nsec) 186 187 static void 188 init_tracelevel () 189 { 190 #if DEBUG 191 char *s = CALL_UTIL (getenv)("SP_COLLECTOR_TRACELEVEL"); 192 if (s != NULL) 193 __collector_tracelevel = CALL_UTIL (atoi)(s); 194 TprintfT (DBG_LT0, "collector: SP_COLLECTOR_TRACELEVEL=%d\n", __collector_tracelevel); 195 s = CALL_UTIL (getenv)("SP_COLLECTOR_DEBUG"); 196 if (s != NULL) 197 collector_debug_opt = CALL_UTIL (atoi)(s) & ~(SP_DUMP_TIME | SP_DUMP_FLAG); 198 #endif 199 } 200 201 static CollectorInterface * 202 get_collector_interface () 203 { 204 if (collector_interface.getHiResTime == NULL) 205 collector_interface.getHiResTime = __collector_gethrtime; 206 return &collector_interface; 207 } 208 209 /* 210 * __collector_module_init is an alternate method to initialize 211 * dynamic collector modules (er_heap, er_sync, er_iotrace, er_mpi, tha). 212 * Every module that needs to register itself with libcollector 213 * before the experiment is open implements its own global 214 * __collector_module_init and makes sure the next one is called. 215 */ 216 static void 217 collector_module_init (CollectorInterface *col_intf) 218 { 219 int nmodules = 0; 220 221 ModuleInitFunc next_init = (ModuleInitFunc) dlsym (RTLD_DEFAULT, "__collector_module_init"); 222 if (next_init != NULL) 223 { 224 nmodules++; 225 next_init (col_intf); 226 } 227 TprintfT (DBG_LT1, "collector_module_init: %d modules\n", nmodules); 228 } 229 230 /* Routines concerned with general experiment start and stop */ 231 232 /* initialization -- init section routine -- called when libcollector loaded */ 233 static void collector_init () __attribute__ ((constructor)); 234 235 static void 236 collector_init () 237 { 238 if (__collector_util_init () != 0) 239 /* we can't do anything without various utility functions */ 240 abort (); 241 init_tracelevel (); 242 243 /* 244 * Unconditionally install the SIGPROF handler 245 * to process signals originated in dtracelets. 246 */ 247 __collector_sigprof_install (); 248 249 /* Initialize all preloaded modules */ 250 collector_module_init (get_collector_interface ()); 251 252 /* determine experiment name */ 253 char *exp = CALL_UTIL (getenv)("SP_COLLECTOR_EXPNAME"); 254 if ((exp == NULL) || (CALL_UTIL (strlen)(exp) == 0)) 255 { 256 TprintfT (DBG_LT0, "collector_init: SP_COLLECTOR_EXPNAME undefined - no experiment to start\n"); 257 /* not set -- no experiment to run */ 258 return; 259 } 260 else 261 TprintfT (DBG_LT1, "collector_init: found SP_COLLECTOR_EXPNAME = %s\n", exp); 262 263 /* determine the data descriptor for the experiment */ 264 char *params = CALL_UTIL (getenv)("SP_COLLECTOR_PARAMS"); 265 if (params == NULL) 266 { 267 TprintfT (0, "collector_init: SP_COLLECTOR_PARAMS undefined - no experiment to start\n"); 268 return; 269 } 270 271 /* now do the real open of the experiment */ 272 if (__collector_open_experiment (exp, params, SP_ORIGIN_LIBCOL_INIT)) 273 { 274 TprintfT (0, "collector_init: __collector_open_experiment failed\n"); 275 /* experiment open failed, close it */ 276 __collector_close_experiment (); 277 return; 278 } 279 return; 280 } 281 282 CollectorModule 283 __collector_register_module (ModuleInterface *modint) 284 { 285 TprintfT (DBG_LT1, "collector: module %s calls for registration.\n", 286 modint->description == NULL ? "(null)" : modint->description); 287 if (modint == NULL) 288 return COLLECTOR_MODULE_ERR; 289 if (nmodules >= MAX_MODULES) 290 return COLLECTOR_MODULE_ERR; 291 if (modint->initInterface && 292 modint->initInterface (get_collector_interface ())) 293 return COLLECTOR_MODULE_ERR; 294 int idx = nmodules++; 295 modules[idx] = modint; 296 modules_st[idx] = 0; 297 298 if (exp_open && modint->openExperiment) 299 { 300 modules_st[idx] = modint->openExperiment (__collector_exp_dir_name); 301 if (modules_st[idx] == COL_ERROR_NONE && modules[idx]->description != NULL) 302 { 303 modules_hndl[idx] = __collector_create_handle (modules[idx]->description); 304 if (modules_hndl[idx] == NULL) 305 modules_st[idx] = -1; 306 } 307 } 308 if (__collector_exp_active && collector_paused == 0 && 309 modint->startDataCollection && modules_st[idx] == 0) 310 modint->startDataCollection (); 311 TprintfT (DBG_LT1, "collector: module %s (%d) registered.\n", 312 modint->description == NULL ? "(null)" : modint->description, idx); 313 return (CollectorModule) idx; 314 } 315 316 static const char * 317 __collector_get_params () 318 { 319 return collector_params; 320 } 321 322 static const char * 323 __collector_get_expdir () 324 { 325 return __collector_exp_dir_name; 326 } 327 328 static FrameInfo 329 __collector_getUserCtx (CollectorModule modl, HiResTime ts, int mode, void *arg) 330 { 331 return __collector_get_frame_info (ts, mode, arg); 332 } 333 334 static FrameInfo 335 __collector_getUID1 (CM_Array *arg) 336 { 337 return __collector_getUID (arg, (FrameInfo) 0); 338 } 339 340 static int 341 __collector_writeMetaData (CollectorModule modl, char *format, ...) 342 { 343 if (modl < 0 || modl >= nmodules || modules[modl]->description == NULL) 344 { 345 TprintfT (DBG_LT0, "__collector_writeMetaData(): bad module: %d\n", modl); 346 return 1; 347 } 348 char fname[MAXPATHLEN + 1]; 349 CALL_UTIL (strlcpy)(fname, __collector_exp_dir_name, sizeof (fname)); 350 CALL_UTIL (strlcat)(fname, "/metadata.", sizeof (fname)); 351 CALL_UTIL (strlcat)(fname, modules[modl]->description, sizeof (fname)); 352 CALL_UTIL (strlcat)(fname, ".xml", sizeof (fname)); 353 int fd = CALL_UTIL (open)(fname, O_CREAT | O_WRONLY | O_APPEND, 354 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); 355 if (fd < 0) 356 { 357 TprintfT (DBG_LT0, "__collector_writeMetaData(): can't open file: %s\n", fname); 358 return 1; 359 } 360 char buf[1024]; 361 char *bufptr = buf; 362 va_list va; 363 va_start (va, format); 364 int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va); 365 va_end (va); 366 367 if (sz >= sizeof (buf)) 368 { 369 /* Allocate a new buffer */ 370 sz += 1; /* add the terminating null byte */ 371 bufptr = (char*) alloca (sz); 372 373 va_start (va, format); 374 sz = __collector_xml_vsnprintf (bufptr, sz, format, va); 375 va_end (va); 376 } 377 CALL_UTIL (write)(fd, bufptr, sz); 378 CALL_UTIL (close)(fd); 379 return COL_ERROR_NONE; 380 } 381 382 /* check that the header fields are filled-in, and then call __collector_writeDataPacket */ 383 static int 384 __collector_writeDataRecord (CollectorModule modl, struct Common_packet *pckt) 385 { 386 return __collector_write_record (modules_hndl[modl], pckt); 387 } 388 389 static int 390 __collector_writeDataPacket (CollectorModule modl, struct CM_Packet *pckt) 391 { 392 return __collector_write_packet (modules_hndl[modl], pckt); 393 } 394 395 static void * 396 allocCSize (struct Heap *heap, unsigned sz, int log) 397 { 398 return __collector_allocCSize (heap ? heap : __collector_heap, sz, log); 399 } 400 401 static void 402 freeCSize (struct Heap *heap, void *ptr, unsigned sz) 403 { 404 __collector_freeCSize (heap ? heap : __collector_heap, ptr, sz); 405 } 406 407 static void * 408 allocVSize (struct Heap *heap, unsigned sz) 409 { 410 return __collector_allocVSize (heap ? heap : __collector_heap, sz); 411 } 412 413 static void * 414 reallocVSize (struct Heap *heap, void *ptr, unsigned sz) 415 { 416 return __collector_reallocVSize (heap ? heap : __collector_heap, ptr, sz); 417 } 418 419 static time_t 420 get_gm_time (struct tm *tp) 421 { 422 /* 423 Note that glibc contains a function of the same purpose named `timegm'. 424 But obviously, it is not universally available. 425 426 Some implementations of mktime return -1 for the nonexistent localtime hour 427 at the beginning of DST. In this event, use 'mktime(tm - 1hr) + 3600'. 428 nonexistent 429 tm_isdst is set to 0 to force mktime to introduce a consistent offset 430 (the non DST offset) since tm and tm+o might be on opposite sides of a DST change. 431 432 Schematically: 433 mktime(tm) --> t+o 434 gmtime_r(t+o) --> tm+o 435 mktime(tm+o) --> t+2o 436 t = t+o - (t+2o - t+o) 437 */ 438 struct tm stm; 439 time_t tl = CALL_UTIL (mktime)(tp); 440 if (tl == -1) 441 { 442 stm = *tp; 443 stm.tm_hour--; 444 tl = CALL_UTIL (mktime)(&stm); 445 if (tl == -1) 446 return -1; 447 tl += 3600; 448 } 449 450 (void) (CALL_UTIL (gmtime_r)(&tl, &stm)); 451 stm.tm_isdst = 0; 452 time_t tb = CALL_UTIL (mktime)(&stm); 453 if (tb == -1) 454 { 455 stm.tm_hour--; 456 tb = CALL_UTIL (mktime)(&stm); 457 if (tb == -1) 458 return -1; 459 tb += 3600; 460 } 461 return (tl - (tb - tl)); 462 } 463 464 static void 465 log_write_event_run () 466 { 467 /* get the gm and local time */ 468 struct tm start_stm; 469 CALL_UTIL (gmtime_r)(&start_sec_time, &start_stm); 470 time_t start_gm_time = get_gm_time (&start_stm); 471 time_t lcl_time = CALL_UTIL (mktime)(&start_stm); 472 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n", 473 SP_JCMD_RUN, 474 (unsigned) (__collector_start_time / NANOSEC), 475 (unsigned) (__collector_start_time % NANOSEC), 476 (long long) start_gm_time, 477 (long long) (lcl_time - start_gm_time)); 478 } 479 480 static void * 481 m_dlopen (const char *filename, int flag) 482 { 483 void *p = dlopen (filename, flag); 484 TprintfT (DBG_LT1, "collector.c: dlopen(%s, %d) returns %p\n", filename, flag, p); 485 return p; 486 } 487 /* real routine to open an experiment 488 * called by collector_init from libcollector init section 489 * called by __collector_start_experiment when a child is forked */ 490 int 491 __collector_open_experiment (const char *exp, const char *params, sp_origin_t origin) 492 { 493 char *s; 494 char *buf = NULL; 495 char *duration_string = NULL; 496 int err; 497 int is_founder = 1; 498 int record_this_experiment = 1; 499 int seen_F_flag = 0; 500 static char buffer[32]; 501 if (exp_open) 502 { 503 /* experiment already opened */ 504 TprintfT (0, "collector: ERROR: Attempt to open opened experiment\n"); 505 return COL_ERROR_EXPOPEN; 506 } 507 __collector_start_time = collector_interface.getHiResTime (); 508 TprintfT (DBG_LT1, "\n\t\t__collector_open_experiment(SP_COLLECTOR_EXPNAME=%s, params=%s, origin=%d); setting start_time\n", 509 exp, params, origin); 510 if (environ) 511 __collector_env_printall ("__collector_open_experiment", environ); 512 else 513 TprintfT (DBG_LT1, "collector_open_experiment found environ == NULL)\n"); 514 515 /* 516 * Recheck sigprof handler 517 * XXXX Bug 18177509 - additional sigprof signal kills target program 518 */ 519 __collector_sigprof_install (); 520 exp_origin = origin; 521 collector_params = params; 522 523 /* Determine which of the three possible threading models: 524 * singlethreaded 525 * multi-LWP (no threads) 526 * multithreaded 527 * is the one the target is actually using. 528 * 529 * we really only need to distinguish between first two 530 * and the third. The thr_main() trick does exactly that. 531 * is the one the target is actually using. 532 * 533 * __collector_no_threads applies to all signal handlers, 534 * and must be set before signal handlers are installed. 535 */ 536 __collector_no_threads = 0; 537 __collector_exp_dir_name[0] = 0; 538 sample_mode = 0; 539 sample_number = 0; 540 541 /* create global heap */ 542 if (__collector_heap == NULL) 543 { 544 __collector_heap = __collector_newHeap (); 545 if (__collector_heap == NULL) 546 { 547 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment COLERROR_NOZMEM 1\n"); 548 return COL_ERROR_NOZMEM; 549 } 550 } 551 //check whether is origin is collect 552 char * envar = CALL_UTIL (getenv)("SP_COLLECTOR_ORIGIN_COLLECT"); 553 TprintfT (DBG_LT1, "__collector_open_experiment SP_COLLECTOR_ORIGIN_COLLECT = '%s'\n", 554 (envar == NULL) ? "NULL" : envar); 555 if (envar) 556 exp_origin = SP_ORIGIN_COLLECT; 557 558 //check if this is the founder process 559 is_founder = getpid (); 560 if (origin != SP_ORIGIN_DBX_ATTACH) 561 { 562 envar = CALL_UTIL (getenv)("SP_COLLECTOR_FOUNDER"); 563 if (envar) 564 is_founder = CALL_UTIL (atoi)(envar); 565 if (is_founder != 0) 566 { 567 if (is_founder != getpid ()) 568 { 569 TprintfT (0, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d)\n", 570 is_founder, getpid ()); 571 //CALL_UTIL(fprintf)(stderr, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d); not recording experiment\n", 572 //is_founder, getpid() ); 573 //return COL_ERROR_UNEXP_FOUNDER; 574 is_founder = 0; // Special case (CR 22917352) 575 } 576 /* clear FOUNDER for descendant experiments */ 577 TprintfT (0, "__collector_open_experiment setting SP_COLLECTOR_FOUNDER=0\n"); 578 CALL_UTIL (strlcpy)(buffer, "SP_COLLECTOR_FOUNDER=0", sizeof (buffer)); 579 CALL_UTIL (putenv)(buffer); 580 } 581 } 582 583 /* Set up fork/exec interposition (requires __collector_heap). */ 584 /* Determine if "collect -F" specification enables this subexperiment */ 585 get_progspec (exp_progspec, sizeof (exp_progspec), exp_progname, sizeof (exp_progname)); 586 587 /* convert the returned exp_progname to a basename */ 588 const char * base_name = __collector_strrchr (exp_progname, '/'); 589 if (base_name == NULL) 590 base_name = exp_progname; 591 else 592 base_name = base_name + 1; 593 err = __collector_ext_line_init (&record_this_experiment, exp_progspec, base_name); 594 if (err != COL_ERROR_NONE) 595 { 596 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment COLERROR: %d\n", err); 597 return err; 598 } 599 600 /* Due to the fix of bug 15691122, we need to initialize unwind to make 601 * the function __collector_ext_return_address() work for dlopen interposition. 602 * */ 603 if (!record_this_experiment && !is_founder) 604 { 605 TprintfT (DBG_LT0, "__collector_open_experiment: NOT creating experiment. (is_founder=%d, record=%d)\n", 606 is_founder, record_this_experiment); 607 return collector_tail_init (exp); 608 } 609 TprintfT (DBG_LT0, "__collector_open_experiment: is_founder=%d, record=%d\n", 610 is_founder, record_this_experiment); 611 if (is_founder || origin == SP_ORIGIN_FORK) 612 { 613 CALL_UTIL (strlcpy)(__collector_exp_dir_name, exp, sizeof (__collector_exp_dir_name)); 614 if (origin == SP_ORIGIN_FORK) 615 { /*create exp dir for fork-child*/ 616 if (collector_create_expr_dir (__collector_exp_dir_name)) 617 { 618 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 1: `%s'\n", exp); 619 return COL_ERROR_BADDIR; 620 } 621 } 622 } 623 else 624 {/* founder/fork-child will already have created experiment dir, but exec/combo descendants must do so now */ 625 if (collector_create_expr_dir_lineage (exp)) 626 { 627 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 2: `%s'\n", exp); 628 return COL_ERROR_BADDIR; 629 } 630 static char exp_name_env[MAXPATHLEN + 1]; 631 TprintfT (DBG_LT1, "collector_open_experiment: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name); 632 CALL_UTIL (snprintf)(exp_name_env, sizeof (exp_name_env), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name); 633 CALL_UTIL (putenv)(exp_name_env); 634 } 635 /* Check that the name is that of a directory (new structure) */ 636 DIR *expDir = CALL_UTIL (opendir)(__collector_exp_dir_name); 637 if (expDir == NULL) 638 { 639 /* can't open it */ 640 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 3: `%s'\n", exp); 641 return COL_ERROR_BADDIR; 642 } 643 CALL_UTIL (closedir)(expDir); 644 645 if (CALL_UTIL (access)(__collector_exp_dir_name, W_OK)) 646 { 647 TprintfT (0, "collector: ERROR: access error: errno=%d\n", errno); 648 if ((errno == EACCES) || (errno == EROFS)) 649 { 650 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_DIRPERM: `%s'\n", exp); 651 TprintfT (DBG_LT0, "collector: ERROR: experiment directory `%s' is not writeable\n", 652 __collector_exp_dir_name); 653 return COL_ERROR_DIRPERM; 654 } 655 else 656 { 657 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 4: `%s'\n", exp); 658 return COL_ERROR_BADDIR; 659 } 660 } 661 662 /* reset the paused flag */ 663 collector_paused = (origin == SP_ORIGIN_FORK ? paused_when_suspended : 0); 664 665 /* mark the experiment as opened */ 666 __collector_expstate = EXP_OPEN; 667 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_OPEN\n"); 668 669 /* open the log file */ 670 err = log_open (); 671 if (err != COL_ERROR_NONE) 672 { 673 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_LOG_OPEN\n"); 674 return COL_ERROR_LOG_OPEN; 675 } 676 if (origin != SP_ORIGIN_GENEXP && origin != SP_ORIGIN_KERNEL) 677 log_header_write (origin); 678 679 /* Make a copy of params so that we can modify the string */ 680 int paramsz = CALL_UTIL (strlen)(params) + 1; 681 buf = (char*) alloca (paramsz); 682 if (buf == NULL) 683 { 684 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_ARGS2BIG: %s\n", params); 685 TprintfT (DBG_LT0, "collector: ERROR: experiment parameter `%s' is too long\n", params); 686 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", 687 SP_JCMD_CERROR, COL_ERROR_ARGS2BIG); 688 return COL_ERROR_ARGS2BIG; 689 } 690 CALL_UTIL (strlcpy)(buf, params, paramsz); 691 692 /* create directory for archives (if founder) */ 693 char archives[MAXPATHLEN]; 694 CALL_UTIL (snprintf)(archives, MAXPATHLEN, "%s/%s", __collector_exp_dir_name, 695 SP_ARCHIVES_DIR); 696 if (is_founder) 697 { 698 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; 699 if ((CALL_UTIL (mkdir)(archives, dmode) != 0) && (errno != EEXIST)) 700 { 701 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_MKDIR: %s: errno = %d\n", archives, errno); 702 TprintfT (0, "collector: ERROR: mkdir(%s) failed: errno = %d\n", archives, errno); 703 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">mkdir(%s): errno=%d</event>\n", 704 SP_JCMD_COMMENT, COL_COMMENT_NONE, archives, errno); 705 /* this is not a fatal error currently */ 706 } 707 else 708 TprintfT (DBG_LT1, "collector: archive mkdir(%s) succeeded\n", archives); 709 } 710 711 /* initialize the segments map and mmap interposition */ 712 if (origin != SP_ORIGIN_GENEXP && origin != SP_ORIGIN_KERNEL) 713 { 714 if ((err = __collector_ext_mmap_install (1)) != COL_ERROR_NONE) 715 { 716 __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR, err); 717 return err; 718 } 719 } 720 721 /* open the overview file for sample data */ 722 if (origin != SP_ORIGIN_GENEXP) 723 ovw_open (); 724 725 /* initialize TSD module (note: relies on __collector_heap) */ 726 if (__collector_tsd_init () != 0) 727 { 728 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_TSD_INIT\n"); 729 __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD could not be initialized</event>\n", SP_JCMD_CERROR, COL_ERROR_TSD_INIT); 730 return COL_ERROR_TSD_INIT; 731 } 732 733 /* experiment is initialized; allow pause/resume/close */ 734 exp_initted = 1; 735 736 // 24935305 should not use SIGPROF if collect -p -t and -S are all off 737 /* (check here if -t or -S is on; -p is checked later) */ 738 if (((params[0] == 't' || params[0] == 'S') && params[1] == ':') 739 || CALL_UTIL (strstr)(params, ";t:") 740 || CALL_UTIL (strstr)(params, ";S:")) 741 { 742 /* set a default time to 100 ms.; use negative value to force setting */ 743 TprintfT (DBG_LT1, "collector: open_experiment setting timer to 100000\n"); 744 __collector_ext_itimer_set (-100000); 745 } 746 747 /* call open for all dynamic modules */ 748 int i; 749 for (i = 0; i < nmodules; i++) 750 { 751 if (modules[i]->openExperiment != NULL) 752 { 753 modules_st[i] = modules[i]->openExperiment (__collector_exp_dir_name); 754 if (modules_st[i] == COL_ERROR_NONE && modules[i]->description != NULL) 755 { 756 modules_hndl[i] = __collector_create_handle (modules[i]->description); 757 if (modules_hndl[i] == NULL) 758 modules_st[i] = -1; 759 } 760 } 761 /* check to see if anyone closed the experiment */ 762 if (!exp_initted) 763 { 764 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_EXP_OPEN\n"); 765 __collector_log_write ("<event kind=\"%s\" id=\"%d\">Experiment closed prematurely</event>\n", SP_JCMD_CERROR, COL_ERROR_EXPOPEN); 766 return COL_ERROR_EXPOPEN; 767 } 768 } 769 770 /* initialize for subsequent stack unwinds */ 771 __collector_ext_unwind_init (1); 772 TprintfT (DBG_LT0, "__collector_open_experiment(); module init done, params=%s\n", 773 buf); 774 775 /* now parse the data descriptor */ 776 /* The parameter string is a series of specifiers, 777 * each of which is of the form: 778 * <key>:<param>; 779 * key is a single letter, the : and ; are mandatory, 780 * and param is a string which may be zero-length, and 781 * which contains any character except a null-byte or ; 782 * param is interpreted by the handler for the particular key 783 */ 784 785 s = buf; 786 787 while (*s) 788 { 789 char *par; 790 char key = *s++; 791 /* ensure that it's followed by a colon */ 792 if (*s++ != ':') 793 { 794 TprintfT (0, "collector: ERROR: parameter %c is not followed by a colon\n", key); 795 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, COL_ERROR_ARGS, params); 796 return COL_ERROR_ARGS; 797 } 798 /* find the semicolon terminator */ 799 par = s; 800 while (*s && (*s != ';')) 801 s++; 802 if (*s != ';') 803 { 804 /* not followed by semicolon */ 805 TprintfT (0, "collector: ERROR: parameter %c:%s is not terminated by a semicolon\n", key, par); 806 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, COL_ERROR_ARGS, params); 807 return COL_ERROR_ARGS; 808 } 809 /* terminate par, and position for next descriptor */ 810 *s++ = 0; 811 812 /* now process that element of the data descriptor */ 813 switch (key) 814 { 815 case 'g': /* g<sig>; */ 816 if ((err = sample_set_user_sig (par)) != COL_ERROR_NONE) 817 { 818 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par); 819 return err; 820 } 821 break; 822 case 'd': /* d<sig>; -or- d<sig>p; */ 823 if ((err = pause_set_user_sig (par)) != COL_ERROR_NONE) 824 { 825 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par); 826 return err; 827 } 828 break; 829 case 'H': 830 m_dlopen ("libgp-heap.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */ 831 break; 832 case 's': 833 m_dlopen ("libgp-sync.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */ 834 break; 835 case 'i': 836 m_dlopen ("libgp-iotrace.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */ 837 break; 838 case 'F': /* F; */ 839 seen_F_flag = 1; 840 TprintfT (DBG_LT0, "__collector_open_experiment: calling __collector_ext_line_install (%s, %s)\n", 841 par, __collector_exp_dir_name); 842 if ((err = __collector_ext_line_install (par, __collector_exp_dir_name)) != COL_ERROR_NONE) 843 { 844 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par); 845 return err; 846 } 847 break; 848 case 'a': /* a; */ 849 archive_mode = __collector_strdup (par); 850 break; 851 case 't': /* t:<expt-duration>; */ 852 duration_string = par; 853 break; 854 case 'S': /* S:<sample-interval>; */ 855 if ((err = sample_set_interval (par)) != COL_ERROR_NONE) 856 { 857 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par); 858 return err; 859 } 860 break; 861 case 'L': /* L:<experiment-size-limit>; */ 862 if ((err = __collector_set_size_limit (par)) != COL_ERROR_NONE) 863 { 864 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par); 865 return err; 866 } 867 break; 868 case 'P': /* P:PROJECT_HOME; */ 869 project_home = __collector_strdup (par); 870 break; 871 case 'h': 872 case 'p': 873 fs_matters = 1; 874 break; 875 case 'Y': 876 err = set_user_sig_action (par); 877 if (err != COL_ERROR_NONE) 878 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par); 879 break; 880 default: 881 /* Ignore unknown parameters; allow them to be handled by modules */ 882 break; 883 } 884 } 885 /* end of data descriptor parsing */ 886 887 if (!seen_F_flag) 888 { 889 char * par = "0"; // This will not happen when collect has no -F option 890 if ((err = __collector_ext_line_install (par, __collector_exp_dir_name)) != COL_ERROR_NONE) 891 { 892 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par); 893 return err; 894 } 895 } 896 897 /* now that we know what data is being collected, we can set the filesystem warning */ 898 fs_warn (); 899 900 // We have to create all tsd keys before __collector_tsd_allocate(). 901 // With the pthreads-based implementation, this might no longer be necessary. 902 // In any case, we still have to create the key before a thread can use it. 903 __collector_ext_gettid_tsd_create_key (); 904 __collector_ext_dispatcher_tsd_create_key (); 905 906 /* allocate tsd for the current thread */ 907 if (__collector_tsd_allocate () != 0) 908 { 909 __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD allocate failed</event>\n", SP_JCMD_CERROR, COL_ERROR_EXPOPEN); 910 return COL_ERROR_EXPOPEN; 911 } 912 /* init tsd for unwind, called right after __collector_tsd_allocate()*/ 913 __collector_ext_unwind_key_init (1, NULL); 914 915 /* start java attach if suitable */ 916 if (exp_origin == SP_ORIGIN_DBX_ATTACH) 917 __collector_jprofile_start_attach (); 918 start_sec_time = CALL_UTIL (time)(NULL); 919 __collector_start_time = collector_interface.getHiResTime (); 920 TprintfT (DBG_LT0, "\t__collector_open_experiment; resetting start_time\n"); 921 if (duration_string != NULL && (err = set_duration (duration_string)) != COL_ERROR_NONE) 922 { 923 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, duration_string); 924 return err; 925 } 926 927 /* install the common SIGPROF dispatcher (requires TSD) */ 928 if ((err = __collector_ext_dispatcher_install ()) != COL_ERROR_NONE) 929 { 930 __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR, err); 931 return err; 932 } 933 934 /* mark the experiment open complete */ 935 exp_open = 1; 936 if (exp_origin == SP_ORIGIN_DBX_ATTACH) 937 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n", 938 SP_JCMD_RUN, 939 (unsigned) (__collector_start_time / NANOSEC), (unsigned) (__collector_start_time % NANOSEC), 940 (long long) start_sec_time, (long long) 0); 941 else 942 log_write_event_run (); 943 944 /* schedule the first sample */ 945 __collector_next_sample = __collector_start_time + ((hrtime_t) NANOSEC) * __collector_sample_period; 946 __collector_ext_usage_sample (MASTER_SMPL, "collector_open_experiment"); 947 948 /* start data collection in dynamic modules */ 949 if (collector_paused == 0) 950 { 951 for (i = 0; i < nmodules; i++) 952 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0) 953 modules[i]->startDataCollection (); 954 } 955 else 956 { 957 hrtime_t ts = GETRELTIME (); 958 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n", 959 SP_JCMD_PAUSE, (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC)); 960 } 961 962 /* mark the experiment active */ 963 __collector_exp_active = 1; 964 return COL_ERROR_NONE; 965 } 966 967 /* prepare directory for new experiment of fork-child */ 968 969 /* return 0 if successful */ 970 static int 971 collector_create_expr_dir (const char *new_exp_name) 972 { 973 int ret = -1; 974 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; 975 TprintfT (DBG_LT1, "collector: __collector_create_expr_dir(%s)\n", new_exp_name); 976 if (CALL_UTIL (mkdir)(new_exp_name, dmode) < 0) 977 TprintfT (0, "__collector_create_expr_dir(%s) ERROR: errno=%d\n", new_exp_name, errno); 978 else 979 ret = 0; 980 return (ret); 981 } 982 983 /* append _xN to __collector_exp_dir_name*/ 984 /* return 0 if successful */ 985 static int 986 collector_exp_dir_append_x (int linenum, const char *parent_exp_name) 987 { 988 char buffer[MAXPATHLEN + 1]; 989 char * p = __collector_strrchr (parent_exp_name, '/'); 990 if (p == NULL || (*(p + 1) != '_')) 991 { 992 size_t sz = CALL_UTIL (strlen)(parent_exp_name); 993 const char * q = parent_exp_name + sz - 3; 994 if (sz < 3 || __collector_strncmp (q, ".er", CALL_UTIL (strlen)(q)) != 0 995 || CALL_UTIL (access)(parent_exp_name, F_OK) != 0) 996 { 997 TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid parent_exp_name %s\n", parent_exp_name); 998 return -1; 999 } 1000 CALL_UTIL (strlcpy)(buffer, parent_exp_name, sizeof (buffer)); 1001 CALL_UTIL (snprintf)(__collector_exp_dir_name, sizeof (__collector_exp_dir_name), 1002 "%s/_x%d.er", buffer, linenum); 1003 } 1004 else 1005 { 1006 p = __collector_strrchr (parent_exp_name, '.'); 1007 if (p == NULL || *(p + 1) != 'e' || *(p + 2) != 'r') 1008 { 1009 TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid parent_exp_name %s\n", parent_exp_name); 1010 return -1; 1011 } 1012 CALL_UTIL (strlcpy)(buffer, parent_exp_name, 1013 ((p - parent_exp_name + 1)<sizeof (buffer)) ? (p - parent_exp_name + 1) : sizeof (buffer)); 1014 CALL_UTIL (snprintf)(__collector_exp_dir_name, sizeof (__collector_exp_dir_name), 1015 "%s_x%d.er", buffer, linenum); 1016 } 1017 return 0; 1018 } 1019 1020 /* prepare directory for new experiment of exec/combo child*/ 1021 1022 /* return 0 if successful */ 1023 static int 1024 collector_create_expr_dir_lineage (const char *parent_exp_name) 1025 { 1026 int ret = -1; 1027 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; 1028 int linenum = 1; 1029 while (linenum < INT_MAX) 1030 { 1031 if (collector_exp_dir_append_x (linenum, parent_exp_name) != 0) 1032 return -1; 1033 if (CALL_UTIL (access)(__collector_exp_dir_name, F_OK) != 0) 1034 { 1035 if (CALL_UTIL (mkdir)(__collector_exp_dir_name, dmode) == 0) 1036 return 0; 1037 } 1038 linenum++; 1039 TprintfT (DBG_LT0, "collector: collector_create_expr_dir_lineage(%s -> %s)\n", parent_exp_name, __collector_exp_dir_name); 1040 } 1041 return (ret); 1042 } 1043 1044 /* Finish the initializing work if we don't collect data while libcollector.so is preloaded. */ 1045 /* return COL_ERROR_NONE if successful */ 1046 static int 1047 collector_tail_init (const char *parent_exp_name) 1048 { 1049 int err = COL_ERROR_NONE; 1050 if (exp_origin != SP_ORIGIN_FORK) 1051 { 1052 /* For exec/combo descendants. Don't create dir for this subexp, but update lineage by appending "_x0". */ 1053 /* Different children can have the same _x0 if their name don't match -F exp. 1054 * Assume their fork children inherit the program name, there will be no _x0_fN.er to create. 1055 * So we don't need to worry about the lineage messed up by _x0. 1056 */ 1057 int linenum = 0; 1058 if (collector_exp_dir_append_x (linenum, parent_exp_name) != 0) 1059 return COL_ERROR_BADDIR; 1060 static char exp_name_env[MAXPATHLEN + 1]; 1061 CALL_UTIL (snprintf)(exp_name_env, sizeof (exp_name_env), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name); 1062 TprintfT (DBG_LT1, "collector_tail_init: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name); 1063 CALL_UTIL (putenv)(exp_name_env); 1064 } 1065 /* initialize the segments map and mmap interposition */ 1066 if (exp_origin != SP_ORIGIN_GENEXP && exp_origin != SP_ORIGIN_KERNEL) 1067 if ((err = __collector_ext_mmap_install (0)) != COL_ERROR_NONE) 1068 return err; 1069 1070 /* initialize TSD module (note: relies on __collector_heap) */ 1071 if (__collector_tsd_init () != 0) 1072 return COL_ERROR_EXPOPEN; 1073 1074 /* initialize for subsequent stack unwinds */ 1075 __collector_ext_unwind_init (0); 1076 1077 char * buf = NULL; 1078 /* Make a copy of params so that we can modify the string */ 1079 int paramsz = CALL_UTIL (strlen)(collector_params) + 1; 1080 buf = (char*) alloca (paramsz); 1081 CALL_UTIL (strlcpy)(buf, collector_params, paramsz); 1082 1083 char *par_F = "0"; 1084 char *s; 1085 for (s = buf; *s;) 1086 { 1087 char key = *s++; 1088 /* ensure that it's followed by a colon */ 1089 if (*s++ != ':') 1090 { 1091 TprintfT (DBG_LT0, "collector_tail_init: ERROR: parameter %c is not followed by a colon\n", key); 1092 return COL_ERROR_ARGS; 1093 } 1094 1095 /* find the semicolon terminator */ 1096 char *par = s; 1097 while (*s && (*s != ';')) 1098 s++; 1099 if (*s != ';') 1100 { 1101 /* not followed by semicolon */ 1102 TprintfT (0, "collector_tail_init: ERROR: parameter %c:%s is not terminated by a semicolon\n", key, par); 1103 return COL_ERROR_ARGS; 1104 } 1105 /* terminate par, and position for next descriptor */ 1106 *s++ = 0; 1107 /* now process that element of the data descriptor */ 1108 if (key == 'F') 1109 { 1110 par_F = par; 1111 break; 1112 } 1113 } 1114 if ((err = __collector_ext_line_install (par_F, __collector_exp_dir_name)) != COL_ERROR_NONE) 1115 return err; 1116 1117 /* allocate tsd for the current thread */ 1118 if (__collector_tsd_allocate () != 0) 1119 return COL_ERROR_EXPOPEN; 1120 return COL_ERROR_NONE; 1121 } 1122 1123 /* routines concerning closing the experiment */ 1124 /* close down -- fini section routine */ 1125 static void collector_fini () __attribute__ ((destructor)); 1126 static void 1127 collector_fini () 1128 { 1129 TprintfT (DBG_LT0, "collector_fini: closing experiment\n"); 1130 __collector_close_experiment (); 1131 1132 } 1133 1134 void collector_terminate_expt () __attribute__ ((weak, alias ("__collector_terminate_expt"))); 1135 1136 /* __collector_terminate_expt called by user, or from dbx */ 1137 void 1138 __collector_terminate_expt () 1139 { 1140 TprintfT (DBG_LT0, "__collector_terminate_expt: %s; calling close\n", __collector_exp_dir_name); 1141 __collector_close_experiment (); 1142 TprintfT (DBG_LT0, "__collector_terminate_expt done\n\n"); 1143 } 1144 1145 /* 1146 * We manage the SIGCHLD handler with sigaction and don't worry about signal or sigset(). 1147 * This is in line with the comments in dispatcher.c 1148 * immediately preceding the wrapper function for (Linux) signal(). 1149 */ 1150 static struct sigaction original_sigchld_sigaction; 1151 static pid_t mychild_pid = -1; 1152 1153 /* __collector_SIGCHLD_signal_handler called when er_archive exits */ 1154 static void 1155 __collector_SIGCHLD_signal_handler (int sig, siginfo_t *si, void *context) 1156 { 1157 pid_t calling_pid = si->si_pid; 1158 /* Potential race. 1159 * We get mychild_pid from the vfork() return value. 1160 * So there is an outside chance that the child completes and sends SIGCHLD 1161 * before the handler knows the value of mychild_pid. 1162 */ 1163 if (calling_pid == mychild_pid) 1164 // er_archive has exited; so restore the user handler 1165 __collector_sigaction (SIGCHLD, &original_sigchld_sigaction, NULL); 1166 else 1167 { 1168 // if we can't identify the pid, the signal must be for the user's handler 1169 if (original_sigchld_sigaction.sa_handler != SIG_DFL 1170 && original_sigchld_sigaction.sa_handler != SIG_IGN) 1171 original_sigchld_sigaction.sa_sigaction (sig, si, context); 1172 } 1173 TprintfT (DBG_LT1, "__collector_SIGCHLD_signal_handler done\n\n"); 1174 } 1175 1176 int 1177 collector_sigchld_sigaction (const struct sigaction *nact, 1178 struct sigaction *oact) 1179 { 1180 // get the current SIGCHLD handler 1181 struct sigaction cur_handler; 1182 __collector_sigaction (SIGCHLD, NULL, &cur_handler); 1183 1184 // if we have NOT installed our own handler, return an error 1185 // (force the caller to deal with this case) 1186 if (cur_handler.sa_sigaction != __collector_SIGCHLD_signal_handler) 1187 return -1; 1188 1189 // if we HAVE installed our own handler, act on the user's handler 1190 if (oact) 1191 __collector_memcpy (oact, &original_sigchld_sigaction, sizeof (struct sigaction)); 1192 if (nact) 1193 __collector_memcpy (&original_sigchld_sigaction, nact, sizeof (struct sigaction)); 1194 return 0; 1195 } 1196 1197 /* 1198 * __collector_close_experiment may be called either from 1199 * __collector_terminate_expt() or the .fini section 1200 */ 1201 void 1202 __collector_close_experiment () 1203 { 1204 hrtime_t ts; 1205 char *argv[10]; 1206 int status; 1207 TprintfT (DBG_LT1, "collector: __collector_close_experiment(): %s\n", __collector_exp_dir_name); 1208 if (!exp_initted) 1209 return; 1210 /* The experiment may have been previously closed */ 1211 if (!exp_open) 1212 return; 1213 1214 if (__collector_mutex_trylock (&__collector_close_guard)) 1215 /* someone else is in the middle of closing the experiment */ 1216 return; 1217 1218 /* record the termination of the experiment */ 1219 ts = GETRELTIME (); 1220 collector_params = NULL; 1221 1222 /* tell all dynamic modules to stop data collection */ 1223 int i; 1224 for (i = 0; i < nmodules; i++) 1225 if (modules[i]->stopDataCollection != NULL) 1226 modules[i]->stopDataCollection (); 1227 1228 /* notify all dynamic modules the experiment is being closed */ 1229 for (i = 0; i < nmodules; i++) 1230 { 1231 if (modules[i]->closeExperiment != NULL) 1232 modules[i]->closeExperiment (); 1233 __collector_delete_handle (modules_hndl[i]); 1234 modules_hndl[i] = NULL; 1235 } 1236 1237 /* acquire the global lock -- only one close at a time */ 1238 __collector_mutex_lock (&__collector_glob_lock); 1239 /* deinstall mmap tracing (with final update) */ 1240 __collector_ext_mmap_deinstall (1); 1241 1242 /* deinstall common SIGPROF dispatcher */ 1243 __collector_ext_dispatcher_deinstall (); 1244 1245 /* disable line interposition */ 1246 __collector_ext_line_close (); 1247 1248 /* Other threads may be reading tsd now. */ 1249 //__collector_tsd_fini(); 1250 1251 /* delete global heap */ 1252 /* omazur: do not delete the global heap 1253 * to avoid crashes in TSD. Need a better solution. 1254 __collector_deleteHeap( __collector_heap ); 1255 __collector_heap = NULL; 1256 */ 1257 __collector_mutex_unlock (&__collector_glob_lock); 1258 1259 /* take a final sample */ 1260 __collector_ext_usage_sample (MASTER_SMPL, "collector_close_experiment"); 1261 sample_mode = 0; 1262 1263 /* close the frameinfo file */ 1264 __collector_ext_unwind_close (); 1265 if (exp_origin != SP_ORIGIN_DBX_ATTACH) 1266 log_write_event_run (); 1267 1268 /* mark the experiment as closed */ 1269 __collector_expstate = EXP_CLOSED; 1270 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_CLOSED: project_home=%s\n", 1271 STR (project_home)); 1272 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n", 1273 SP_JCMD_EXIT, 1274 (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC)); 1275 1276 /* derive er_archive's absolute path from that of libcollector */ 1277 argv[0] = NULL; 1278 if (project_home && archive_mode && __collector_strcmp (archive_mode, "off")) 1279 { 1280 /* construct a command to launch it */ 1281 char *er_archive_name = "/bin/gp-archive"; 1282 size_t cmdlen = CALL_UTIL (strlen)(project_home) + CALL_UTIL (strlen)(er_archive_name) + 1; 1283 char *command = (char*) alloca (cmdlen); 1284 CALL_UTIL (snprintf)(command, cmdlen, "%s%s", project_home, er_archive_name); 1285 if (CALL_UTIL (access)(command, F_OK) == 0) 1286 { 1287 // build the argument list 1288 int nargs = 0; 1289 argv[nargs++] = command; 1290 argv[nargs++] = "-n"; 1291 argv[nargs++] = "-a"; 1292 argv[nargs++] = archive_mode; 1293 size_t len = CALL_UTIL (strlen)(__collector_exp_dir_name) + 1; 1294 size_t len1 = CALL_UTIL (strlen)(SP_ARCHIVE_LOG_FILE) + 1; 1295 char *str = (char*) alloca (len + len1); 1296 CALL_UTIL (snprintf)(str, len + 15, "%s/%s", __collector_exp_dir_name, SP_ARCHIVE_LOG_FILE); 1297 argv[nargs++] = "--outfile"; 1298 argv[nargs++] = str; 1299 str = (char*) alloca (len); 1300 CALL_UTIL (snprintf)(str, len, "%s", __collector_exp_dir_name); 1301 argv[nargs++] = str; 1302 argv[nargs] = NULL; 1303 } 1304 } 1305 1306 /* log the archive command to be run */ 1307 if (argv[0] == NULL) 1308 { 1309 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", 1310 SP_JCMD_COMMENT, COL_COMMENT_NONE, "No archive command run"); 1311 TprintfT (DBG_LT1, "collector: No archive command run\n"); 1312 } 1313 else 1314 { 1315 char cmdbuf[4096]; 1316 int bufoffset = 0; 1317 int i; 1318 for (i = 0; argv[i] != NULL; i++) 1319 { 1320 bufoffset += CALL_UTIL (snprintf)(&cmdbuf[bufoffset], (sizeof (cmdbuf) - bufoffset), 1321 " %s", argv[i]); 1322 } 1323 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">Archive command `%s'</event>\n", 1324 SP_JCMD_COMMENT, COL_COMMENT_NONE, cmdbuf); 1325 TprintfT (DBG_LT1, "collector: running `%s'\n", cmdbuf); 1326 } 1327 log_close (); 1328 TprintfT (DBG_LT1, "__collector_close_experiment(%s) done\n", __collector_exp_dir_name); 1329 exp_open = 0; /* mark the experiment as closed */ 1330 __collector_exp_active = 0; /* mark the experiment as inactive */ 1331 1332 /* reset all experiment parameters */ 1333 sample_mode = 0; 1334 collector_paused = 0; 1335 __collector_pause_sig = -1; 1336 __collector_pause_sig_warn = 0; 1337 __collector_sample_sig = -1; 1338 __collector_sample_sig_warn = 0; 1339 __collector_sample_period = 0; 1340 __collector_exp_dir_name[0] = 0; 1341 1342 /* uninstall the pause and sample signal handlers */ 1343 /* XXXX -- not yet, because of potential race conditions in libthread */ 1344 if (argv[0] == NULL) 1345 { 1346 /* er_archive command will not be run */ 1347 __collector_mutex_unlock (&__collector_close_guard); 1348 return; 1349 } 1350 1351 struct sigaction sa; 1352 CALL_UTIL (memset)(&sa, 0, sizeof (struct sigaction)); 1353 sa.sa_sigaction = __collector_SIGCHLD_signal_handler; 1354 sa.sa_flags = SA_SIGINFO; 1355 __collector_sigaction (SIGCHLD, &sa, &original_sigchld_sigaction); 1356 1357 /* linetrace interposition takes care of unsetting Environment variables */ 1358 /* create a child process to invoke er_archive */ 1359 pid_t pid = CALL_UTIL (vfork)(); 1360 if (pid == 0) 1361 { 1362 /* pid is zero == child process -- invoke er_archive */ 1363 /* Unset LD_PRELOAD environment variables */ 1364 CALL_UTIL (unsetenv)("LD_PRELOAD_32"); 1365 CALL_UTIL (unsetenv)("LD_PRELOAD_64"); 1366 CALL_UTIL (unsetenv)("LD_PRELOAD"); 1367 /* Invoke er_archive */ 1368 CALL_UTIL (execv)(argv[0], argv); 1369 CALL_UTIL (exit)(1); /* exec failed -- child exits with an error */ 1370 } 1371 else if (pid != -1) 1372 { 1373 mychild_pid = pid; // notify our signal handler who the child is 1374 pid_t w; 1375 /* copied from system.c */ 1376 do 1377 { 1378 w = CALL_UTIL (waitpid)(pid, &status, 0); 1379 } 1380 while (w == -1 && errno == EINTR); 1381 TprintfT (DBG_LT1, "collector: creating archive done\n"); 1382 // __collector_SIGCHLD_signal_handler should now be de-installed, but it does so itself 1383 } 1384 else 1385 /* child-process creation failed */ 1386 TprintfT (DBG_LT0, "collector: creating archive process failed\n"); 1387 1388 __collector_mutex_unlock (&__collector_close_guard); 1389 TprintfT (DBG_LT1, "collector: __collector_close_experiment done\n"); 1390 return; 1391 } 1392 1393 /* 1394 * void __collector_clean_state() 1395 * Perform all necessary cleanup steps in child process after fork(). 1396 */ 1397 void 1398 __collector_clean_state () 1399 { 1400 TprintfT (DBG_LT1, "collector: collector_clean_state()\n"); 1401 int i; 1402 /* 1403 * We are in child process after fork(). 1404 * First of all we have to reset all mutex locks in collector's subsystems. 1405 * After that we can reinitialize modules. 1406 */ 1407 __collector_mmgr_init_mutex_locks (__collector_heap); 1408 __collector_mutex_init (&__collector_glob_lock); 1409 __collector_mutex_init (&__collector_open_guard); 1410 __collector_mutex_init (&__collector_close_guard); 1411 __collector_mutex_init (&__collector_sample_guard); 1412 __collector_mutex_init (&__collector_suspend_guard); 1413 __collector_mutex_init (&__collector_resume_guard); 1414 1415 if (__collector_mutex_trylock (&__collector_close_guard)) 1416 /* someone else is in the middle of closing the experiment */ 1417 return; 1418 1419 /* Stop data collection in all dynamic modules */ 1420 for (i = 0; i < nmodules; i++) 1421 if (modules[i]->stopDataCollection != NULL) 1422 modules[i]->stopDataCollection (); 1423 1424 // Now we can reset modules 1425 for (i = 0; i < nmodules; i++) 1426 { 1427 if (modules[i]->detachExperiment != NULL && modules_st[i] == 0) 1428 modules[i]->detachExperiment (); 1429 __collector_delete_handle (modules_hndl[i]); 1430 modules_hndl[i] = NULL; 1431 } 1432 1433 /* acquire the global lock -- only one suspend at a time */ 1434 __collector_mutex_lock (&__collector_glob_lock); 1435 { 1436 1437 /* stop any profile data writing */ 1438 paused_when_suspended = collector_paused; 1439 collector_paused = 1; 1440 1441 /* deinstall common SIGPROF dispatcher */ 1442 __collector_ext_dispatcher_suspend (); 1443 1444 /* mark the experiment as suspended */ 1445 __collector_exp_active = 0; 1446 1447 /* XXXX mark the experiment as closed! */ 1448 exp_open = 0; /* This is a hack to allow fork child to call__collector_open_experiment() */ 1449 1450 /* mark the experiment log closed! */ 1451 log_close (); 1452 } 1453 __collector_mutex_unlock (&__collector_glob_lock); 1454 1455 // Now we can reset subsystems. 1456 __collector_ext_dispatcher_fork_child_cleanup (); 1457 __collector_mmap_fork_child_cleanup (); 1458 __collector_tsd_fork_child_cleanup (); 1459 paused_when_suspended = 0; 1460 collector_paused = 0; 1461 __collector_expstate = EXP_INIT; 1462 TprintfT (DBG_LT1, "__collector_clean_slate: __collector_expstate->EXP_INIT\n"); 1463 exp_origin = SP_ORIGIN_LIBCOL_INIT; 1464 exp_initted = 0; 1465 __collector_start_time = collector_interface.getHiResTime (); 1466 TprintfT (DBG_LT1, " -->__collector_clean_slate; resetting start_time\n"); 1467 start_sec_time = 0; 1468 1469 /* Sample related data */ 1470 sample_installed = 0; // 1 if the sample signal handler installed 1471 sample_mode = 0; // dynamically turns sample record writing on/off 1472 sample_number = 0; // index of the current sample record 1473 __collector_sample_sig = -1; // user-specified sample signal 1474 __collector_sample_sig_warn = 0; // non-zero if warning already given 1475 1476 /* Pause/resume related data */ 1477 __collector_pause_sig = -1; // user-specified pause signal 1478 __collector_pause_sig_warn = 0; // non-zero if warning already given 1479 __collector_mutex_unlock (&__collector_close_guard); 1480 return; 1481 } 1482 1483 /* modelled on __collector_close_experiment */ 1484 void 1485 __collector_suspend_experiment (char *why) 1486 { 1487 if (!exp_initted) 1488 return; 1489 /* The experiment may have been previously closed */ 1490 if (!exp_open) 1491 return; 1492 /* The experiment may have been previously suspended */ 1493 if (!__collector_exp_active) 1494 return; 1495 if (__collector_mutex_trylock (&__collector_suspend_guard)) 1496 /* someone else is in the middle of suspending the experiment */ 1497 return; 1498 1499 /* Stop data collection in all dynamic modules */ 1500 int i; 1501 for (i = 0; i < nmodules; i++) 1502 if (modules[i]->stopDataCollection != NULL) 1503 modules[i]->stopDataCollection (); 1504 1505 /* take a pre-suspension sample */ 1506 __collector_ext_usage_sample (MASTER_SMPL, why); 1507 1508 /* acquire the global lock -- only one suspend at a time */ 1509 __collector_mutex_lock (&__collector_glob_lock); 1510 /* stop any profile data writing */ 1511 paused_when_suspended = collector_paused; 1512 collector_paused = 1; 1513 1514 /* deinstall common SIGPROF dispatcher */ 1515 __collector_ext_dispatcher_suspend (); 1516 1517 /* mark the experiment as suspended */ 1518 __collector_exp_active = 0; 1519 1520 /* XXXX mark the experiment as closed! */ 1521 exp_open = 0; // This is a hack to allow fork child to call __collector_open_experiment() 1522 log_pause (); // mark the experiment log closed! 1523 TprintfT (DBG_LT0, "collector: collector_suspend_experiment(%s, %d)\n\n", why, collector_paused); 1524 __collector_mutex_unlock (&__collector_glob_lock); 1525 __collector_mutex_unlock (&__collector_suspend_guard); 1526 return; 1527 } 1528 1529 void 1530 __collector_resume_experiment () 1531 { 1532 if (!exp_initted) 1533 return; 1534 1535 /* The experiment may have been previously resumed */ 1536 if (__collector_exp_active) 1537 return; 1538 if (__collector_mutex_trylock (&__collector_resume_guard)) 1539 /* someone else is in the middle of resuming the experiment */ 1540 return; 1541 1542 /* acquire the global lock -- only one resume at a time */ 1543 __collector_mutex_lock (&__collector_glob_lock); 1544 /* mark the experiment as re-activated */ 1545 __collector_exp_active = 1; 1546 /* XXXX mark the experiment as open! */ 1547 exp_open = 1; // This is a hack to allow fork child to call__collector_open_experiment() 1548 log_resume (); // mark the experiment log re-opened! 1549 TprintfT (DBG_LT0, "collector: collector_resume_experiment(%d)\n", paused_when_suspended); 1550 /* resume any profile data writing */ 1551 collector_paused = paused_when_suspended; 1552 /* restart common SIGPROF dispatcher */ 1553 __collector_ext_dispatcher_restart (); 1554 __collector_mutex_unlock (&__collector_glob_lock); 1555 1556 /* take a post-suspension sample */ 1557 __collector_ext_usage_sample (MASTER_SMPL, "collector_resume_experiment"); 1558 1559 /* Resume data collection in all dynamic modules */ 1560 if (collector_paused == 0) 1561 { 1562 int i; 1563 for (i = 0; i < nmodules; i++) 1564 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0) 1565 modules[i]->startDataCollection (); 1566 } 1567 1568 if (__collector_sample_period != 0) 1569 { 1570 hrtime_t now = collector_interface.getHiResTime (); 1571 while (__collector_next_sample < now) 1572 __collector_next_sample += ((hrtime_t) NANOSEC) * __collector_sample_period; 1573 } 1574 1575 /* check for experiment past termination time */ 1576 if (__collector_terminate_time != 0) 1577 { 1578 hrtime_t now = collector_interface.getHiResTime (); 1579 if (__collector_terminate_time < now) 1580 { 1581 TprintfT (DBG_LT0, "__collector_resume_experiment: now (%lld) > terminate_time (%lld); closing experiment\n", 1582 (now - __collector_start_time), (__collector_terminate_time - __collector_start_time)); 1583 __collector_close_experiment (); 1584 } 1585 } 1586 __collector_mutex_unlock (&__collector_resume_guard); 1587 return; 1588 } 1589 1590 /* Code to support Samples and Pause/Resume */ 1591 void collector_sample () __attribute__ ((weak, alias ("__collector_sample"))); 1592 void 1593 __collector_sample (char *name) 1594 { 1595 __collector_ext_usage_sample (PROGRAM_SMPL, name); 1596 } 1597 1598 static void 1599 write_sample (char *name) 1600 { 1601 if (sample_mode == 0) 1602 return; 1603 /* make the sample timestamp relative to the start */ 1604 hrtime_t ts, now = collector_interface.getHiResTime (); 1605 1606 /* update time for next periodic sample */ 1607 /* since this is common to all LWPs, and only one (the first!) will 1608 update it to the next period, doing the update early will avoid 1609 the overhead/frustration of the other LWPs 1610 */ 1611 if (__collector_sample_period != 0) 1612 { 1613 /* this update should only be done for periodic samples */ 1614 while (__collector_next_sample < now) 1615 __collector_next_sample += ((hrtime_t) NANOSEC) * __collector_sample_period; 1616 } 1617 1618 /* take the sample and record it; use (return - __collector_start_time) for timestamp */ 1619 now = ovw_write (); 1620 ts = now - __collector_start_time; 1621 1622 /* write sample records to log file */ 1623 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" id=\"%d\" label=\"%s\"/>\n", 1624 SP_JCMD_SAMPLE, 1625 (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC), 1626 sample_number, 1627 name); 1628 /* increment the sample number */ 1629 sample_number++; 1630 } 1631 1632 /* 1633 * __collector_ext_usage_sample 1634 * 1635 * Handle taking a process usage sample and recording it. 1636 * Common to all different types of sample: 1637 * libcollector master samples at initiation and close, 1638 * programmatic samples via libcollector API calls, 1639 * periodic samples originating in the dispatcher, 1640 * manual samples originating in the signal sample handler, 1641 * manual samples originating from the debugger 1642 * Differentiating type and name information is currently not recorded. 1643 */ 1644 void 1645 __collector_ext_usage_sample (Smpl_type type, char *name) 1646 { 1647 /* name is optional */ 1648 if (name == NULL) 1649 name = ""; 1650 TprintfT (DBG_LT3, "collector: __collector_ext_usage_sample(%d,%s)\n", type, name); 1651 if (!exp_initted) 1652 return; 1653 1654 /* if paused, don't record periodic samples */ 1655 if ((type == PERIOD_SMPL) && (collector_paused == 1)) 1656 return; 1657 1658 /* There is a possibility of entering this function 1659 * from sample_handler, dbx direct call to __collector_sample, 1660 * and user called collector_sample. Since we are making a 1661 * new sample anyway just return. 1662 */ 1663 if (__collector_mutex_trylock (&__collector_sample_guard)) 1664 return; 1665 if (type != PERIOD_SMPL || __collector_sample_period != 0) 1666 write_sample (name); 1667 __collector_mutex_unlock (&__collector_sample_guard); 1668 } 1669 1670 /* set the sample period from the parameter */ 1671 static int 1672 sample_set_interval (char *param) 1673 { 1674 if (!exp_initted) 1675 return COL_ERROR_SMPLINIT; 1676 __collector_sample_period = CALL_UTIL (strtol)(param, NULL, 0); /* seconds */ 1677 TprintfT (DBG_LT1, "collector: collector_sample period set to %d seconds.\n", 1678 __collector_sample_period); 1679 if (__collector_sample_period > 0) 1680 (void) __collector_log_write ("<setting %s=\"%d\"/>\n", 1681 SP_JCMD_SAMPLE_PERIOD, __collector_sample_period); 1682 return COL_ERROR_NONE; 1683 } 1684 1685 /* set the experiment duration from the parameter */ 1686 1687 /* parameter is of the form nnn:mmm, where nnn is the start delay in seconds, 1688 * and mmm is the terminate time in seconds; if nnn is zero, 1689 * data collection starts when the run starts. If mmm is zero, 1690 * data collection terminates when the run terminates. Otherwise, 1691 * nnn must be less than mmm 1692 */ 1693 static int 1694 set_duration (char *param) 1695 { 1696 if (!exp_initted) 1697 return COL_ERROR_DURATION_INIT; 1698 int delay_start = CALL_UTIL (strtol)(param, ¶m, 0); /* seconds */ 1699 int terminate_duration = 0; 1700 if (*param == 0) 1701 { 1702 /* we only have one parameter, the terminate time */ 1703 terminate_duration = delay_start; 1704 delay_start = 0; 1705 } 1706 else if (*param == ':') 1707 { 1708 param++; 1709 terminate_duration = CALL_UTIL (strtol)(param, ¶m, 0); /* seconds */ 1710 } 1711 else 1712 return COL_ERROR_DURATION_INIT; 1713 TprintfT (DBG_LT1, "collector: collector_delay_start duration set to %d seconds.\n", 1714 delay_start); 1715 TprintfT (DBG_LT1, "collector: collector_terminate duration set to %d seconds.\n", 1716 terminate_duration); 1717 if (terminate_duration > 0) 1718 __collector_log_write ("<setting %s=\"%d\"/>\n<setting %s=\"%d\"/>\n", 1719 SP_JCMD_DELAYSTART, delay_start, 1720 SP_JCMD_TERMINATE, terminate_duration); 1721 __collector_delay_start = (hrtime_t) 0; 1722 if (delay_start != 0) 1723 { 1724 __collector_delay_start = __collector_start_time + ((hrtime_t) NANOSEC) * delay_start; 1725 collector_paused = 1; 1726 } 1727 __collector_terminate_time = terminate_duration == 0 ? (hrtime_t) 0 : 1728 __collector_start_time + ((hrtime_t) NANOSEC) * terminate_duration; 1729 return COL_ERROR_NONE; 1730 } 1731 1732 static int 1733 sample_set_user_sig (char *par) 1734 { 1735 int sig = CALL_UTIL (strtol)(par, &par, 0); 1736 TprintfT (DBG_LT1, "collector: sample_set_user_sig(sig=%d,installed=%d)\n", 1737 sig, sample_installed); 1738 /* Installing the sampling signal handler more 1739 * than once is not good. 1740 */ 1741 if (!sample_installed) 1742 { 1743 struct sigaction act; 1744 sigemptyset (&act.sa_mask); 1745 /* XXXX should any signals be blocked? */ 1746 act.sa_sigaction = sample_handler; 1747 act.sa_flags = SA_RESTART | SA_SIGINFO; 1748 if (sigaction (sig, &act, &old_sample_handler) == -1) 1749 { 1750 TprintfT (DBG_LT0, "collector: ERROR: collector_sample_handler install failed (sig=%d).\n", 1751 __collector_sample_sig); 1752 return COL_ERROR_ARGS; 1753 } 1754 if (old_sample_handler.sa_handler == SIG_DFL || 1755 old_sample_handler.sa_sigaction == sample_handler) 1756 old_sample_handler.sa_handler = SIG_IGN; 1757 TprintfT (DBG_LT1, "collector: collector_sample_handler installed (sig=%d,hndlr=0x%p).\n", 1758 sig, sample_handler); 1759 __collector_sample_sig = sig; 1760 sample_installed = 1; 1761 } 1762 (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_SAMPLE_SIG, __collector_sample_sig); 1763 return COL_ERROR_NONE; 1764 } 1765 1766 /* signal handler for sample signal */ 1767 static void 1768 sample_handler (int sig, siginfo_t *sip, void *uap) 1769 { 1770 if (sip && sip->si_code == SI_USER) 1771 { 1772 TprintfT (DBG_LT1, "collector: collector_sample_handler sampling!\n"); 1773 __collector_ext_usage_sample (MANUAL_SMPL, "signal"); 1774 } 1775 else if (old_sample_handler.sa_handler != SIG_IGN) 1776 { 1777 TprintfT (DBG_LT1, "collector: collector_sample_handler forwarding signal.\n"); 1778 (old_sample_handler.sa_sigaction)(sig, sip, uap); 1779 } 1780 } 1781 1782 void collector_pause () __attribute__ ((weak, alias ("__collector_pause"))); 1783 1784 void 1785 __collector_pause () 1786 { 1787 __collector_pause_m ("API"); 1788 } 1789 1790 void 1791 __collector_pause_m (char *reason) 1792 { 1793 hrtime_t now; 1794 char xreason[MAXPATHLEN]; 1795 TprintfT (DBG_LT0, "collector: __collector_pause_m(%s)\n", reason); 1796 1797 /* Stop data collection in all dynamic modules */ 1798 for (int i = 0; i < nmodules; i++) 1799 if (modules[i]->stopDataCollection != NULL) 1800 modules[i]->stopDataCollection (); 1801 1802 /* Take a pause sample */ 1803 CALL_UTIL (snprintf)(xreason, sizeof (xreason), "collector_pause(%s)", reason); 1804 __collector_ext_usage_sample (MASTER_SMPL, xreason); 1805 1806 /* Record the event in the log file */ 1807 now = GETRELTIME (); 1808 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" name=\"%s\"/>\n", SP_JCMD_PAUSE, 1809 (unsigned) (now / NANOSEC), (unsigned) (now % NANOSEC), reason); 1810 __collector_expstate = EXP_PAUSED; 1811 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_PAUSED\n"); 1812 collector_paused = 1; 1813 } 1814 1815 void collector_resume () __attribute__ ((weak, alias ("__collector_resume"))); 1816 1817 void 1818 __collector_resume () 1819 { 1820 TprintfT (DBG_LT0, "collector: __collector_resume()\n"); 1821 __collector_expstate = EXP_OPEN; 1822 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_OPEN\n"); 1823 1824 /* Record the event in the log file */ 1825 hrtime_t now = GETRELTIME (); 1826 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n", SP_JCMD_RESUME, 1827 (unsigned) (now / NANOSEC), (unsigned) (now % NANOSEC)); 1828 /* Take a resume sample */ 1829 __collector_ext_usage_sample (MASTER_SMPL, "collector_resume"); 1830 1831 /* Resume data collection in all dynamic modules */ 1832 for (int i = 0; i < nmodules; i++) 1833 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0) 1834 modules[i]->startDataCollection (); 1835 collector_paused = 0; 1836 } 1837 1838 static int 1839 pause_set_user_sig (char *par) 1840 { 1841 struct sigaction act; 1842 int sig = CALL_UTIL (strtol)(par, &par, 0); 1843 if (*par) 1844 { 1845 /* not end of the token */ 1846 if (*par != 'p') 1847 { 1848 /* it should be a p */ 1849 TprintfT (DBG_LT0, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n", 1850 par, (int) *par); 1851 return COL_ERROR_ARGS; 1852 1853 } 1854 else 1855 { 1856 /*, it's a p, make sure next is end of token */ 1857 par++; 1858 if (*par) 1859 { 1860 TprintfT (DBG_LT0, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n", 1861 par, (int) *par); 1862 return COL_ERROR_ARGS; 1863 } 1864 else 1865 /* start off paused */ 1866 collector_paused = 1; 1867 } 1868 } 1869 sigemptyset (&act.sa_mask); 1870 /* XXXX should any signals be blocked? */ 1871 act.sa_sigaction = pause_handler; 1872 act.sa_flags = SA_RESTART | SA_SIGINFO; 1873 if (sigaction (sig, &act, &old_pause_handler) == -1) 1874 { 1875 TprintfT (DBG_LT0, "collector: ERROR: collector_pause_handler install failed (sig=%d).\n", sig); 1876 return COL_ERROR_ARGS; 1877 } 1878 if (old_pause_handler.sa_handler == SIG_DFL || 1879 old_pause_handler.sa_sigaction == pause_handler) 1880 old_pause_handler.sa_handler = SIG_IGN; 1881 TprintfT (DBG_LT1, "collector: collector_pause_handler installed (sig=%d,hndlr=0x%p).\n", 1882 sig, pause_handler); 1883 __collector_pause_sig = sig; 1884 (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_PAUSE_SIG, 1885 __collector_pause_sig); 1886 return COL_ERROR_NONE; 1887 } 1888 1889 /* signal handler for pause/resume signal */ 1890 static void 1891 pause_handler (int sig, siginfo_t *sip, void *uap) 1892 { 1893 if (sip && sip->si_code == SI_USER) 1894 { 1895 if (collector_paused == 1) 1896 { 1897 __collector_resume (); 1898 TprintfT (DBG_LT0, "collector: collector_pause_handler resumed!\n"); 1899 } 1900 else 1901 { 1902 __collector_pause_m ("signal"); 1903 TprintfT (DBG_LT0, "collector: collector_pause_handler paused!\n"); 1904 } 1905 } 1906 else if (old_pause_handler.sa_handler != SIG_IGN) 1907 { 1908 TprintfT (DBG_LT0, "collector: collector_pause_handler forwarding signal.\n"); 1909 (old_pause_handler.sa_sigaction)(sig, sip, uap); 1910 } 1911 } 1912 1913 static void 1914 get_progspec (char *retstr, int tmp_sz, char *name, int name_sz) 1915 { 1916 int procfd, count, i; 1917 *retstr = 0; 1918 tmp_sz--; 1919 *name = 0; 1920 name_sz--; 1921 procfd = CALL_UTIL (open)("/proc/self/cmdline", O_RDONLY); 1922 int getting_name = 0; 1923 if (procfd != -1) 1924 { 1925 count = CALL_UTIL (read)(procfd, retstr, tmp_sz); 1926 retstr[count] = '\0'; 1927 for (i = 0; i < count; i++) 1928 { 1929 if (getting_name == 0) 1930 name[i] = retstr[i]; 1931 if (retstr[i] == '\0') 1932 { 1933 getting_name = 1; 1934 if ((i + 1) < count) 1935 retstr[i] = ' '; 1936 } 1937 } 1938 CALL_UTIL (close)(procfd); 1939 } 1940 } 1941 1942 static void 1943 fs_warn () 1944 { 1945 /* if data implies we don't care, just return */ 1946 if (fs_matters == 0) 1947 return; 1948 } 1949 1950 static void 1951 close_handler (int sig, siginfo_t *sip, void *uap) 1952 { 1953 if (sip && sip->si_code == SI_USER) 1954 { 1955 TprintfT (DBG_LT0, "collector: close_handler: processing signal.\n"); 1956 __collector_close_experiment (); 1957 } 1958 else if (old_close_handler.sa_handler != SIG_IGN) 1959 { 1960 TprintfT (DBG_LT0, "collector: close_handler forwarding signal.\n"); 1961 (old_close_handler.sa_sigaction)(sig, sip, uap); 1962 } 1963 } 1964 1965 static void 1966 exit_handler (int sig, siginfo_t *sip, void *uap) 1967 { 1968 if (sip && sip->si_code == SI_USER) 1969 { 1970 TprintfT (DBG_LT0, "collector: exit_handler: processing signal.\n"); 1971 CALL_UTIL (exit)(1); 1972 } 1973 else if (old_exit_handler.sa_handler != SIG_IGN) 1974 { 1975 TprintfT (DBG_LT0, "collector: exit_handler forwarding signal.\n"); 1976 (old_exit_handler.sa_sigaction)(sig, sip, uap); 1977 } 1978 } 1979 1980 static int 1981 set_user_sig_action (char *par) 1982 { 1983 int sig = CALL_UTIL (strtol)(par, &par, 0); 1984 if (*par != '=') 1985 { 1986 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action bad separator: %s.\n", par); 1987 return COL_ERROR_ARGS; 1988 } 1989 par++; 1990 struct sigaction act; 1991 sigemptyset (&act.sa_mask); 1992 act.sa_flags = SA_RESTART | SA_SIGINFO; 1993 if (__collector_strcmp (par, "exit") == 0) 1994 { 1995 act.sa_sigaction = exit_handler; 1996 if (sigaction (sig, &act, &old_exit_handler) == -1) 1997 { 1998 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig, par); 1999 return COL_ERROR_ARGS; 2000 } 2001 } 2002 else if (__collector_strcmp (par, "close") == 0) 2003 { 2004 act.sa_sigaction = close_handler; 2005 if (sigaction (sig, &act, &old_close_handler) == -1) 2006 { 2007 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig, par); 2008 return COL_ERROR_ARGS; 2009 } 2010 } 2011 else 2012 { 2013 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action unknown action: %d=%s.\n", sig, par); 2014 return COL_ERROR_ARGS; 2015 } 2016 __collector_log_write ("<setting signal=\"%u\" action=\"%s\"/>\n", sig, par); 2017 return COL_ERROR_NONE; 2018 } 2019 2020 /*============================================================*/ 2021 /* 2022 * Routines for handling the log file 2023 */ 2024 static struct DataHandle *log_hndl = NULL; 2025 static int log_initted = 0; 2026 static int log_enabled = 0; 2027 2028 static int 2029 log_open () 2030 { 2031 log_hndl = __collector_create_handle (SP_LOG_FILE); 2032 if (log_hndl == NULL) 2033 return COL_ERROR_LOG_OPEN; 2034 log_initted = 1; 2035 log_enabled = 1; 2036 TprintfT (DBG_LT1, "log_open()\n"); 2037 return COL_ERROR_NONE; 2038 } 2039 2040 static void 2041 log_header_write (sp_origin_t origin) 2042 { 2043 __collector_log_write ("<experiment %s=\"%d.%d\">\n", 2044 SP_JCMD_VERSION, SUNPERF_VERNUM, SUNPERF_VERNUM_MINOR); 2045 __collector_log_write ("<collector>%s</collector>\n", VERSION); 2046 __collector_log_write ("</experiment>\n"); 2047 2048 struct utsname sysinfo; 2049 if (uname (&sysinfo) < 0) 2050 { 2051 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\"/></event>\n", SP_JCMD_CERROR, COL_ERROR_SYSINFO, errno); 2052 __collector_log_write ("<system>\n"); 2053 } 2054 else 2055 { 2056 long page_size = CALL_UTIL (sysconf)(_SC_PAGESIZE); 2057 long npages = CALL_UTIL (sysconf)(_SC_PHYS_PAGES); 2058 __collector_log_write ("<system hostname=\"%s\" arch=\"%s\" os=\"%s %s\" pagesz=\"%ld\" npages=\"%ld\">\n", 2059 sysinfo.nodename, sysinfo.machine, sysinfo.sysname, sysinfo.release, page_size, npages); 2060 } 2061 2062 //YXXX Updating this section? Check similar cut/paste code in: 2063 // collctrl.cc::Coll_Ctrl() 2064 // collector.c::log_header_write() 2065 // cpu_frequency.h::get_cpu_frequency() 2066 2067 FILE *procf = CALL_UTIL (fopen)("/proc/cpuinfo", "r"); 2068 if (procf != NULL) 2069 { 2070 char temp[1024]; 2071 int cpu = -1; 2072 while (CALL_UTIL (fgets)(temp, sizeof (temp), procf) != NULL) 2073 { 2074 #if ARCH(Intel) 2075 if (__collector_strStartWith (temp, "processor") == 0) 2076 { 2077 char *val = CALL_UTIL (strchr)(temp, ':'); 2078 cpu = val ? CALL_UTIL (atoi)(val + 1) : -1; 2079 } 2080 // else if ( __collector_strStartWith(temp, "model") == 0 2081 // && CALL_UTIL(strstr)(temp, "name") == 0) { 2082 // char *val = CALL_UTIL(strchr)( temp, ':' ); 2083 // int model = val ? CALL_UTIL(atoi)( val + 1 ) : -1; 2084 // } 2085 // else if ( __collector_strStartWith(temp, "cpu family") == 0 ) { 2086 // char *val = CALL_UTIL(strchr)( temp, ':' ); 2087 // int family = val ? CALL_UTIL(atoi)( val + 1 ) : -1; 2088 // } 2089 else if (__collector_strStartWith (temp, "cpu MHz") == 0) 2090 { 2091 char *val = CALL_UTIL (strchr)(temp, ':'); 2092 int mhz = val ? CALL_UTIL (atoi)(val + 1) : 0; /* reading it as int is fine */ 2093 (void) __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, mhz); 2094 } 2095 #elif ARCH(SPARC) 2096 if (__collector_strStartWith (temp, "Cpu") == 0 && 2097 temp[3] != '\0' && 2098 __collector_strStartWith ((CALL_UTIL (strchr)(temp + 1, 'C')) ? CALL_UTIL (strchr)(temp + 1, 'C') : (temp + 4), "ClkTck") == 0) 2099 { // sparc-Linux 2100 char *val = CALL_UTIL (strchr)(temp, ':'); 2101 int mhz = 0; 2102 if (val) 2103 { 2104 unsigned long long freq; 2105 (*__collector_sscanfp) (val + 2, "%llx", &freq); 2106 mhz = (unsigned int) (((double) freq) / 1000000.0 + 0.5); 2107 } 2108 char *numend = CALL_UTIL (strchr)(temp + 1, 'C') ? CALL_UTIL (strchr)(temp + 1, 'C') : (temp + 4); 2109 *numend = '\0'; 2110 cpu = CALL_UTIL (atoi)(temp + 3); 2111 __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, mhz); 2112 } 2113 #elif defined(__aarch64__) 2114 if (__collector_strStartWith (temp, "processor") == 0) 2115 { 2116 char *val = CALL_UTIL (strchr)(temp, ':'); 2117 cpu = val ? CALL_UTIL (atoi)(val + 1) : -1; 2118 if (cpu != -1) 2119 { 2120 unsigned int mhz; 2121 asm volatile("mrs %0, cntfrq_el0" : "=r" (mhz)); 2122 __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, 2123 mhz / 1000000); 2124 } 2125 } 2126 #endif 2127 } 2128 CALL_UTIL (fclose)(procf); 2129 } 2130 __collector_log_write ("</system>\n"); 2131 __collector_log_write ("<process pid=\"%d\"></process>\n", getpid ()); 2132 __collector_log_write ("<process ppid=\"%d\"></process>\n", getppid ()); 2133 __collector_log_write ("<process pgrp=\"%d\"></process>\n", getpgrp ()); 2134 __collector_log_write ("<process sid=\"%d\"></process>\n", getsid (0)); 2135 2136 /* XXX -- cwd commented out 2137 It would be nice to get the current directory for the experiment, 2138 but neither method below will work--the /proc method returns a 2139 0-length string, and using getcwd will break collect on /bin/sh 2140 (as cuserid does) because of /bin/sh's private malloc 2141 omazur: readlink seems to work on Linux 2142 */ 2143 /* write the current directory */ 2144 char cwd[MAXPATHLEN + 1]; 2145 int i = readlink ("/proc/self/cwd", cwd, sizeof (cwd)); 2146 if (i >= 0) 2147 { 2148 cwd[i < sizeof (cwd) ? i : sizeof (cwd) - 1] = 0; 2149 (void) __collector_log_write ("<process cwd=\"%s\"></process>\n", cwd); 2150 } 2151 (void) __collector_log_write ("<process wsize=\"%d\"></process>\n", (int) (8 * sizeof (void *))); 2152 2153 ucontext_t ucp; 2154 ucp.uc_stack.ss_sp = NULL; 2155 ucp.uc_stack.ss_size = 0; 2156 if (getcontext (&ucp) == 0) 2157 { 2158 (void) __collector_log_write ("<process stackbase=\"0x%lx\"></process>\n", 2159 (unsigned long) ucp.uc_stack.ss_sp + ucp.uc_stack.ss_size); 2160 } 2161 2162 (void) __collector_log_write ("<process>%s</process>\n", 2163 origin == SP_ORIGIN_FORK ? "(fork)" : exp_progspec); 2164 __collector_libthread_T1 = 0; 2165 } 2166 2167 static void 2168 log_pause (void) 2169 { 2170 if (log_initted) 2171 log_enabled = 0; 2172 } 2173 2174 static void 2175 log_resume (void) 2176 { 2177 if (log_initted) 2178 log_enabled = 1; 2179 } 2180 2181 /* __collector_log_write -- write a line to the log file 2182 * return value: 2183 * 0 if OK 2184 * 1 if error (in creating or extending the log file) 2185 */ 2186 int 2187 __collector_log_write (char *format, ...) 2188 { 2189 char buf[4096]; 2190 va_list va; 2191 int rc = 0; 2192 static size_t loglen = 0; 2193 2194 va_start (va, format); 2195 char *bufptr = buf; 2196 int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va); 2197 int allocated_sz = 0; 2198 va_end (va); 2199 if (sz >= sizeof (buf)) 2200 { 2201 /* Allocate a new buffer. 2202 * We need this buffer only temporarily and locally. 2203 * But don't use the thread stack 2204 * since it already has buf 2205 * and is unlikely to have additonal room for something even larger than buf. 2206 */ 2207 sz += 1; /* add the terminating null byte */ 2208 bufptr = (char*) __collector_allocCSize (__collector_heap, sz, 0); 2209 if (bufptr) 2210 { 2211 allocated_sz = sz; 2212 va_start (va, format); 2213 sz = __collector_xml_vsnprintf (bufptr, sz, format, va); 2214 va_end (va); 2215 } 2216 } 2217 int newlen = CALL_UTIL (strlen)(bufptr); 2218 if (sz != newlen) 2219 // no need to free bufptr if we're going to abort anyhow 2220 abort (); 2221 bufptr[newlen + 1] = 0; 2222 loglen = loglen + newlen; 2223 TprintfT (DBG_LT2, "__collector_log_write len=%ld, loglen=%ld %s", 2224 (long) newlen, (long) loglen, bufptr); 2225 if (log_enabled <= 0) 2226 { 2227 #if 0 2228 /* XXX suppress log_write messages with no log file open 2229 * this is reached from SimApp dealing with the clock frequency, which it should 2230 * not be doing. For now, don't write a message. 2231 */ 2232 CALL_UTIL (fprintf)(stderr, "__collector_log_write COL_ERROR_LOG_OPEN: %s", buf); 2233 #endif 2234 } 2235 else 2236 rc = __collector_write_string (log_hndl, bufptr, sz); 2237 if (allocated_sz) 2238 __collector_freeCSize (__collector_heap, (void *) bufptr, allocated_sz); 2239 return rc; 2240 } 2241 2242 static void 2243 log_close () 2244 { 2245 log_enabled = 0; 2246 log_initted = 0; 2247 __collector_delete_handle (log_hndl); 2248 log_hndl = NULL; 2249 } 2250 2251 /*============================================================*/ 2252 /* 2253 * Routines for handling the overview file 2254 */ 2255 static void 2256 ovw_open () 2257 { 2258 CALL_UTIL (strlcpy)(ovw_name, __collector_exp_dir_name, sizeof (ovw_name)); 2259 CALL_UTIL (strlcat)(ovw_name, "/", sizeof (ovw_name)); 2260 CALL_UTIL (strlcat)(ovw_name, SP_OVERVIEW_FILE, sizeof (ovw_name)); 2261 int fd = CALL_UTIL (open)(ovw_name, O_WRONLY | O_CREAT | O_TRUNC, 2262 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); 2263 if (fd < 0) 2264 { 2265 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n", 2266 SP_JCMD_CERROR, COL_ERROR_OVWOPEN, errno, ovw_name); 2267 return; 2268 } 2269 CALL_UTIL (close)(fd); 2270 sample_mode = 1; 2271 } 2272 2273 static __inline__ void 2274 timeval_to_timespec(struct timeval *tval, struct timespec *value) 2275 { 2276 value->tv_nsec = tval->tv_usec * 1000; 2277 value->tv_sec = tval->tv_sec; 2278 } 2279 2280 /* 2281 * Resource usage. /proc/<pid>/usage /proc/<pid>/lwp/<lwpid>/lwpusage 2282 */ 2283 typedef struct prusage 2284 { 2285 id_t pr_lwpid; /* lwp id. 0: process or defunct */ 2286 int pr_count; /* number of contributing lwps */ 2287 timestruc_t pr_tstamp; /* current time stamp */ 2288 timestruc_t pr_create; /* process/lwp creation time stamp */ 2289 timestruc_t pr_term; /* process/lwp termination time stamp */ 2290 timestruc_t pr_rtime; /* total lwp real (elapsed) time */ 2291 timestruc_t pr_utime; /* user level cpu time */ 2292 timestruc_t pr_stime; /* system call cpu time */ 2293 timestruc_t pr_ttime; /* other system trap cpu time */ 2294 timestruc_t pr_tftime; /* text page fault sleep time */ 2295 timestruc_t pr_dftime; /* data page fault sleep time */ 2296 timestruc_t pr_kftime; /* kernel page fault sleep time */ 2297 timestruc_t pr_ltime; /* user lock wait sleep time */ 2298 timestruc_t pr_slptime; /* all other sleep time */ 2299 timestruc_t pr_wtime; /* wait-cpu (latency) time */ 2300 timestruc_t pr_stoptime; /* stopped time */ 2301 timestruc_t filltime[6]; /* filler for future expansion */ 2302 ulong_t pr_minf; /* minor page faults */ 2303 ulong_t pr_majf; /* major page faults */ 2304 ulong_t pr_nswap; /* swaps */ 2305 ulong_t pr_inblk; /* input blocks */ 2306 ulong_t pr_oublk; /* output blocks */ 2307 ulong_t pr_msnd; /* messages sent */ 2308 ulong_t pr_mrcv; /* messages received */ 2309 ulong_t pr_sigs; /* signals received */ 2310 ulong_t pr_vctx; /* voluntary context switches */ 2311 ulong_t pr_ictx; /* involuntary context switches */ 2312 ulong_t pr_sysc; /* system calls */ 2313 ulong_t pr_ioch; /* chars read and written */ 2314 ulong_t filler[10]; /* filler for future expansion */ 2315 } prusage_t; 2316 2317 static hrtime_t starttime = 0; 2318 2319 static hrtime_t 2320 ovw_write () 2321 { 2322 if (sample_mode == 0) 2323 return 0; 2324 int fd; 2325 int res; 2326 struct prusage usage; 2327 struct rusage rusage; 2328 hrtime_t hrt, delta; 2329 2330 /* Fill in the prusage structure with info from getrusage() */ 2331 hrt = collector_interface.getHiResTime (); 2332 if (starttime == 0) 2333 starttime = hrt; 2334 res = getrusage (RUSAGE_SELF, &rusage); 2335 if (res != 0) 2336 { 2337 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n", 2338 SP_JCMD_CERROR, COL_ERROR_OVWREAD, errno, ovw_name); 2339 return ( hrt); 2340 } 2341 2342 CALL_UTIL (memset)(&usage, 0, sizeof (struct prusage)); 2343 usage.pr_lwpid = getpid (); 2344 usage.pr_count = 1; 2345 usage.pr_tstamp.tv_sec = hrt / NANOSEC; 2346 usage.pr_tstamp.tv_nsec = hrt % NANOSEC; 2347 usage.pr_create.tv_sec = starttime / NANOSEC; 2348 usage.pr_create.tv_nsec = starttime % NANOSEC; 2349 delta = hrt - starttime; 2350 usage.pr_rtime.tv_sec = delta / NANOSEC; 2351 usage.pr_rtime.tv_nsec = delta % NANOSEC; 2352 timeval_to_timespec (&rusage.ru_utime, &usage.pr_utime); 2353 timeval_to_timespec (&rusage.ru_stime, &usage.pr_stime); 2354 2355 /* make sure that user- and system cpu time are not negative */ 2356 if (ts2hrt (usage.pr_utime) < 0) 2357 { 2358 usage.pr_utime.tv_sec = 0; 2359 usage.pr_utime.tv_nsec = 0; 2360 } 2361 if (ts2hrt (usage.pr_stime) < 0) 2362 { 2363 usage.pr_stime.tv_sec = 0; 2364 usage.pr_stime.tv_nsec = 0; 2365 } 2366 2367 /* fill in other fields */ 2368 usage.pr_minf = (ulong_t) rusage.ru_minflt; 2369 usage.pr_majf = (ulong_t) rusage.ru_majflt; 2370 usage.pr_nswap = (ulong_t) rusage.ru_nswap; 2371 usage.pr_inblk = (ulong_t) rusage.ru_inblock; 2372 usage.pr_oublk = (ulong_t) rusage.ru_oublock; 2373 usage.pr_msnd = (ulong_t) rusage.ru_msgsnd; 2374 usage.pr_mrcv = (ulong_t) rusage.ru_msgrcv; 2375 usage.pr_sigs = (ulong_t) rusage.ru_nsignals; 2376 usage.pr_vctx = (ulong_t) rusage.ru_nvcsw; 2377 usage.pr_ictx = (ulong_t) rusage.ru_nivcsw; 2378 2379 fd = CALL_UTIL (open)(ovw_name, O_WRONLY | O_APPEND); 2380 if (fd < 0) 2381 { 2382 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n", 2383 SP_JCMD_CERROR, COL_ERROR_OVWOPEN, errno, ovw_name); 2384 return ( ts2hrt (usage.pr_tstamp)); 2385 } 2386 2387 CALL_UTIL (lseek)(fd, 0, SEEK_END); 2388 res = CALL_UTIL (write)(fd, &usage, sizeof (usage)); 2389 CALL_UTIL (close)(fd); 2390 if (res != sizeof (usage)) 2391 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n", 2392 SP_JCMD_CERROR, COL_ERROR_OVWWRITE, errno, ovw_name); 2393 return (hrt); 2394 } 2395 2396 void 2397 __collector_dlog (int tflag, int level, char *format, ...) 2398 { 2399 if ((tflag & SP_DUMP_FLAG) == 0) 2400 { 2401 if (level > __collector_tracelevel) 2402 return; 2403 } 2404 else if ((tflag & collector_debug_opt) == 0) 2405 return; 2406 2407 /* In most cases this allocation should suffice */ 2408 int bufsz = CALL_UTIL (strlen)(format) + 128; 2409 char *buf = (char*) alloca (bufsz); 2410 char *p = buf; 2411 int left = bufsz; 2412 if ((tflag & SP_DUMP_NOHEADER) == 0) 2413 { 2414 p += CALL_UTIL (snprintf)(p, left, "P%d,L%02u,t%02lu", 2415 (int) getpid (), 2416 (unsigned int) __collector_lwp_self (), 2417 __collector_no_threads ? 0 : __collector_thr_self ()); 2418 left = bufsz - (p - buf); 2419 if (tflag) 2420 { 2421 hrtime_t ts = GETRELTIME (); 2422 p += CALL_UTIL (snprintf)(p, left, " %u.%09u ", (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC)); 2423 } 2424 else 2425 p += CALL_UTIL (snprintf)(p, left, ": "); 2426 left = bufsz - (p - buf); 2427 } 2428 2429 va_list va; 2430 va_start (va, format); 2431 int nbufsz = CALL_UTIL (vsnprintf)(p, left, format, va); 2432 va_end (va); 2433 2434 if (nbufsz >= left) 2435 { 2436 /* Allocate a new buffer */ 2437 nbufsz += 1; /* add the terminating null byte */ 2438 char *nbuf = (char*) alloca (nbufsz + (p - buf)); 2439 __collector_memcpy (nbuf, buf, p - buf); 2440 p = nbuf + (p - buf); 2441 2442 va_start (va, format); 2443 nbufsz = CALL_UTIL (vsnprintf)(p, nbufsz, format, va); 2444 va_end (va); 2445 buf = nbuf; 2446 } 2447 CALL_UTIL (write)(2, buf, CALL_UTIL (strlen)(buf)); 2448 } 2449 2450 /*============================================================*/ 2451 #if ! ARCH(SPARC) /* !sparc-Linux */ 2452 /* 2453 * Routines for handling _exit and _Exit 2454 */ 2455 /*------------------------------------------------------------- _exit */ 2456 2457 #define CALL_REAL(x) (*(int(*)())__real_##x) 2458 #define NULL_PTR(x) ( __real_##x == NULL ) 2459 2460 static void *__real__exit = NULL; /* libc only: _exit */ 2461 static void *__real__Exit = NULL; /* libc only: _Exit */ 2462 void _exit () __attribute__ ((weak, alias ("__collector_exit"))); 2463 void _Exit () __attribute__ ((weak, alias ("__collector_Exit"))); 2464 2465 void 2466 __collector_exit (int status) 2467 { 2468 if (NULL_PTR (_exit)) 2469 { 2470 __real__exit = dlsym (RTLD_NEXT, "_exit"); 2471 if (__real__exit == NULL) 2472 __real__exit = dlsym (RTLD_DEFAULT, "_exit"); 2473 } 2474 TprintfT (DBG_LT1, "__collector_exit() interposing @0x%p __real__exit\n", __real__exit); 2475 __collector_terminate_expt (); 2476 TprintfT (DBG_LT1, "__collector_exit(): experiment terminated\n"); 2477 CALL_REAL (_exit)(status); // this will exit the process 2478 } 2479 2480 void 2481 __collector_Exit (int status) 2482 { 2483 if (NULL_PTR (_Exit)) 2484 { 2485 __real__Exit = dlsym (RTLD_NEXT, "_Exit"); 2486 if (__real__Exit == NULL) 2487 __real__Exit = dlsym (RTLD_DEFAULT, "_exit"); 2488 } 2489 TprintfT (DBG_LT1, "__collector_Exit() interposing @0x%p __real__Exit\n", __real__Exit); 2490 __collector_terminate_expt (); 2491 TprintfT (DBG_LT1, "__collector_Exit(): experiment terminated\n"); 2492 CALL_REAL (_Exit)(status); // this will exit the process 2493 } 2494 #endif /* !sparc-Linux */ 2495