xref: /netbsd-src/external/gpl3/binutils.old/dist/gprofng/libcollector/collector.c (revision c42dbd0ed2e61fe6eda8590caa852ccf34719964)
1 /* Copyright (C) 2021 Free Software Foundation, Inc.
2    Contributed by Oracle.
3 
4    This file is part of GNU Binutils.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, 51 Franklin Street - Fifth Floor, Boston,
19    MA 02110-1301, USA.  */
20 
21 #include "config.h"
22 #include <alloca.h>
23 #include <errno.h>
24 #include <signal.h>
25 #include <ucontext.h>
26 #include <stdlib.h>     /* exit() */
27 #include <sys/param.h>
28 #include <sys/utsname.h>	/* struct utsname	*/
29 #include <sys/resource.h>
30 #include <sys/syscall.h>	/* system call fork() */
31 
32 #include "gp-defs.h"
33 #include "collector.h"
34 #include "descendants.h"
35 #include "gp-experiment.h"
36 #include "memmgr.h"
37 #include "cc_libcollector.h"
38 #include "tsd.h"
39 
40 /* TprintfT(<level>,...) definitions.  Adjust per module as needed */
41 #define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings
42 #define DBG_LT1 1 // for configuration details, warnings
43 #define DBG_LT2 2
44 #define DBG_LT3 3
45 
46 typedef unsigned long ulong_t;
47 
48 extern char **environ;
49 extern void __collector_close_experiment ();
50 extern int __collector_set_size_limit (char *par);
51 
52 /* -------  internal function prototypes ---------- */
53 CollectorModule __collector_register_module (ModuleInterface *modint);
54 static void write_sample (char *name);
55 static const char *__collector_get_params ();
56 static const char *__collector_get_expdir ();
57 static FrameInfo __collector_getUserCtx (CollectorModule modl, HiResTime ts, int mode, void *arg);
58 static FrameInfo __collector_getUID1 (CM_Array *arg);
59 static int __collector_writeMetaData (CollectorModule modl, char *format, ...);
60 static int __collector_writeDataRecord (CollectorModule modl, struct Common_packet *pckt);
61 static int __collector_writeDataPacket (CollectorModule modl, struct CM_Packet *pckt);
62 static void *allocCSize (struct Heap*, unsigned, int);
63 static void freeCSize (struct Heap*, void*, unsigned);
64 static void *allocVSize (struct Heap*, unsigned);
65 static void *reallocVSize (struct Heap*, void*, unsigned);
66 
67 static int collector_create_expr_dir (const char *new_exp_name);
68 static int collector_create_expr_dir_lineage (const char *parent_exp_name);
69 static int collector_exp_dir_append_x (int linenum, const char *parent_exp_name);
70 static int collector_tail_init (const char *parent_exp_name);
71 static int log_open ();
72 static void log_header_write (sp_origin_t origin);
73 static void log_pause ();
74 static void log_resume ();
75 static void fs_warn ();
76 static void log_close ();
77 static void get_progspec (char *cmdline, int tmp_sz, char *progname, int sz);
78 static void sample_handler (int, siginfo_t*, void*);
79 static int sample_set_interval (char *);
80 static int set_duration (char *);
81 static int sample_set_user_sig (char *);
82 static void pause_handler (int, siginfo_t*, void*);
83 static int pause_set_user_sig (char *);
84 static int set_user_sig_action (char*);
85 static void ovw_open ();
86 static hrtime_t ovw_write ();
87 
88 /* ------- global data controlling the collector's behavior -------- */
89 
/* Table of collector services handed out to modules through
   get_collector_interface ().  The initializer is positional; the trailing
   comment on each entry names the slot it is meant to fill (presumably the
   CollectorInterface field of the same name -- confirm against collector.h).
   The getHiResTime slot is deliberately left NULL here: it is filled in with
   __collector_gethrtime the first time get_collector_interface () runs.  */
90 static CollectorInterface collector_interface ={
91   __collector_register_module,  /* registerModule */
92   __collector_get_params,       /* getParams */
93   __collector_get_expdir,       /* getExpDir */
94   __collector_log_write,        /* writeLog */
95   __collector_getUserCtx,       /* getFrameInfo */
96   __collector_getUID1,          /* getUID */
97   __collector_getUID,           /* getUID2 */
98   __collector_getStackTrace,    /* getStackTrace */
99   __collector_writeMetaData,    /* writeMetaData */
100   __collector_writeDataRecord,  /* writeDataRecord */
101   __collector_writeDataPacket,  /* writeDataPacket */
102   write_sample,                 /* write_sample */
103   get_progspec,                 /* get_progspec */
104   __collector_open_experiment,  /* open_experiment */
105   NULL,                         /* getHiResTime */
106   __collector_newHeap,          /* newHeap */
107   __collector_deleteHeap,       /* deleteHeap */
108   allocCSize,                   /* allocCSize */
109   freeCSize,                    /* freeCSize */
110   allocVSize,                   /* allocVSize */
111   reallocVSize,                 /* reallocVSize */
112   __collector_tsd_create_key,   /* createKey */
113   __collector_tsd_get_by_key,   /* getKey */
114   __collector_dlog              /* writeDebugInfo */
115 };
116 
117 #define MAX_MODULES 32
118 static ModuleInterface *modules[MAX_MODULES];
119 static int modules_st[MAX_MODULES];
120 static void *modules_hndl[MAX_MODULES];
121 static volatile int nmodules = 0;
122 
123 /* flag set non-zero, if data collected implies a filesystem warning is appropriate */
124 static int fs_matters = 0;
125 static const char *collector_params = NULL;
126 static const char *project_home = NULL;
127 Heap *__collector_heap = NULL;
128 int __collector_no_threads;
129 int __collector_libthread_T1 = -1;
130 
131 static volatile int collector_paused = 0;
132 
133 int __collector_tracelevel = -1;
134 static int collector_debug_opt = 0;
135 
136 hrtime_t __collector_next_sample = 0;
137 int __collector_sample_period = 0; /* if non-zero, periodic sampling is enabled */
138 
139 hrtime_t __collector_delay_start = 0; /* if non-zero, delay before starting data */
140 hrtime_t __collector_terminate_time = 0; /* if non-zero, fixed duration run */
141 
142 static collector_mutex_t __collector_glob_lock = COLLECTOR_MUTEX_INITIALIZER;
143 static collector_mutex_t __collector_open_guard = COLLECTOR_MUTEX_INITIALIZER;
144 static collector_mutex_t __collector_close_guard = COLLECTOR_MUTEX_INITIALIZER;
145 static collector_mutex_t __collector_sample_guard = COLLECTOR_MUTEX_INITIALIZER;
146 static collector_mutex_t __collector_suspend_guard = COLLECTOR_MUTEX_INITIALIZER;
147 static collector_mutex_t __collector_resume_guard = COLLECTOR_MUTEX_INITIALIZER;
148 char __collector_exp_dir_name[MAXPATHLEN + 1] = ""; /* experiment directory */
149 int __collector_size_limit = 0;
150 
151 static char *archive_mode = NULL;
152 
153 volatile sp_state_t __collector_expstate = EXP_INIT;
154 static int exp_origin = SP_ORIGIN_LIBCOL_INIT;
155 static int exp_open = 0;
156 int __collector_exp_active = 0;
157 static int paused_when_suspended = 0;
158 static int exp_initted = 0;
159 static char exp_progspec[_POSIX_ARG_MAX + 1]; /* program cmdline. includes args */
160 static char exp_progname[_POSIX_ARG_MAX + 1]; /* program name == argv[0] */
161 
162 hrtime_t __collector_start_time = 0;
163 static time_t start_sec_time = 0;
164 
165 /* Sample related data */
166 static int sample_installed = 0; /* 1 if the sample signal handler installed */
167 static int sample_mode = 0; /* dynamically turns sample record writing on/off */
168 static int sample_number = 0; /* index of the current sample record */
169 static struct sigaction old_sample_handler;
170 int __collector_sample_sig = -1;     /* user-specified sample signal */
171 int __collector_sample_sig_warn = 0; /* non-zero if warning already given */
172 
173 /* Pause/resume related data */
174 static struct sigaction old_pause_handler;
175 int __collector_pause_sig = -1;     /* user-specified pause signal */
176 int __collector_pause_sig_warn = 0; /* non-zero if warning already given */
177 
178 static struct sigaction old_close_handler;
179 static struct sigaction old_exit_handler;
180 
181 /* Experiment files */
182 static char ovw_name[MAXPATHLEN];   /* Overview data file name */
183 
184 /* macro to convert a timestruc to hrtime_t */
185 #define ts2hrt(x)   ((hrtime_t)(x).tv_sec*NANOSEC + (hrtime_t)(x).tv_nsec)
186 
/* Pick up the debugging controls from the environment.  Compiled to an
   empty function unless DEBUG is set.  SP_COLLECTOR_TRACELEVEL sets
   __collector_tracelevel; SP_COLLECTOR_DEBUG sets collector_debug_opt
   with the SP_DUMP_TIME and SP_DUMP_FLAG bits masked off.  */
static void
init_tracelevel ()
{
#if DEBUG
  const char *level_str = CALL_UTIL (getenv)("SP_COLLECTOR_TRACELEVEL");
  if (level_str != NULL)
    __collector_tracelevel = CALL_UTIL (atoi)(level_str);
  TprintfT (DBG_LT0, "collector: SP_COLLECTOR_TRACELEVEL=%d\n", __collector_tracelevel);

  const char *debug_str = CALL_UTIL (getenv)("SP_COLLECTOR_DEBUG");
  if (debug_str != NULL)
    {
      int requested = CALL_UTIL (atoi)(debug_str);
      collector_debug_opt = requested & ~(SP_DUMP_TIME | SP_DUMP_FLAG);
    }
#endif
}
200 
201 static CollectorInterface *
get_collector_interface()202 get_collector_interface ()
203 {
204   if (collector_interface.getHiResTime == NULL)
205     collector_interface.getHiResTime = __collector_gethrtime;
206   return &collector_interface;
207 }
208 
209 /*
210  *    __collector_module_init is an alternate method to initialize
211  *    dynamic collector modules (er_heap, er_sync, er_iotrace, er_mpi, tha).
212  *    Every module that needs to register itself with libcollector
213  *    before the experiment is open implements its own global
214  *    __collector_module_init and makes sure the next one is called.
215  */
216 static void
collector_module_init(CollectorInterface * col_intf)217 collector_module_init (CollectorInterface *col_intf)
218 {
219   int nmodules = 0;
220 
221   ModuleInitFunc next_init = (ModuleInitFunc) dlsym (RTLD_DEFAULT, "__collector_module_init");
222   if (next_init != NULL)
223     {
224       nmodules++;
225       next_init (col_intf);
226     }
227   TprintfT (DBG_LT1, "collector_module_init: %d modules\n", nmodules);
228 }
229 
230 /*   Routines concerned with general experiment start and stop */
231 
232 /* initialization -- init section routine -- called when libcollector loaded */
233 static void collector_init () __attribute__ ((constructor));
234 
235 static void
collector_init()236 collector_init ()
237 {
238   if (__collector_util_init () != 0)
239     /* we can't do anything without various utility functions */
240     abort ();
241   init_tracelevel ();
242 
243   /*
244    * Unconditionally install the SIGPROF handler
245    * to process signals originated in dtracelets.
246    */
247   __collector_sigprof_install ();
248 
249   /* Initialize all preloaded modules */
250   collector_module_init (get_collector_interface ());
251 
252   /* determine experiment name */
253   char *exp = CALL_UTIL (getenv)("SP_COLLECTOR_EXPNAME");
254   if ((exp == NULL) || (CALL_UTIL (strlen)(exp) == 0))
255     {
256       TprintfT (DBG_LT0, "collector_init: SP_COLLECTOR_EXPNAME undefined - no experiment to start\n");
257       /* not set -- no experiment to run */
258       return;
259     }
260   else
261     TprintfT (DBG_LT1, "collector_init: found SP_COLLECTOR_EXPNAME = %s\n", exp);
262 
263   /* determine the data descriptor for the experiment */
264   char *params = CALL_UTIL (getenv)("SP_COLLECTOR_PARAMS");
265   if (params == NULL)
266     {
267       TprintfT (0, "collector_init: SP_COLLECTOR_PARAMS undefined - no experiment to start\n");
268       return;
269     }
270 
271   /* now do the real open of the experiment */
272   if (__collector_open_experiment (exp, params, SP_ORIGIN_LIBCOL_INIT))
273     {
274       TprintfT (0, "collector_init: __collector_open_experiment failed\n");
275       /* experiment open failed, close it */
276       __collector_close_experiment ();
277       return;
278     }
279   return;
280 }
281 
282 CollectorModule
__collector_register_module(ModuleInterface * modint)283 __collector_register_module (ModuleInterface *modint)
284 {
285   TprintfT (DBG_LT1, "collector: module %s calls for registration.\n",
286 	    modint->description == NULL ? "(null)" : modint->description);
287   if (modint == NULL)
288     return COLLECTOR_MODULE_ERR;
289   if (nmodules >= MAX_MODULES)
290     return COLLECTOR_MODULE_ERR;
291   if (modint->initInterface &&
292       modint->initInterface (get_collector_interface ()))
293     return COLLECTOR_MODULE_ERR;
294   int idx = nmodules++;
295   modules[idx] = modint;
296   modules_st[idx] = 0;
297 
298   if (exp_open && modint->openExperiment)
299     {
300       modules_st[idx] = modint->openExperiment (__collector_exp_dir_name);
301       if (modules_st[idx] == COL_ERROR_NONE && modules[idx]->description != NULL)
302 	{
303 	  modules_hndl[idx] = __collector_create_handle (modules[idx]->description);
304 	  if (modules_hndl[idx] == NULL)
305 	    modules_st[idx] = -1;
306 	}
307     }
308   if (__collector_exp_active && collector_paused == 0 &&
309       modint->startDataCollection && modules_st[idx] == 0)
310     modint->startDataCollection ();
311   TprintfT (DBG_LT1, "collector: module %s (%d) registered.\n",
312 	    modint->description == NULL ? "(null)" : modint->description, idx);
313   return (CollectorModule) idx;
314 }
315 
316 static const char *
__collector_get_params()317 __collector_get_params ()
318 {
319   return collector_params;
320 }
321 
322 static const char *
__collector_get_expdir()323 __collector_get_expdir ()
324 {
325   return __collector_exp_dir_name;
326 }
327 
328 static FrameInfo
__collector_getUserCtx(CollectorModule modl,HiResTime ts,int mode,void * arg)329 __collector_getUserCtx (CollectorModule modl, HiResTime ts, int mode, void *arg)
330 {
331   return __collector_get_frame_info (ts, mode, arg);
332 }
333 
334 static FrameInfo
__collector_getUID1(CM_Array * arg)335 __collector_getUID1 (CM_Array *arg)
336 {
337   return __collector_getUID (arg, (FrameInfo) 0);
338 }
339 
340 static int
__collector_writeMetaData(CollectorModule modl,char * format,...)341 __collector_writeMetaData (CollectorModule modl, char *format, ...)
342 {
343   if (modl < 0 || modl >= nmodules || modules[modl]->description == NULL)
344     {
345       TprintfT (DBG_LT0, "__collector_writeMetaData(): bad module: %d\n", modl);
346       return 1;
347     }
348   char fname[MAXPATHLEN + 1];
349   CALL_UTIL (strlcpy)(fname, __collector_exp_dir_name, sizeof (fname));
350   CALL_UTIL (strlcat)(fname, "/metadata.", sizeof (fname));
351   CALL_UTIL (strlcat)(fname, modules[modl]->description, sizeof (fname));
352   CALL_UTIL (strlcat)(fname, ".xml", sizeof (fname));
353   int fd = CALL_UTIL (open)(fname, O_CREAT | O_WRONLY | O_APPEND,
354 			    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
355   if (fd < 0)
356     {
357       TprintfT (DBG_LT0, "__collector_writeMetaData(): can't open file: %s\n", fname);
358       return 1;
359     }
360   char buf[1024];
361   char *bufptr = buf;
362   va_list va;
363   va_start (va, format);
364   int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va);
365   va_end (va);
366 
367   if (sz >= sizeof (buf))
368     {
369       /* Allocate a new buffer */
370       sz += 1; /* add the terminating null byte */
371       bufptr = (char*) alloca (sz);
372 
373       va_start (va, format);
374       sz = __collector_xml_vsnprintf (bufptr, sz, format, va);
375       va_end (va);
376     }
377   CALL_UTIL (write)(fd, bufptr, sz);
378   CALL_UTIL (close)(fd);
379   return COL_ERROR_NONE;
380 }
381 
382 /* check that the header fields are filled-in, and then call __collector_writeDataPacket */
383 static int
__collector_writeDataRecord(CollectorModule modl,struct Common_packet * pckt)384 __collector_writeDataRecord (CollectorModule modl, struct Common_packet *pckt)
385 {
386   return __collector_write_record (modules_hndl[modl], pckt);
387 }
388 
389 static int
__collector_writeDataPacket(CollectorModule modl,struct CM_Packet * pckt)390 __collector_writeDataPacket (CollectorModule modl, struct CM_Packet *pckt)
391 {
392   return __collector_write_packet (modules_hndl[modl], pckt);
393 }
394 
395 static void *
allocCSize(struct Heap * heap,unsigned sz,int log)396 allocCSize (struct Heap *heap, unsigned sz, int log)
397 {
398   return __collector_allocCSize (heap ? heap : __collector_heap, sz, log);
399 }
400 
401 static void
freeCSize(struct Heap * heap,void * ptr,unsigned sz)402 freeCSize (struct Heap *heap, void *ptr, unsigned sz)
403 {
404   __collector_freeCSize (heap ? heap : __collector_heap, ptr, sz);
405 }
406 
407 static void *
allocVSize(struct Heap * heap,unsigned sz)408 allocVSize (struct Heap *heap, unsigned sz)
409 {
410   return __collector_allocVSize (heap ? heap : __collector_heap, sz);
411 }
412 
413 static void *
reallocVSize(struct Heap * heap,void * ptr,unsigned sz)414 reallocVSize (struct Heap *heap, void *ptr, unsigned sz)
415 {
416   return __collector_reallocVSize (heap ? heap : __collector_heap, ptr, sz);
417 }
418 
419 static time_t
get_gm_time(struct tm * tp)420 get_gm_time (struct tm *tp)
421 {
422   /*
423      Note that glibc contains a function of the same purpose named `timegm'.
424    But obviously, it is not universally available.
425 
426      Some implementations of mktime return -1 for the nonexistent localtime hour
427    at the beginning of DST. In this event, use 'mktime(tm - 1hr) + 3600'.
428   nonexistent
429      tm_isdst is set to 0 to force mktime to introduce a consistent offset
430    (the non DST offset) since tm and tm+o might be on opposite sides of a DST change.
431 
432    Schematically:
433      mktime(tm)    --> t+o
434      gmtime_r(t+o) --> tm+o
435      mktime(tm+o)  --> t+2o
436      t = t+o - (t+2o - t+o)
437    */
438   struct tm stm;
439   time_t tl = CALL_UTIL (mktime)(tp);
440   if (tl == -1)
441     {
442       stm = *tp;
443       stm.tm_hour--;
444       tl = CALL_UTIL (mktime)(&stm);
445       if (tl == -1)
446 	return -1;
447       tl += 3600;
448     }
449 
450   (void) (CALL_UTIL (gmtime_r)(&tl, &stm));
451   stm.tm_isdst = 0;
452   time_t tb = CALL_UTIL (mktime)(&stm);
453   if (tb == -1)
454     {
455       stm.tm_hour--;
456       tb = CALL_UTIL (mktime)(&stm);
457       if (tb == -1)
458 	return -1;
459       tb += 3600;
460     }
461   return (tl - (tb - tl));
462 }
463 
464 static void
log_write_event_run()465 log_write_event_run ()
466 {
467   /* get the gm and local time */
468   struct tm start_stm;
469   CALL_UTIL (gmtime_r)(&start_sec_time, &start_stm);
470   time_t start_gm_time = get_gm_time (&start_stm);
471   time_t lcl_time = CALL_UTIL (mktime)(&start_stm);
472   __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n",
473 			 SP_JCMD_RUN,
474 			 (unsigned) (__collector_start_time / NANOSEC),
475 			 (unsigned) (__collector_start_time % NANOSEC),
476 			 (long long) start_gm_time,
477 			 (long long) (lcl_time - start_gm_time));
478 }
479 
480 static void *
m_dlopen(const char * filename,int flag)481 m_dlopen (const char *filename, int flag)
482 {
483   void *p = dlopen (filename, flag);
484   TprintfT (DBG_LT1, "collector.c: dlopen(%s, %d) returns %p\n", filename, flag, p);
485   return p;
486 }
487 /* real routine to open an experiment
488  * called by collector_init from libcollector init section
489  * called by __collector_start_experiment when a child is forked */
490 int
__collector_open_experiment(const char * exp,const char * params,sp_origin_t origin)491 __collector_open_experiment (const char *exp, const char *params, sp_origin_t origin)
492 {
493   char *s;
494   char *buf = NULL;
495   char *duration_string = NULL;
496   int err;
497   int is_founder = 1;
498   int record_this_experiment = 1;
499   int seen_F_flag = 0;
500   static char buffer[32];
501   if (exp_open)
502     {
503       /* experiment already opened */
504       TprintfT (0, "collector: ERROR: Attempt to open opened experiment\n");
505       return COL_ERROR_EXPOPEN;
506     }
507   __collector_start_time = collector_interface.getHiResTime ();
508   TprintfT (DBG_LT1, "\n\t\t__collector_open_experiment(SP_COLLECTOR_EXPNAME=%s, params=%s, origin=%d); setting start_time\n",
509 	    exp, params, origin);
510   if (environ)
511     __collector_env_printall ("__collector_open_experiment", environ);
512   else
513     TprintfT (DBG_LT1, "collector_open_experiment found environ == NULL)\n");
514 
515   /*
516    * Recheck sigprof handler
517    * XXXX Bug 18177509 - additional sigprof signal kills target program
518    */
519   __collector_sigprof_install ();
520   exp_origin = origin;
521   collector_params = params;
522 
523   /* Determine which of the three possible threading models:
524    *	    singlethreaded
525    *	    multi-LWP (no threads)
526    *	    multithreaded
527    * is the one the target is actually using.
528    *
529    * we really only need to distinguish between first two
530    * and the third. The thr_main() trick does exactly that.
531    * is the one the target is actually using.
532    *
533    * __collector_no_threads applies to all signal handlers,
534    * and must be set before signal handlers are installed.
535    */
536   __collector_no_threads = 0;
537   __collector_exp_dir_name[0] = 0;
538   sample_mode = 0;
539   sample_number = 0;
540 
541   /* create global heap */
542   if (__collector_heap == NULL)
543     {
544       __collector_heap = __collector_newHeap ();
545       if (__collector_heap == NULL)
546 	{
547 	  CALL_UTIL (fprintf)(stderr, "__collector_open_experiment COLERROR_NOZMEM 1\n");
548 	  return COL_ERROR_NOZMEM;
549 	}
550     }
551   //check whether is origin is collect
552   char * envar = CALL_UTIL (getenv)("SP_COLLECTOR_ORIGIN_COLLECT");
553   TprintfT (DBG_LT1, "__collector_open_experiment SP_COLLECTOR_ORIGIN_COLLECT = '%s'\n",
554 	    (envar == NULL) ? "NULL" : envar);
555   if (envar)
556     exp_origin = SP_ORIGIN_COLLECT;
557 
558   //check if this is the founder process
559   is_founder = getpid ();
560   if (origin != SP_ORIGIN_DBX_ATTACH)
561     {
562       envar = CALL_UTIL (getenv)("SP_COLLECTOR_FOUNDER");
563       if (envar)
564 	is_founder = CALL_UTIL (atoi)(envar);
565       if (is_founder != 0)
566 	{
567 	  if (is_founder != getpid ())
568 	    {
569 	      TprintfT (0, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d)\n",
570 			is_founder, getpid ());
571 	      //CALL_UTIL(fprintf)(stderr, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d); not recording experiment\n",
572 	      //is_founder, getpid() );
573 	      //return COL_ERROR_UNEXP_FOUNDER;
574 	      is_founder = 0; // Special case (CR 22917352)
575 	    }
576 	  /* clear FOUNDER for descendant experiments */
577 	  TprintfT (0, "__collector_open_experiment setting SP_COLLECTOR_FOUNDER=0\n");
578 	  CALL_UTIL (strlcpy)(buffer, "SP_COLLECTOR_FOUNDER=0", sizeof (buffer));
579 	  CALL_UTIL (putenv)(buffer);
580 	}
581     }
582 
583   /* Set up fork/exec interposition (requires __collector_heap). */
584   /* Determine if "collect -F" specification enables this subexperiment */
585   get_progspec (exp_progspec, sizeof (exp_progspec), exp_progname, sizeof (exp_progname));
586 
587   /* convert the returned exp_progname to a basename */
588   const char * base_name = __collector_strrchr (exp_progname, '/');
589   if (base_name == NULL)
590     base_name = exp_progname;
591   else
592     base_name = base_name + 1;
593   err = __collector_ext_line_init (&record_this_experiment, exp_progspec, base_name);
594   if (err != COL_ERROR_NONE)
595     {
596       CALL_UTIL (fprintf)(stderr, "__collector_open_experiment COLERROR: %d\n", err);
597       return err;
598     }
599 
600   /* Due to the fix of bug 15691122, we need to initialize unwind to make
601    * the function __collector_ext_return_address() work for dlopen interposition.
602    * */
603   if (!record_this_experiment && !is_founder)
604     {
605       TprintfT (DBG_LT0, "__collector_open_experiment: NOT creating experiment.  (is_founder=%d, record=%d)\n",
606 		is_founder, record_this_experiment);
607       return collector_tail_init (exp);
608     }
609   TprintfT (DBG_LT0, "__collector_open_experiment: is_founder=%d, record=%d\n",
610 	    is_founder, record_this_experiment);
611   if (is_founder || origin == SP_ORIGIN_FORK)
612     {
613       CALL_UTIL (strlcpy)(__collector_exp_dir_name, exp, sizeof (__collector_exp_dir_name));
614       if (origin == SP_ORIGIN_FORK)
615 	{ /*create exp dir for fork-child*/
616 	  if (collector_create_expr_dir (__collector_exp_dir_name))
617 	    {
618 	      CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 1: `%s'\n", exp);
619 	      return COL_ERROR_BADDIR;
620 	    }
621 	}
622     }
623   else
624     {/* founder/fork-child will already have created experiment dir, but exec/combo descendants must do so now */
625       if (collector_create_expr_dir_lineage (exp))
626 	{
627 	  CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 2: `%s'\n", exp);
628 	  return COL_ERROR_BADDIR;
629 	}
630       static char exp_name_env[MAXPATHLEN + 1];
631       TprintfT (DBG_LT1, "collector_open_experiment: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name);
632       CALL_UTIL (snprintf)(exp_name_env, sizeof (exp_name_env), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name);
633       CALL_UTIL (putenv)(exp_name_env);
634     }
635   /* Check that the name is that of a directory (new structure) */
636   DIR *expDir = CALL_UTIL (opendir)(__collector_exp_dir_name);
637   if (expDir == NULL)
638     {
639       /* can't open it */
640       CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 3: `%s'\n", exp);
641       return COL_ERROR_BADDIR;
642     }
643   CALL_UTIL (closedir)(expDir);
644 
645   if (CALL_UTIL (access)(__collector_exp_dir_name, W_OK))
646     {
647       TprintfT (0, "collector: ERROR: access error: errno=%d\n", errno);
648       if ((errno == EACCES) || (errno == EROFS))
649 	{
650 	  CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_DIRPERM: `%s'\n", exp);
651 	  TprintfT (DBG_LT0, "collector: ERROR: experiment directory `%s' is not writeable\n",
652 		    __collector_exp_dir_name);
653 	  return COL_ERROR_DIRPERM;
654 	}
655       else
656 	{
657 	  CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 4: `%s'\n", exp);
658 	  return COL_ERROR_BADDIR;
659 	}
660     }
661 
662   /* reset the paused flag */
663   collector_paused = (origin == SP_ORIGIN_FORK ? paused_when_suspended : 0);
664 
665   /* mark the experiment as opened */
666   __collector_expstate = EXP_OPEN;
667   TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_OPEN\n");
668 
669   /* open the log file */
670   err = log_open ();
671   if (err != COL_ERROR_NONE)
672     {
673       CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_LOG_OPEN\n");
674       return COL_ERROR_LOG_OPEN;
675     }
676   if (origin != SP_ORIGIN_GENEXP && origin != SP_ORIGIN_KERNEL)
677     log_header_write (origin);
678 
679   /* Make a copy of params so that we can modify the string */
680   int paramsz = CALL_UTIL (strlen)(params) + 1;
681   buf = (char*) alloca (paramsz);
682   if (buf == NULL)
683     {
684       CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_ARGS2BIG: %s\n", params);
685       TprintfT (DBG_LT0, "collector: ERROR: experiment parameter `%s' is too long\n", params);
686       (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n",
687 				    SP_JCMD_CERROR, COL_ERROR_ARGS2BIG);
688       return COL_ERROR_ARGS2BIG;
689     }
690   CALL_UTIL (strlcpy)(buf, params, paramsz);
691 
692   /* create directory for archives (if founder) */
693   char archives[MAXPATHLEN];
694   CALL_UTIL (snprintf)(archives, MAXPATHLEN, "%s/%s", __collector_exp_dir_name,
695 		       SP_ARCHIVES_DIR);
696   if (is_founder)
697     {
698       mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
699       if ((CALL_UTIL (mkdir)(archives, dmode) != 0) && (errno != EEXIST))
700 	{
701 	  CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_MKDIR: %s: errno = %d\n", archives, errno);
702 	  TprintfT (0, "collector: ERROR: mkdir(%s) failed: errno = %d\n", archives, errno);
703 	  (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">mkdir(%s): errno=%d</event>\n",
704 					SP_JCMD_COMMENT, COL_COMMENT_NONE, archives, errno);
705 	  /* this is not a fatal error currently */
706 	}
707       else
708 	TprintfT (DBG_LT1, "collector: archive mkdir(%s) succeeded\n", archives);
709     }
710 
711   /* initialize the segments map and mmap interposition */
712   if (origin != SP_ORIGIN_GENEXP && origin != SP_ORIGIN_KERNEL)
713     {
714       if ((err = __collector_ext_mmap_install (1)) != COL_ERROR_NONE)
715 	{
716 	  __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR, err);
717 	  return err;
718 	}
719     }
720 
721   /* open the overview file for sample data */
722   if (origin != SP_ORIGIN_GENEXP)
723     ovw_open ();
724 
725   /* initialize TSD module (note: relies on __collector_heap) */
726   if (__collector_tsd_init () != 0)
727     {
728       CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_TSD_INIT\n");
729       __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD could not be initialized</event>\n", SP_JCMD_CERROR, COL_ERROR_TSD_INIT);
730       return COL_ERROR_TSD_INIT;
731     }
732 
733   /* experiment is initialized; allow pause/resume/close */
734   exp_initted = 1;
735 
736   // 24935305 should not use SIGPROF if collect -p -t and -S are all off
737   /* (check here if -t or -S is on; -p is checked later) */
738   if (((params[0] == 't' || params[0] == 'S') && params[1] == ':')
739       || CALL_UTIL (strstr)(params, ";t:")
740       || CALL_UTIL (strstr)(params, ";S:"))
741     {
742       /* set a default time to 100 ms.; use negative value to force setting */
743       TprintfT (DBG_LT1, "collector: open_experiment setting timer to 100000\n");
744       __collector_ext_itimer_set (-100000);
745     }
746 
747   /* call open for all dynamic modules */
748   int i;
749   for (i = 0; i < nmodules; i++)
750     {
751       if (modules[i]->openExperiment != NULL)
752 	{
753 	  modules_st[i] = modules[i]->openExperiment (__collector_exp_dir_name);
754 	  if (modules_st[i] == COL_ERROR_NONE && modules[i]->description != NULL)
755 	    {
756 	      modules_hndl[i] = __collector_create_handle (modules[i]->description);
757 	      if (modules_hndl[i] == NULL)
758 		modules_st[i] = -1;
759 	    }
760 	}
761       /* check to see if anyone closed the experiment */
762       if (!exp_initted)
763 	{
764 	  CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_EXP_OPEN\n");
765 	  __collector_log_write ("<event kind=\"%s\" id=\"%d\">Experiment closed prematurely</event>\n", SP_JCMD_CERROR, COL_ERROR_EXPOPEN);
766 	  return COL_ERROR_EXPOPEN;
767 	}
768     }
769 
770   /* initialize for subsequent stack unwinds */
771   __collector_ext_unwind_init (1);
772   TprintfT (DBG_LT0, "__collector_open_experiment(); module init done, params=%s\n",
773 	    buf);
774 
775   /* now parse the data descriptor */
776   /* The parameter string is a series of specifiers,
777    *	each of which is of the form:
778    *		<key>:<param>;
779    *	key is a single letter, the : and ; are mandatory,
780    *	and param is a string which may be zero-length, and
781    *	which contains any character except a null-byte or ;
782    *	param is interpreted by the handler for the particular key
783    */
784 
785   s = buf;
786 
787   while (*s)
788     {
789       char *par;
790       char key = *s++;
791       /* ensure that it's followed by a colon */
792       if (*s++ != ':')
793 	{
794 	  TprintfT (0, "collector: ERROR: parameter %c is not followed by a colon\n", key);
795 	  (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, COL_ERROR_ARGS, params);
796 	  return COL_ERROR_ARGS;
797 	}
798       /* find the semicolon terminator */
799       par = s;
800       while (*s && (*s != ';'))
801 	s++;
802       if (*s != ';')
803 	{
804 	  /* not followed by semicolon */
805 	  TprintfT (0, "collector: ERROR: parameter %c:%s is not terminated by a semicolon\n", key, par);
806 	  (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, COL_ERROR_ARGS, params);
807 	  return COL_ERROR_ARGS;
808 	}
809       /* terminate par, and position for next descriptor */
810       *s++ = 0;
811 
812       /* now process that element of the data descriptor */
813       switch (key)
814 	{
815 	case 'g': /* g<sig>; */
816 	  if ((err = sample_set_user_sig (par)) != COL_ERROR_NONE)
817 	    {
818 	      __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
819 	      return err;
820 	    }
821 	  break;
822 	case 'd': /* d<sig>; -or- d<sig>p; */
823 	  if ((err = pause_set_user_sig (par)) != COL_ERROR_NONE)
824 	    {
825 	      __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
826 	      return err;
827 	    }
828 	  break;
829 	case 'H':
830 	  m_dlopen ("libgp-heap.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
831 	  break;
832 	case 's':
833 	  m_dlopen ("libgp-sync.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
834 	  break;
835 	case 'i':
836 	  m_dlopen ("libgp-iotrace.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
837 	  break;
838 	case 'F': /* F; */
839 	  seen_F_flag = 1;
840 	  TprintfT (DBG_LT0, "__collector_open_experiment: calling __collector_ext_line_install (%s, %s)\n",
841 		    par, __collector_exp_dir_name);
842 	  if ((err = __collector_ext_line_install (par, __collector_exp_dir_name)) != COL_ERROR_NONE)
843 	    {
844 	      __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
845 	      return err;
846 	    }
847 	  break;
848 	case 'a': /* a; */
849 	  archive_mode = __collector_strdup (par);
850 	  break;
851 	case 't': /* t:<expt-duration>; */
852 	  duration_string = par;
853 	  break;
854 	case 'S': /* S:<sample-interval>; */
855 	  if ((err = sample_set_interval (par)) != COL_ERROR_NONE)
856 	    {
857 	      __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
858 	      return err;
859 	    }
860 	  break;
861 	case 'L': /* L:<experiment-size-limit>; */
862 	  if ((err = __collector_set_size_limit (par)) != COL_ERROR_NONE)
863 	    {
864 	      __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
865 	      return err;
866 	    }
867 	  break;
868 	case 'P': /* P:PROJECT_HOME; */
869 	  project_home = __collector_strdup (par);
870 	  break;
871 	case 'h':
872 	case 'p':
873 	  fs_matters = 1;
874 	  break;
875 	case 'Y':
876 	  err = set_user_sig_action (par);
877 	  if (err != COL_ERROR_NONE)
878 	    __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
879 	  break;
880 	default:
881 	  /* Ignore unknown parameters; allow them to be handled by modules */
882 	  break;
883 	}
884     }
885   /* end of data descriptor parsing */
886 
887   if (!seen_F_flag)
888     {
889       char * par = "0"; // This will not happen when collect has no -F option
890       if ((err = __collector_ext_line_install (par, __collector_exp_dir_name)) != COL_ERROR_NONE)
891 	{
892 	  __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
893 	  return err;
894 	}
895     }
896 
897   /* now that we know what data is being collected, we can set the filesystem warning */
898   fs_warn ();
899 
900   // We have to create all tsd keys before __collector_tsd_allocate().
901   // With the pthreads-based implementation, this might no longer be necessary.
902   // In any case, we still have to create the key before a thread can use it.
903   __collector_ext_gettid_tsd_create_key ();
904   __collector_ext_dispatcher_tsd_create_key ();
905 
906   /* allocate tsd for the current thread */
907   if (__collector_tsd_allocate () != 0)
908     {
909       __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD allocate failed</event>\n", SP_JCMD_CERROR, COL_ERROR_EXPOPEN);
910       return COL_ERROR_EXPOPEN;
911     }
912   /* init tsd for unwind, called right after __collector_tsd_allocate()*/
913   __collector_ext_unwind_key_init (1, NULL);
914 
915   /* start java attach if suitable */
916   if (exp_origin == SP_ORIGIN_DBX_ATTACH)
917     __collector_jprofile_start_attach ();
918   start_sec_time = CALL_UTIL (time)(NULL);
919   __collector_start_time = collector_interface.getHiResTime ();
920   TprintfT (DBG_LT0, "\t__collector_open_experiment; resetting start_time\n");
921   if (duration_string != NULL && (err = set_duration (duration_string)) != COL_ERROR_NONE)
922     {
923       __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, duration_string);
924       return err;
925     }
926 
927   /* install the common SIGPROF dispatcher (requires TSD) */
928   if ((err = __collector_ext_dispatcher_install ()) != COL_ERROR_NONE)
929     {
930       __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR, err);
931       return err;
932     }
933 
934   /* mark the experiment open complete */
935   exp_open = 1;
936   if (exp_origin == SP_ORIGIN_DBX_ATTACH)
937     __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n",
938 			   SP_JCMD_RUN,
939 			   (unsigned) (__collector_start_time / NANOSEC), (unsigned) (__collector_start_time % NANOSEC),
940 			   (long long) start_sec_time, (long long) 0);
941   else
942     log_write_event_run ();
943 
944   /* schedule the first sample */
945   __collector_next_sample = __collector_start_time + ((hrtime_t) NANOSEC) * __collector_sample_period;
946   __collector_ext_usage_sample (MASTER_SMPL, "collector_open_experiment");
947 
948   /* start data collection in dynamic modules */
949   if (collector_paused == 0)
950     {
951       for (i = 0; i < nmodules; i++)
952 	if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
953 	  modules[i]->startDataCollection ();
954     }
955   else
956     {
957       hrtime_t ts = GETRELTIME ();
958       (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n",
959 				    SP_JCMD_PAUSE, (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
960     }
961 
962   /* mark the experiment active */
963   __collector_exp_active = 1;
964   return COL_ERROR_NONE;
965 }
966 
967 /* prepare directory for new experiment of fork-child */
968 
969 /* return 0 if successful */
970 static int
collector_create_expr_dir(const char * new_exp_name)971 collector_create_expr_dir (const char *new_exp_name)
972 {
973   int ret = -1;
974   mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
975   TprintfT (DBG_LT1, "collector: __collector_create_expr_dir(%s)\n", new_exp_name);
976   if (CALL_UTIL (mkdir)(new_exp_name, dmode) < 0)
977     TprintfT (0, "__collector_create_expr_dir(%s) ERROR: errno=%d\n", new_exp_name, errno);
978   else
979     ret = 0;
980   return (ret);
981 }
982 
983 /* append _xN to __collector_exp_dir_name*/
984 /* return 0 if successful */
985 static int
collector_exp_dir_append_x(int linenum,const char * parent_exp_name)986 collector_exp_dir_append_x (int linenum, const char *parent_exp_name)
987 {
988   char buffer[MAXPATHLEN + 1];
989   char * p = __collector_strrchr (parent_exp_name, '/');
990   if (p == NULL || (*(p + 1) != '_'))
991     {
992       size_t sz = CALL_UTIL (strlen)(parent_exp_name);
993       const char * q = parent_exp_name + sz - 3;
994       if (sz < 3 || __collector_strncmp (q, ".er", CALL_UTIL (strlen)(q)) != 0
995 	  || CALL_UTIL (access)(parent_exp_name, F_OK) != 0)
996 	{
997 	  TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid  parent_exp_name %s\n", parent_exp_name);
998 	  return -1;
999 	}
1000       CALL_UTIL (strlcpy)(buffer, parent_exp_name, sizeof (buffer));
1001       CALL_UTIL (snprintf)(__collector_exp_dir_name, sizeof (__collector_exp_dir_name),
1002 			   "%s/_x%d.er", buffer, linenum);
1003     }
1004   else
1005     {
1006       p = __collector_strrchr (parent_exp_name, '.');
1007       if (p == NULL || *(p + 1) != 'e' || *(p + 2) != 'r')
1008 	{
1009 	  TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid  parent_exp_name %s\n", parent_exp_name);
1010 	  return -1;
1011 	}
1012       CALL_UTIL (strlcpy)(buffer, parent_exp_name,
1013 			  ((p - parent_exp_name + 1)<sizeof (buffer)) ? (p - parent_exp_name + 1) : sizeof (buffer));
1014       CALL_UTIL (snprintf)(__collector_exp_dir_name, sizeof (__collector_exp_dir_name),
1015 			   "%s_x%d.er", buffer, linenum);
1016     }
1017   return 0;
1018 }
1019 
1020 /* prepare directory for new experiment of exec/combo child*/
1021 
1022 /* return 0 if successful */
1023 static int
collector_create_expr_dir_lineage(const char * parent_exp_name)1024 collector_create_expr_dir_lineage (const char *parent_exp_name)
1025 {
1026   int ret = -1;
1027   mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
1028   int linenum = 1;
1029   while (linenum < INT_MAX)
1030     {
1031       if (collector_exp_dir_append_x (linenum, parent_exp_name) != 0)
1032 	return -1;
1033       if (CALL_UTIL (access)(__collector_exp_dir_name, F_OK) != 0)
1034 	{
1035 	  if (CALL_UTIL (mkdir)(__collector_exp_dir_name, dmode) == 0)
1036 	    return 0;
1037 	}
1038       linenum++;
1039       TprintfT (DBG_LT0, "collector: collector_create_expr_dir_lineage(%s -> %s)\n", parent_exp_name, __collector_exp_dir_name);
1040     }
1041   return (ret);
1042 }
1043 
1044 /* Finish the initializing work if we don't collect data while libcollector.so is preloaded. */
1045 /* return COL_ERROR_NONE if successful */
1046 static int
collector_tail_init(const char * parent_exp_name)1047 collector_tail_init (const char *parent_exp_name)
1048 {
1049   int err = COL_ERROR_NONE;
1050   if (exp_origin != SP_ORIGIN_FORK)
1051     {
1052       /* For exec/combo descendants. Don't create dir for this subexp, but update lineage by appending "_x0". */
1053       /* Different children can have the same _x0 if their name don't match -F exp.
1054        * Assume their fork children inherit the program name, there will be no  _x0_fN.er to create.
1055        * So we don't need to worry about the lineage messed up by _x0.
1056        */
1057       int linenum = 0;
1058       if (collector_exp_dir_append_x (linenum, parent_exp_name) != 0)
1059 	return COL_ERROR_BADDIR;
1060       static char exp_name_env[MAXPATHLEN + 1];
1061       CALL_UTIL (snprintf)(exp_name_env, sizeof (exp_name_env), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name);
1062       TprintfT (DBG_LT1, "collector_tail_init: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name);
1063       CALL_UTIL (putenv)(exp_name_env);
1064     }
1065   /* initialize the segments map and mmap interposition */
1066   if (exp_origin != SP_ORIGIN_GENEXP && exp_origin != SP_ORIGIN_KERNEL)
1067     if ((err = __collector_ext_mmap_install (0)) != COL_ERROR_NONE)
1068       return err;
1069 
1070   /* initialize TSD module (note: relies on __collector_heap) */
1071   if (__collector_tsd_init () != 0)
1072     return COL_ERROR_EXPOPEN;
1073 
1074   /* initialize for subsequent stack unwinds */
1075   __collector_ext_unwind_init (0);
1076 
1077   char * buf = NULL;
1078   /* Make a copy of params so that we can modify the string */
1079   int paramsz = CALL_UTIL (strlen)(collector_params) + 1;
1080   buf = (char*) alloca (paramsz);
1081   CALL_UTIL (strlcpy)(buf, collector_params, paramsz);
1082 
1083   char *par_F = "0";
1084   char *s;
1085   for (s = buf; *s;)
1086     {
1087       char key = *s++;
1088       /* ensure that it's followed by a colon */
1089       if (*s++ != ':')
1090 	{
1091 	  TprintfT (DBG_LT0, "collector_tail_init: ERROR: parameter %c is not followed by a colon\n", key);
1092 	  return COL_ERROR_ARGS;
1093 	}
1094 
1095       /* find the semicolon terminator */
1096       char *par = s;
1097       while (*s && (*s != ';'))
1098 	s++;
1099       if (*s != ';')
1100 	{
1101 	  /* not followed by semicolon */
1102 	  TprintfT (0, "collector_tail_init: ERROR: parameter %c:%s is not terminated by a semicolon\n", key, par);
1103 	  return COL_ERROR_ARGS;
1104 	}
1105       /* terminate par, and position for next descriptor */
1106       *s++ = 0;
1107       /* now process that element of the data descriptor */
1108       if (key == 'F')
1109 	{
1110 	  par_F = par;
1111 	  break;
1112 	}
1113     }
1114   if ((err = __collector_ext_line_install (par_F, __collector_exp_dir_name)) != COL_ERROR_NONE)
1115     return err;
1116 
1117   /* allocate tsd for the current thread */
1118   if (__collector_tsd_allocate () != 0)
1119     return COL_ERROR_EXPOPEN;
1120   return COL_ERROR_NONE;
1121 }
1122 
1123 /*  routines concerning closing the experiment */
1124 /*  close down -- fini section routine */
1125 static void collector_fini () __attribute__ ((destructor));
1126 static void
collector_fini()1127 collector_fini ()
1128 {
1129   TprintfT (DBG_LT0, "collector_fini: closing experiment\n");
1130   __collector_close_experiment ();
1131 
1132 }
1133 
1134 void collector_terminate_expt () __attribute__ ((weak, alias ("__collector_terminate_expt")));
1135 
1136 /* __collector_terminate_expt called by user, or from dbx */
1137 void
__collector_terminate_expt()1138 __collector_terminate_expt ()
1139 {
1140   TprintfT (DBG_LT0, "__collector_terminate_expt: %s; calling close\n", __collector_exp_dir_name);
1141   __collector_close_experiment ();
1142   TprintfT (DBG_LT0, "__collector_terminate_expt done\n\n");
1143 }
1144 
1145 /*
1146  * We manage the SIGCHLD handler with sigaction and don't worry about signal or sigset().
1147  * This is in line with the comments in dispatcher.c
1148  * immediately preceding the wrapper function for (Linux) signal().
1149  */
1150 static struct sigaction original_sigchld_sigaction;
1151 static pid_t mychild_pid = -1;
1152 
1153 /* __collector_SIGCHLD_signal_handler called when er_archive exits */
1154 static void
__collector_SIGCHLD_signal_handler(int sig,siginfo_t * si,void * context)1155 __collector_SIGCHLD_signal_handler (int sig, siginfo_t *si, void *context)
1156 {
1157   pid_t calling_pid = si->si_pid;
1158   /* Potential race.
1159    * We get mychild_pid from the vfork() return value.
1160    * So there is an outside chance that the child completes and sends SIGCHLD
1161    * before the handler knows the value of mychild_pid.
1162    */
1163   if (calling_pid == mychild_pid)
1164     // er_archive has exited; so restore the user handler
1165     __collector_sigaction (SIGCHLD, &original_sigchld_sigaction, NULL);
1166   else
1167     {
1168       // if we can't identify the pid, the signal must be for the user's handler
1169       if (original_sigchld_sigaction.sa_handler != SIG_DFL
1170 	  && original_sigchld_sigaction.sa_handler != SIG_IGN)
1171 	original_sigchld_sigaction.sa_sigaction (sig, si, context);
1172     }
1173   TprintfT (DBG_LT1, "__collector_SIGCHLD_signal_handler done\n\n");
1174 }
1175 
1176 int
collector_sigchld_sigaction(const struct sigaction * nact,struct sigaction * oact)1177 collector_sigchld_sigaction (const struct sigaction *nact,
1178 			     struct sigaction *oact)
1179 {
1180   // get the current SIGCHLD handler
1181   struct sigaction cur_handler;
1182   __collector_sigaction (SIGCHLD, NULL, &cur_handler);
1183 
1184   // if we have NOT installed our own handler, return an error
1185   // (force the caller to deal with this case)
1186   if (cur_handler.sa_sigaction != __collector_SIGCHLD_signal_handler)
1187     return -1;
1188 
1189   // if we HAVE installed our own handler, act on the user's handler
1190   if (oact)
1191     __collector_memcpy (oact, &original_sigchld_sigaction, sizeof (struct sigaction));
1192   if (nact)
1193     __collector_memcpy (&original_sigchld_sigaction, nact, sizeof (struct sigaction));
1194   return 0;
1195 }
1196 
1197 /*
1198  * __collector_close_experiment may be called either from
1199  * __collector_terminate_expt() or the .fini section
1200  */
1201 void
__collector_close_experiment()1202 __collector_close_experiment ()
1203 {
1204   hrtime_t ts;
1205   char *argv[10];
1206   int status;
1207   TprintfT (DBG_LT1, "collector: __collector_close_experiment(): %s\n", __collector_exp_dir_name);
1208   if (!exp_initted)
1209     return;
1210   /* The experiment may have been previously closed */
1211   if (!exp_open)
1212     return;
1213 
1214   if (__collector_mutex_trylock (&__collector_close_guard))
1215     /* someone else is in the middle of closing the experiment */
1216     return;
1217 
1218   /* record the termination of the experiment */
1219   ts = GETRELTIME ();
1220   collector_params = NULL;
1221 
1222   /* tell all dynamic modules to stop data collection */
1223   int i;
1224   for (i = 0; i < nmodules; i++)
1225     if (modules[i]->stopDataCollection != NULL)
1226       modules[i]->stopDataCollection ();
1227 
1228   /* notify all dynamic modules the experiment is being closed */
1229   for (i = 0; i < nmodules; i++)
1230     {
1231       if (modules[i]->closeExperiment != NULL)
1232 	modules[i]->closeExperiment ();
1233       __collector_delete_handle (modules_hndl[i]);
1234       modules_hndl[i] = NULL;
1235     }
1236 
1237   /* acquire the global lock -- only one close at a time */
1238   __collector_mutex_lock (&__collector_glob_lock);
1239   /* deinstall mmap tracing (with final update) */
1240   __collector_ext_mmap_deinstall (1);
1241 
1242   /* deinstall common SIGPROF dispatcher */
1243   __collector_ext_dispatcher_deinstall ();
1244 
1245   /* disable line interposition */
1246   __collector_ext_line_close ();
1247 
1248   /* Other threads may be reading tsd now. */
1249   //__collector_tsd_fini();
1250 
1251   /* delete global heap */
1252   /* omazur: do not delete the global heap
1253    * to avoid crashes in TSD. Need a better solution.
1254   __collector_deleteHeap( __collector_heap );
1255   __collector_heap = NULL;
1256    */
1257   __collector_mutex_unlock (&__collector_glob_lock);
1258 
1259   /* take a final sample */
1260   __collector_ext_usage_sample (MASTER_SMPL, "collector_close_experiment");
1261   sample_mode = 0;
1262 
1263   /* close the frameinfo file */
1264   __collector_ext_unwind_close ();
1265   if (exp_origin != SP_ORIGIN_DBX_ATTACH)
1266     log_write_event_run ();
1267 
1268   /* mark the experiment as closed */
1269   __collector_expstate = EXP_CLOSED;
1270   TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_CLOSED: project_home=%s\n",
1271 	    STR (project_home));
1272   __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n",
1273 			 SP_JCMD_EXIT,
1274 			 (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
1275 
1276   /* derive er_archive's absolute path from that of libcollector */
1277   argv[0] = NULL;
1278   if (project_home && archive_mode && __collector_strcmp (archive_mode, "off"))
1279     {
1280       /* construct a command to launch it */
1281       char *er_archive_name = "/bin/gp-archive";
1282       size_t cmdlen = CALL_UTIL (strlen)(project_home) + CALL_UTIL (strlen)(er_archive_name) + 1;
1283       char *command = (char*) alloca (cmdlen);
1284       CALL_UTIL (snprintf)(command, cmdlen, "%s%s", project_home, er_archive_name);
1285       if (CALL_UTIL (access)(command, F_OK) == 0)
1286 	{
1287 	  // build the argument list
1288 	  int nargs = 0;
1289 	  argv[nargs++] = command;
1290 	  argv[nargs++] = "-n";
1291 	  argv[nargs++] = "-a";
1292 	  argv[nargs++] = archive_mode;
1293 	  size_t len = CALL_UTIL (strlen)(__collector_exp_dir_name) + 1;
1294 	  size_t len1 = CALL_UTIL (strlen)(SP_ARCHIVE_LOG_FILE) + 1;
1295 	  char *str = (char*) alloca (len + len1);
1296 	  CALL_UTIL (snprintf)(str, len + 15, "%s/%s", __collector_exp_dir_name, SP_ARCHIVE_LOG_FILE);
1297 	  argv[nargs++] = "--outfile";
1298 	  argv[nargs++] = str;
1299 	  str = (char*) alloca (len);
1300 	  CALL_UTIL (snprintf)(str, len, "%s", __collector_exp_dir_name);
1301 	  argv[nargs++] = str;
1302 	  argv[nargs] = NULL;
1303 	}
1304     }
1305 
1306   /* log the archive command to be run */
1307   if (argv[0] == NULL)
1308     {
1309       (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n",
1310 				    SP_JCMD_COMMENT, COL_COMMENT_NONE, "No archive command run");
1311       TprintfT (DBG_LT1, "collector: No archive command run\n");
1312     }
1313   else
1314     {
1315       char cmdbuf[4096];
1316       int bufoffset = 0;
1317       int i;
1318       for (i = 0; argv[i] != NULL; i++)
1319 	{
1320 	  bufoffset += CALL_UTIL (snprintf)(&cmdbuf[bufoffset], (sizeof (cmdbuf) - bufoffset),
1321 					    " %s", argv[i]);
1322 	}
1323       (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">Archive command `%s'</event>\n",
1324 				    SP_JCMD_COMMENT, COL_COMMENT_NONE, cmdbuf);
1325       TprintfT (DBG_LT1, "collector: running `%s'\n", cmdbuf);
1326     }
1327   log_close ();
1328   TprintfT (DBG_LT1, "__collector_close_experiment(%s) done\n", __collector_exp_dir_name);
1329   exp_open = 0;                 /* mark the experiment as closed */
1330   __collector_exp_active = 0;   /* mark the experiment as inactive */
1331 
1332   /* reset all experiment parameters */
1333   sample_mode = 0;
1334   collector_paused = 0;
1335   __collector_pause_sig = -1;
1336   __collector_pause_sig_warn = 0;
1337   __collector_sample_sig = -1;
1338   __collector_sample_sig_warn = 0;
1339   __collector_sample_period = 0;
1340   __collector_exp_dir_name[0] = 0;
1341 
1342   /* uninstall the pause and sample signal handlers */
1343   /* XXXX -- not yet, because of potential race conditions in libthread */
1344   if (argv[0] == NULL)
1345     {
1346       /* er_archive command will not be run */
1347       __collector_mutex_unlock (&__collector_close_guard);
1348       return;
1349     }
1350 
1351   struct sigaction sa;
1352   CALL_UTIL (memset)(&sa, 0, sizeof (struct sigaction));
1353   sa.sa_sigaction = __collector_SIGCHLD_signal_handler;
1354   sa.sa_flags = SA_SIGINFO;
1355   __collector_sigaction (SIGCHLD, &sa, &original_sigchld_sigaction);
1356 
1357   /* linetrace interposition takes care of unsetting Environment variables */
1358   /* create a child process to invoke er_archive */
1359   pid_t pid = CALL_UTIL (vfork)();
1360   if (pid == 0)
1361     {
1362       /* pid is zero == child process -- invoke er_archive */
1363       /* Unset LD_PRELOAD environment variables */
1364       CALL_UTIL (unsetenv)("LD_PRELOAD_32");
1365       CALL_UTIL (unsetenv)("LD_PRELOAD_64");
1366       CALL_UTIL (unsetenv)("LD_PRELOAD");
1367       /* Invoke er_archive */
1368       CALL_UTIL (execv)(argv[0], argv);
1369       CALL_UTIL (exit)(1);  /* exec failed -- child exits with an error */
1370     }
1371   else if (pid != -1)
1372     {
1373       mychild_pid = pid; // notify our signal handler who the child is
1374       pid_t w;
1375       /* copied from system.c */
1376       do
1377 	{
1378 	  w = CALL_UTIL (waitpid)(pid, &status, 0);
1379 	}
1380       while (w == -1 && errno == EINTR);
1381       TprintfT (DBG_LT1, "collector: creating archive done\n");
1382       // __collector_SIGCHLD_signal_handler should now be de-installed, but it does so itself
1383     }
1384   else
1385     /* child-process creation failed */
1386     TprintfT (DBG_LT0, "collector: creating archive process failed\n");
1387 
1388   __collector_mutex_unlock (&__collector_close_guard);
1389   TprintfT (DBG_LT1, "collector: __collector_close_experiment done\n");
1390   return;
1391 }
1392 
1393 /*
1394  * void __collector_clean_state()
1395  *	Perform all necessary cleanup steps in child process after fork().
1396  */
1397 void
__collector_clean_state()1398 __collector_clean_state ()
1399 {
1400   TprintfT (DBG_LT1, "collector: collector_clean_state()\n");
1401   int i;
1402   /*
1403    * We are in child process after fork().
1404    * First of all we have to reset all mutex locks in collector's subsystems.
1405    * After that we can reinitialize modules.
1406    */
1407   __collector_mmgr_init_mutex_locks (__collector_heap);
1408   __collector_mutex_init (&__collector_glob_lock);
1409   __collector_mutex_init (&__collector_open_guard);
1410   __collector_mutex_init (&__collector_close_guard);
1411   __collector_mutex_init (&__collector_sample_guard);
1412   __collector_mutex_init (&__collector_suspend_guard);
1413   __collector_mutex_init (&__collector_resume_guard);
1414 
1415   if (__collector_mutex_trylock (&__collector_close_guard))
1416     /* someone else is in the middle of closing the experiment */
1417     return;
1418 
1419   /* Stop data collection in all dynamic modules */
1420   for (i = 0; i < nmodules; i++)
1421     if (modules[i]->stopDataCollection != NULL)
1422       modules[i]->stopDataCollection ();
1423 
1424   // Now we can reset modules
1425   for (i = 0; i < nmodules; i++)
1426     {
1427       if (modules[i]->detachExperiment != NULL && modules_st[i] == 0)
1428 	modules[i]->detachExperiment ();
1429       __collector_delete_handle (modules_hndl[i]);
1430       modules_hndl[i] = NULL;
1431     }
1432 
1433   /* acquire the global lock -- only one suspend at a time */
1434   __collector_mutex_lock (&__collector_glob_lock);
1435   {
1436 
1437     /* stop any profile data writing */
1438     paused_when_suspended = collector_paused;
1439     collector_paused = 1;
1440 
1441     /* deinstall common SIGPROF dispatcher */
1442     __collector_ext_dispatcher_suspend ();
1443 
1444     /* mark the experiment as suspended */
1445     __collector_exp_active = 0;
1446 
1447     /* XXXX mark the experiment as closed! */
1448     exp_open = 0; /* This is a hack to allow fork child to call__collector_open_experiment() */
1449 
1450     /* mark the experiment log closed! */
1451     log_close ();
1452   }
1453   __collector_mutex_unlock (&__collector_glob_lock);
1454 
1455   // Now we can reset subsystems.
1456   __collector_ext_dispatcher_fork_child_cleanup ();
1457   __collector_mmap_fork_child_cleanup ();
1458   __collector_tsd_fork_child_cleanup ();
1459   paused_when_suspended = 0;
1460   collector_paused = 0;
1461   __collector_expstate = EXP_INIT;
1462   TprintfT (DBG_LT1, "__collector_clean_slate: __collector_expstate->EXP_INIT\n");
1463   exp_origin = SP_ORIGIN_LIBCOL_INIT;
1464   exp_initted = 0;
1465   __collector_start_time = collector_interface.getHiResTime ();
1466   TprintfT (DBG_LT1, " -->__collector_clean_slate; resetting start_time\n");
1467   start_sec_time = 0;
1468 
1469   /* Sample related data */
1470   sample_installed = 0;     // 1 if the sample signal handler installed
1471   sample_mode = 0;          // dynamically turns sample record writing on/off
1472   sample_number = 0;        // index of the current sample record
1473   __collector_sample_sig = -1;      // user-specified sample signal
1474   __collector_sample_sig_warn = 0;  // non-zero if warning already given
1475 
1476   /* Pause/resume related data */
1477   __collector_pause_sig = -1;       // user-specified pause signal
1478   __collector_pause_sig_warn = 0;   // non-zero if warning already given
1479   __collector_mutex_unlock (&__collector_close_guard);
1480   return;
1481 }
1482 
1483 /* modelled on __collector_close_experiment */
1484 void
__collector_suspend_experiment(char * why)1485 __collector_suspend_experiment (char *why)
1486 {
1487   if (!exp_initted)
1488     return;
1489   /* The experiment may have been previously closed */
1490   if (!exp_open)
1491     return;
1492   /* The experiment may have been previously suspended */
1493   if (!__collector_exp_active)
1494     return;
1495   if (__collector_mutex_trylock (&__collector_suspend_guard))
1496     /* someone else is in the middle of suspending the experiment */
1497     return;
1498 
1499   /* Stop data collection in all dynamic modules */
1500   int i;
1501   for (i = 0; i < nmodules; i++)
1502     if (modules[i]->stopDataCollection != NULL)
1503       modules[i]->stopDataCollection ();
1504 
1505   /* take a pre-suspension sample */
1506   __collector_ext_usage_sample (MASTER_SMPL, why);
1507 
1508   /* acquire the global lock -- only one suspend at a time */
1509   __collector_mutex_lock (&__collector_glob_lock);
1510   /* stop any profile data writing */
1511   paused_when_suspended = collector_paused;
1512   collector_paused = 1;
1513 
1514   /* deinstall common SIGPROF dispatcher */
1515   __collector_ext_dispatcher_suspend ();
1516 
1517   /* mark the experiment as suspended */
1518   __collector_exp_active = 0;
1519 
1520   /* XXXX mark the experiment as closed! */
1521   exp_open = 0;     // This is a hack to allow fork child to call __collector_open_experiment()
1522   log_pause ();     // mark the experiment log closed!
1523   TprintfT (DBG_LT0, "collector: collector_suspend_experiment(%s, %d)\n\n", why, collector_paused);
1524   __collector_mutex_unlock (&__collector_glob_lock);
1525   __collector_mutex_unlock (&__collector_suspend_guard);
1526   return;
1527 }
1528 
1529 void
__collector_resume_experiment()1530 __collector_resume_experiment ()
1531 {
1532   if (!exp_initted)
1533     return;
1534 
1535   /* The experiment may have been previously resumed */
1536   if (__collector_exp_active)
1537     return;
1538   if (__collector_mutex_trylock (&__collector_resume_guard))
1539     /* someone else is in the middle of resuming the experiment */
1540     return;
1541 
1542   /* acquire the global lock -- only one resume at a time */
1543   __collector_mutex_lock (&__collector_glob_lock);
1544   /* mark the experiment as re-activated */
1545   __collector_exp_active = 1;
1546   /* XXXX mark the experiment as open! */
1547   exp_open = 1; // This is a hack to allow fork child to call__collector_open_experiment()
1548   log_resume (); // mark the experiment log re-opened!
1549   TprintfT (DBG_LT0, "collector: collector_resume_experiment(%d)\n", paused_when_suspended);
1550   /* resume any profile data writing */
1551   collector_paused = paused_when_suspended;
1552   /* restart common SIGPROF dispatcher */
1553   __collector_ext_dispatcher_restart ();
1554   __collector_mutex_unlock (&__collector_glob_lock);
1555 
1556   /* take a post-suspension sample */
1557   __collector_ext_usage_sample (MASTER_SMPL, "collector_resume_experiment");
1558 
1559   /* Resume data collection in all dynamic modules */
1560   if (collector_paused == 0)
1561     {
1562       int i;
1563       for (i = 0; i < nmodules; i++)
1564 	if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
1565 	  modules[i]->startDataCollection ();
1566     }
1567 
1568   if (__collector_sample_period != 0)
1569     {
1570       hrtime_t now = collector_interface.getHiResTime ();
1571       while (__collector_next_sample < now)
1572 	__collector_next_sample += ((hrtime_t) NANOSEC) * __collector_sample_period;
1573     }
1574 
1575   /* check for experiment past termination time */
1576   if (__collector_terminate_time != 0)
1577     {
1578       hrtime_t now = collector_interface.getHiResTime ();
1579       if (__collector_terminate_time < now)
1580 	{
1581 	  TprintfT (DBG_LT0, "__collector_resume_experiment: now (%lld) > terminate_time (%lld); closing experiment\n",
1582 		    (now - __collector_start_time), (__collector_terminate_time - __collector_start_time));
1583 	  __collector_close_experiment ();
1584 	}
1585     }
1586   __collector_mutex_unlock (&__collector_resume_guard);
1587   return;
1588 }
1589 
1590 /* Code to support Samples and Pause/Resume */
1591 void collector_sample () __attribute__ ((weak, alias ("__collector_sample")));
1592 void
__collector_sample(char * name)1593 __collector_sample (char *name)
1594 {
1595   __collector_ext_usage_sample (PROGRAM_SMPL, name);
1596 }
1597 
1598 static void
write_sample(char * name)1599 write_sample (char *name)
1600 {
1601   if (sample_mode == 0)
1602     return;
1603   /* make the sample timestamp relative to the start */
1604   hrtime_t ts, now = collector_interface.getHiResTime ();
1605 
1606   /* update time for next periodic sample */
1607   /* since this is common to all LWPs, and only one (the first!) will
1608      update it to the next period, doing the update early will avoid
1609      the overhead/frustration of the other LWPs
1610    */
1611   if (__collector_sample_period != 0)
1612     {
1613       /* this update should only be done for periodic samples */
1614       while (__collector_next_sample < now)
1615 	__collector_next_sample += ((hrtime_t) NANOSEC) * __collector_sample_period;
1616     }
1617 
1618   /* take the sample and record it; use (return - __collector_start_time) for timestamp */
1619   now = ovw_write ();
1620   ts = now - __collector_start_time;
1621 
1622   /* write sample records to log file  */
1623   __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" id=\"%d\" label=\"%s\"/>\n",
1624 			 SP_JCMD_SAMPLE,
1625 			 (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC),
1626 			 sample_number,
1627 			 name);
1628   /* increment the sample number */
1629   sample_number++;
1630 }
1631 
1632 /*
1633  * __collector_ext_usage_sample
1634  *
1635  * Handle taking a process usage sample and recording it.
1636  * Common to all different types of sample:
1637  *     libcollector master samples at initiation and close,
1638  *     programmatic samples via libcollector API calls,
1639  *     periodic samples originating in the dispatcher,
1640  *     manual samples originating in the signal sample handler,
1641  *     manual samples originating from the debugger
1642  * Differentiating type and name information is currently not recorded.
1643  */
1644 void
__collector_ext_usage_sample(Smpl_type type,char * name)1645 __collector_ext_usage_sample (Smpl_type type, char *name)
1646 {
1647   /* name is optional */
1648   if (name == NULL)
1649     name = "";
1650   TprintfT (DBG_LT3, "collector: __collector_ext_usage_sample(%d,%s)\n", type, name);
1651   if (!exp_initted)
1652     return;
1653 
1654   /* if paused, don't record periodic samples */
1655   if ((type == PERIOD_SMPL) && (collector_paused == 1))
1656     return;
1657 
1658   /* There is a possibility of entering this function
1659    * from sample_handler, dbx direct call to __collector_sample,
1660    * and user called collector_sample. Since we are making a
1661    * new sample anyway just return.
1662    */
1663   if (__collector_mutex_trylock (&__collector_sample_guard))
1664     return;
1665   if (type != PERIOD_SMPL || __collector_sample_period != 0)
1666     write_sample (name);
1667   __collector_mutex_unlock (&__collector_sample_guard);
1668 }
1669 
1670 /* set the sample period from the parameter */
1671 static int
sample_set_interval(char * param)1672 sample_set_interval (char *param)
1673 {
1674   if (!exp_initted)
1675     return COL_ERROR_SMPLINIT;
1676   __collector_sample_period = CALL_UTIL (strtol)(param, NULL, 0); /* seconds */
1677   TprintfT (DBG_LT1, "collector: collector_sample period set to %d seconds.\n",
1678 	    __collector_sample_period);
1679   if (__collector_sample_period > 0)
1680     (void) __collector_log_write ("<setting %s=\"%d\"/>\n",
1681 				  SP_JCMD_SAMPLE_PERIOD, __collector_sample_period);
1682   return COL_ERROR_NONE;
1683 }
1684 
1685 /* set the experiment duration from the parameter */
1686 
1687 /* parameter is of the form nnn:mmm, where nnn is the start delay in seconds,
1688  *	and mmm is the terminate time in seconds; if nnn is zero,
1689  *	data collection starts when the run starts.  If mmm is zero,
1690  *	data collection terminates when the run terminates.  Otherwise,
1691  *	nnn must be less than mmm
1692  */
1693 static int
set_duration(char * param)1694 set_duration (char *param)
1695 {
1696   if (!exp_initted)
1697     return COL_ERROR_DURATION_INIT;
1698   int delay_start = CALL_UTIL (strtol)(param, &param, 0); /* seconds */
1699   int terminate_duration = 0;
1700   if (*param == 0)
1701     {
1702       /* we only have one parameter, the terminate time */
1703       terminate_duration = delay_start;
1704       delay_start = 0;
1705     }
1706   else if (*param == ':')
1707     {
1708       param++;
1709       terminate_duration = CALL_UTIL (strtol)(param, &param, 0); /* seconds */
1710     }
1711   else
1712     return COL_ERROR_DURATION_INIT;
1713   TprintfT (DBG_LT1, "collector: collector_delay_start duration set to %d seconds.\n",
1714 	    delay_start);
1715   TprintfT (DBG_LT1, "collector: collector_terminate duration set to %d seconds.\n",
1716 	    terminate_duration);
1717   if (terminate_duration > 0)
1718     __collector_log_write ("<setting %s=\"%d\"/>\n<setting %s=\"%d\"/>\n",
1719 			   SP_JCMD_DELAYSTART, delay_start,
1720 			   SP_JCMD_TERMINATE, terminate_duration);
1721   __collector_delay_start = (hrtime_t) 0;
1722   if (delay_start != 0)
1723     {
1724       __collector_delay_start = __collector_start_time + ((hrtime_t) NANOSEC) * delay_start;
1725       collector_paused = 1;
1726     }
1727   __collector_terminate_time = terminate_duration == 0 ? (hrtime_t) 0 :
1728 	  __collector_start_time + ((hrtime_t) NANOSEC) * terminate_duration;
1729   return COL_ERROR_NONE;
1730 }
1731 
1732 static int
sample_set_user_sig(char * par)1733 sample_set_user_sig (char *par)
1734 {
1735   int sig = CALL_UTIL (strtol)(par, &par, 0);
1736   TprintfT (DBG_LT1, "collector: sample_set_user_sig(sig=%d,installed=%d)\n",
1737 	    sig, sample_installed);
1738   /* Installing the sampling signal handler more
1739    * than once is not good.
1740    */
1741   if (!sample_installed)
1742     {
1743       struct sigaction act;
1744       sigemptyset (&act.sa_mask);
1745       /* XXXX should any signals be blocked? */
1746       act.sa_sigaction = sample_handler;
1747       act.sa_flags = SA_RESTART | SA_SIGINFO;
1748       if (sigaction (sig, &act, &old_sample_handler) == -1)
1749 	{
1750 	  TprintfT (DBG_LT0, "collector: ERROR: collector_sample_handler install failed (sig=%d).\n",
1751 		    __collector_sample_sig);
1752 	  return COL_ERROR_ARGS;
1753 	}
1754       if (old_sample_handler.sa_handler == SIG_DFL ||
1755 	  old_sample_handler.sa_sigaction == sample_handler)
1756 	old_sample_handler.sa_handler = SIG_IGN;
1757       TprintfT (DBG_LT1, "collector: collector_sample_handler installed (sig=%d,hndlr=0x%p).\n",
1758 		sig, sample_handler);
1759       __collector_sample_sig = sig;
1760       sample_installed = 1;
1761     }
1762   (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_SAMPLE_SIG, __collector_sample_sig);
1763   return COL_ERROR_NONE;
1764 }
1765 
1766 /* signal handler for sample signal */
1767 static void
sample_handler(int sig,siginfo_t * sip,void * uap)1768 sample_handler (int sig, siginfo_t *sip, void *uap)
1769 {
1770   if (sip && sip->si_code == SI_USER)
1771     {
1772       TprintfT (DBG_LT1, "collector: collector_sample_handler sampling!\n");
1773       __collector_ext_usage_sample (MANUAL_SMPL, "signal");
1774     }
1775   else if (old_sample_handler.sa_handler != SIG_IGN)
1776     {
1777       TprintfT (DBG_LT1, "collector: collector_sample_handler forwarding signal.\n");
1778       (old_sample_handler.sa_sigaction)(sig, sip, uap);
1779     }
1780 }
1781 
/* API entry point, also reachable through the weak alias collector_pause:
   pause data collection, recording "API" as the reason.  */
void collector_pause () __attribute__ ((weak, alias ("__collector_pause")));

void
__collector_pause ()
{
  /* All pause requests funnel through __collector_pause_m.  */
  __collector_pause_m ("API");
}
1789 
1790 void
__collector_pause_m(char * reason)1791 __collector_pause_m (char *reason)
1792 {
1793   hrtime_t now;
1794   char xreason[MAXPATHLEN];
1795   TprintfT (DBG_LT0, "collector: __collector_pause_m(%s)\n", reason);
1796 
1797   /* Stop data collection in all dynamic modules */
1798   for (int i = 0; i < nmodules; i++)
1799     if (modules[i]->stopDataCollection != NULL)
1800       modules[i]->stopDataCollection ();
1801 
1802   /* Take a pause sample */
1803   CALL_UTIL (snprintf)(xreason, sizeof (xreason), "collector_pause(%s)", reason);
1804   __collector_ext_usage_sample (MASTER_SMPL, xreason);
1805 
1806   /* Record the event in the log file */
1807   now = GETRELTIME ();
1808   (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" name=\"%s\"/>\n", SP_JCMD_PAUSE,
1809 				(unsigned) (now / NANOSEC), (unsigned) (now % NANOSEC), reason);
1810   __collector_expstate = EXP_PAUSED;
1811   TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_PAUSED\n");
1812   collector_paused = 1;
1813 }
1814 
1815 void collector_resume () __attribute__ ((weak, alias ("__collector_resume")));
1816 
1817 void
__collector_resume()1818 __collector_resume ()
1819 {
1820   TprintfT (DBG_LT0, "collector: __collector_resume()\n");
1821   __collector_expstate = EXP_OPEN;
1822   TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_OPEN\n");
1823 
1824   /* Record the event in the log file */
1825   hrtime_t now = GETRELTIME ();
1826   (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n", SP_JCMD_RESUME,
1827 				(unsigned) (now / NANOSEC), (unsigned) (now % NANOSEC));
1828   /* Take a resume sample */
1829   __collector_ext_usage_sample (MASTER_SMPL, "collector_resume");
1830 
1831   /* Resume data collection in all dynamic modules */
1832   for (int i = 0; i < nmodules; i++)
1833     if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
1834       modules[i]->startDataCollection ();
1835   collector_paused = 0;
1836 }
1837 
1838 static int
pause_set_user_sig(char * par)1839 pause_set_user_sig (char *par)
1840 {
1841   struct sigaction act;
1842   int sig = CALL_UTIL (strtol)(par, &par, 0);
1843   if (*par)
1844     {
1845       /* not end of the token */
1846       if (*par != 'p')
1847 	{
1848 	  /* it should be a p */
1849 	  TprintfT (DBG_LT0, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n",
1850 		    par, (int) *par);
1851 	  return COL_ERROR_ARGS;
1852 
1853 	}
1854       else
1855 	{
1856 	  /*, it's a p, make sure next is end of token */
1857 	  par++;
1858 	  if (*par)
1859 	    {
1860 	      TprintfT (DBG_LT0, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n",
1861 			par, (int) *par);
1862 	      return COL_ERROR_ARGS;
1863 	    }
1864 	  else
1865 	    /* start off paused */
1866 	    collector_paused = 1;
1867 	}
1868     }
1869   sigemptyset (&act.sa_mask);
1870   /* XXXX should any signals be blocked? */
1871   act.sa_sigaction = pause_handler;
1872   act.sa_flags = SA_RESTART | SA_SIGINFO;
1873   if (sigaction (sig, &act, &old_pause_handler) == -1)
1874     {
1875       TprintfT (DBG_LT0, "collector: ERROR: collector_pause_handler install failed (sig=%d).\n", sig);
1876       return COL_ERROR_ARGS;
1877     }
1878   if (old_pause_handler.sa_handler == SIG_DFL ||
1879       old_pause_handler.sa_sigaction == pause_handler)
1880     old_pause_handler.sa_handler = SIG_IGN;
1881   TprintfT (DBG_LT1, "collector: collector_pause_handler installed (sig=%d,hndlr=0x%p).\n",
1882 	    sig, pause_handler);
1883   __collector_pause_sig = sig;
1884   (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_PAUSE_SIG,
1885 				__collector_pause_sig);
1886   return COL_ERROR_NONE;
1887 }
1888 
1889 /* signal handler for pause/resume signal */
1890 static void
pause_handler(int sig,siginfo_t * sip,void * uap)1891 pause_handler (int sig, siginfo_t *sip, void *uap)
1892 {
1893   if (sip && sip->si_code == SI_USER)
1894     {
1895       if (collector_paused == 1)
1896 	{
1897 	  __collector_resume ();
1898 	  TprintfT (DBG_LT0, "collector: collector_pause_handler resumed!\n");
1899 	}
1900       else
1901 	{
1902 	  __collector_pause_m ("signal");
1903 	  TprintfT (DBG_LT0, "collector: collector_pause_handler paused!\n");
1904 	}
1905     }
1906   else if (old_pause_handler.sa_handler != SIG_IGN)
1907     {
1908       TprintfT (DBG_LT0, "collector: collector_pause_handler forwarding signal.\n");
1909       (old_pause_handler.sa_sigaction)(sig, sip, uap);
1910     }
1911 }
1912 
1913 static void
get_progspec(char * retstr,int tmp_sz,char * name,int name_sz)1914 get_progspec (char *retstr, int tmp_sz, char *name, int name_sz)
1915 {
1916   int procfd, count, i;
1917   *retstr = 0;
1918   tmp_sz--;
1919   *name = 0;
1920   name_sz--;
1921   procfd = CALL_UTIL (open)("/proc/self/cmdline", O_RDONLY);
1922   int getting_name = 0;
1923   if (procfd != -1)
1924     {
1925       count = CALL_UTIL (read)(procfd, retstr, tmp_sz);
1926       retstr[count] = '\0';
1927       for (i = 0; i < count; i++)
1928 	{
1929 	  if (getting_name == 0)
1930 	    name[i] = retstr[i];
1931 	  if (retstr[i] == '\0')
1932 	    {
1933 	      getting_name = 1;
1934 	      if ((i + 1) < count)
1935 		retstr[i] = ' ';
1936 	    }
1937 	}
1938       CALL_UTIL (close)(procfd);
1939     }
1940 }
1941 
1942 static void
fs_warn()1943 fs_warn ()
1944 {
1945   /* if data implies we don't care, just return */
1946   if (fs_matters == 0)
1947     return;
1948 }
1949 
1950 static void
close_handler(int sig,siginfo_t * sip,void * uap)1951 close_handler (int sig, siginfo_t *sip, void *uap)
1952 {
1953   if (sip && sip->si_code == SI_USER)
1954     {
1955       TprintfT (DBG_LT0, "collector: close_handler: processing signal.\n");
1956       __collector_close_experiment ();
1957     }
1958   else if (old_close_handler.sa_handler != SIG_IGN)
1959     {
1960       TprintfT (DBG_LT0, "collector: close_handler forwarding signal.\n");
1961       (old_close_handler.sa_sigaction)(sig, sip, uap);
1962     }
1963 }
1964 
1965 static void
exit_handler(int sig,siginfo_t * sip,void * uap)1966 exit_handler (int sig, siginfo_t *sip, void *uap)
1967 {
1968   if (sip && sip->si_code == SI_USER)
1969     {
1970       TprintfT (DBG_LT0, "collector: exit_handler: processing signal.\n");
1971       CALL_UTIL (exit)(1);
1972     }
1973   else if (old_exit_handler.sa_handler != SIG_IGN)
1974     {
1975       TprintfT (DBG_LT0, "collector: exit_handler forwarding signal.\n");
1976       (old_exit_handler.sa_sigaction)(sig, sip, uap);
1977     }
1978 }
1979 
1980 static int
set_user_sig_action(char * par)1981 set_user_sig_action (char *par)
1982 {
1983   int sig = CALL_UTIL (strtol)(par, &par, 0);
1984   if (*par != '=')
1985     {
1986       TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action bad separator: %s.\n", par);
1987       return COL_ERROR_ARGS;
1988     }
1989   par++;
1990   struct sigaction act;
1991   sigemptyset (&act.sa_mask);
1992   act.sa_flags = SA_RESTART | SA_SIGINFO;
1993   if (__collector_strcmp (par, "exit") == 0)
1994     {
1995       act.sa_sigaction = exit_handler;
1996       if (sigaction (sig, &act, &old_exit_handler) == -1)
1997 	{
1998 	  TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig, par);
1999 	  return COL_ERROR_ARGS;
2000 	}
2001     }
2002   else if (__collector_strcmp (par, "close") == 0)
2003     {
2004       act.sa_sigaction = close_handler;
2005       if (sigaction (sig, &act, &old_close_handler) == -1)
2006 	{
2007 	  TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig, par);
2008 	  return COL_ERROR_ARGS;
2009 	}
2010     }
2011   else
2012     {
2013       TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action unknown action: %d=%s.\n", sig, par);
2014       return COL_ERROR_ARGS;
2015     }
2016   __collector_log_write ("<setting signal=\"%u\" action=\"%s\"/>\n", sig, par);
2017   return COL_ERROR_NONE;
2018 }
2019 
2020 /*============================================================*/
2021 /*
2022  * Routines for handling the log file
2023  */
/* Handle of the experiment log file; created by log_open () and deleted by
   log_close ().  */
static struct DataHandle *log_hndl = NULL;
/* Nonzero between a successful log_open () and log_close ().  log_pause ()
   and log_resume () only act while it is set.  */
static int log_initted = 0;
/* Nonzero while __collector_log_write () should pass its output on to
   log_hndl; cleared by log_pause () and log_close ().  */
static int log_enabled = 0;
2027 
2028 static int
log_open()2029 log_open ()
2030 {
2031   log_hndl = __collector_create_handle (SP_LOG_FILE);
2032   if (log_hndl == NULL)
2033     return COL_ERROR_LOG_OPEN;
2034   log_initted = 1;
2035   log_enabled = 1;
2036   TprintfT (DBG_LT1, "log_open()\n");
2037   return COL_ERROR_NONE;
2038 }
2039 
/* Write the header section of the experiment log.

   Emits, in this order: the experiment and collector version records; a
   <system> element built from uname and sysconf (or an error event plus a
   bare <system> when uname fails); one <cpu> record per processor found
   in /proc/cpuinfo, parsed in an architecture-specific way; the closing
   </system>; the process, parent, group and session ids; the current
   directory; the word size; the stack base; and the program
   specification.  ORIGIN is only compared against SP_ORIGIN_FORK, in
   which case "(fork)" is logged in place of exp_progspec.  Finally
   __collector_libthread_T1 is reset to 0.  */
static void
log_header_write (sp_origin_t origin)
{
  __collector_log_write ("<experiment %s=\"%d.%d\">\n",
			 SP_JCMD_VERSION, SUNPERF_VERNUM, SUNPERF_VERNUM_MINOR);
  __collector_log_write ("<collector>%s</collector>\n", VERSION);
  __collector_log_write ("</experiment>\n");

  struct utsname sysinfo;
  if (uname (&sysinfo) < 0)
    {
      /* No system information available: log the error and still open
	 the <system> element that is closed further down.  */
      __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\"/></event>\n", SP_JCMD_CERROR, COL_ERROR_SYSINFO, errno);
      __collector_log_write ("<system>\n");
    }
  else
    {
      long page_size = CALL_UTIL (sysconf)(_SC_PAGESIZE);
      long npages = CALL_UTIL (sysconf)(_SC_PHYS_PAGES);
      __collector_log_write ("<system hostname=\"%s\" arch=\"%s\" os=\"%s %s\" pagesz=\"%ld\" npages=\"%ld\">\n",
			     sysinfo.nodename, sysinfo.machine, sysinfo.sysname, sysinfo.release, page_size, npages);
    }

  //YXXX Updating this section?  Check similar cut/paste code in:
  // collctrl.cc::Coll_Ctrl()
  // collector.c::log_header_write()
  // cpu_frequency.h::get_cpu_frequency()

  /* Scan /proc/cpuinfo line by line and log a <cpu> record with the cpu
     id and clock (in MHz) for each processor.  When the file cannot be
     opened no <cpu> records are written.  */
  FILE *procf = CALL_UTIL (fopen)("/proc/cpuinfo", "r");
  if (procf != NULL)
    {
      char temp[1024];
      int cpu = -1;
      while (CALL_UTIL (fgets)(temp, sizeof (temp), procf) != NULL)
	{
#if ARCH(Intel)
	  /* A "processor" line sets the current cpu id; the "cpu MHz"
	     line that follows supplies its clock and emits the record.  */
	  if (__collector_strStartWith (temp, "processor") == 0)
	    {
	      char *val = CALL_UTIL (strchr)(temp, ':');
	      cpu = val ? CALL_UTIL (atoi)(val + 1) : -1;
	    }
	    //            else if ( __collector_strStartWith(temp, "model") == 0
	    //                    && CALL_UTIL(strstr)(temp, "name") == 0) {
	    //                char *val = CALL_UTIL(strchr)( temp, ':' );
	    //                int model = val ? CALL_UTIL(atoi)( val + 1 ) : -1;
	    //            }
	    //            else if ( __collector_strStartWith(temp, "cpu family") == 0 ) {
	    //                char *val = CALL_UTIL(strchr)( temp, ':' );
	    //                int family = val ? CALL_UTIL(atoi)( val + 1 ) : -1;
	    //            }
	  else if (__collector_strStartWith (temp, "cpu MHz") == 0)
	    {
	      char *val = CALL_UTIL (strchr)(temp, ':');
	      int mhz = val ? CALL_UTIL (atoi)(val + 1) : 0; /* reading it as int is fine */
	      (void) __collector_log_write ("  <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, mhz);
	    }
#elif ARCH(SPARC)
	  /* Lines of the form "Cpu<N>ClkTck : <hex frequency>" carry both
	     the cpu id and its clock.  */
	  if (__collector_strStartWith (temp, "Cpu") == 0 &&
	      temp[3] != '\0' &&
	      __collector_strStartWith ((CALL_UTIL (strchr)(temp + 1, 'C')) ? CALL_UTIL (strchr)(temp + 1, 'C') : (temp + 4), "ClkTck") == 0)
	    { // sparc-Linux
	      char *val = CALL_UTIL (strchr)(temp, ':');
	      int mhz = 0;
	      if (val)
		{
		  unsigned long long freq;
		  (*__collector_sscanfp) (val + 2, "%llx", &freq);
		  /* scale to MHz, rounding to nearest */
		  mhz = (unsigned int) (((double) freq) / 1000000.0 + 0.5);
		}
	      /* Cut the line at "ClkTck" so that only the digits of the
		 cpu number remain after "Cpu".  */
	      char *numend = CALL_UTIL (strchr)(temp + 1, 'C') ? CALL_UTIL (strchr)(temp + 1, 'C') : (temp + 4);
	      *numend = '\0';
	      cpu = CALL_UTIL (atoi)(temp + 3);
	      __collector_log_write ("  <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, mhz);
	    }
#elif defined(__aarch64__)
	  /* The clock is not taken from the file here: the value of the
	     cntfrq_el0 system register, divided by 1000000, is logged for
	     every "processor" line.  */
	  if (__collector_strStartWith (temp, "processor") == 0)
	    {
	      char *val = CALL_UTIL (strchr)(temp, ':');
	      cpu = val ? CALL_UTIL (atoi)(val + 1) : -1;
	      if (cpu != -1)
		{
		  unsigned int mhz;
		  asm volatile("mrs %0, cntfrq_el0" : "=r" (mhz));
		  __collector_log_write ("  <cpu id=\"%d\" clk=\"%d\"/>\n", cpu,
					 mhz / 1000000);
		}
	    }
#endif
	}
      CALL_UTIL (fclose)(procf);
    }
  __collector_log_write ("</system>\n");
  __collector_log_write ("<process pid=\"%d\"></process>\n", getpid ());
  __collector_log_write ("<process ppid=\"%d\"></process>\n", getppid ());
  __collector_log_write ("<process pgrp=\"%d\"></process>\n", getpgrp ());
  __collector_log_write ("<process sid=\"%d\"></process>\n", getsid (0));

  /* XXX -- cwd commented out
  It would be nice to get the current directory for the experiment,
  but neither method below will work--the /proc method returns a
  0-length string, and using getcwd will break collect on /bin/sh
  (as cuserid does) because of /bin/sh's private malloc
  omazur: readlink seems to work on Linux
   */
  /* write the current directory */
  char cwd[MAXPATHLEN + 1];
  int i = readlink ("/proc/self/cwd", cwd, sizeof (cwd));
  if (i >= 0)
    {
      /* readlink does not terminate the result; terminate it here,
	 dropping the last byte if the buffer was filled completely.  */
      cwd[i < sizeof (cwd) ? i : sizeof (cwd) - 1] = 0;
      (void) __collector_log_write ("<process cwd=\"%s\"></process>\n", cwd);
    }
  /* word size in bits */
  (void) __collector_log_write ("<process wsize=\"%d\"></process>\n", (int) (8 * sizeof (void *)));

  /* The stack base is logged as ss_sp + ss_size of the context returned
     by getcontext; both fields are preset to zero beforehand.  */
  ucontext_t ucp;
  ucp.uc_stack.ss_sp = NULL;
  ucp.uc_stack.ss_size = 0;
  if (getcontext (&ucp) == 0)
    {
      (void) __collector_log_write ("<process stackbase=\"0x%lx\"></process>\n",
				    (unsigned long) ucp.uc_stack.ss_sp + ucp.uc_stack.ss_size);
    }

  (void) __collector_log_write ("<process>%s</process>\n",
				origin == SP_ORIGIN_FORK ? "(fork)" : exp_progspec);
  __collector_libthread_T1 = 0;
}
2166 
2167 static void
log_pause(void)2168 log_pause (void)
2169 {
2170   if (log_initted)
2171     log_enabled = 0;
2172 }
2173 
2174 static void
log_resume(void)2175 log_resume (void)
2176 {
2177   if (log_initted)
2178     log_enabled = 1;
2179 }
2180 
2181 /* __collector_log_write -- write a line to the log file
2182  *	return value:
2183  *	    0 if OK
2184  *	    1 if error (in creating or extending the log file)
2185  */
2186 int
__collector_log_write(char * format,...)2187 __collector_log_write (char *format, ...)
2188 {
2189   char buf[4096];
2190   va_list va;
2191   int rc = 0;
2192   static size_t loglen = 0;
2193 
2194   va_start (va, format);
2195   char *bufptr = buf;
2196   int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va);
2197   int allocated_sz = 0;
2198   va_end (va);
2199   if (sz >= sizeof (buf))
2200     {
2201       /* Allocate a new buffer.
2202        * We need this buffer only temporarily and locally.
2203        * But don't use the thread stack
2204        * since it already has buf
2205        * and is unlikely to have additonal room for something even larger than buf.
2206        */
2207       sz += 1; /* add the terminating null byte */
2208       bufptr = (char*) __collector_allocCSize (__collector_heap, sz, 0);
2209       if (bufptr)
2210 	{
2211 	  allocated_sz = sz;
2212 	  va_start (va, format);
2213 	  sz = __collector_xml_vsnprintf (bufptr, sz, format, va);
2214 	  va_end (va);
2215 	}
2216     }
2217   int newlen = CALL_UTIL (strlen)(bufptr);
2218   if (sz != newlen)
2219     // no need to free bufptr if we're going to abort anyhow
2220     abort ();
2221   bufptr[newlen + 1] = 0;
2222   loglen = loglen + newlen;
2223   TprintfT (DBG_LT2, "__collector_log_write len=%ld, loglen=%ld %s",
2224 	    (long) newlen, (long) loglen, bufptr);
2225   if (log_enabled <= 0)
2226     {
2227 #if 0
2228       /*  XXX suppress log_write messages with no log file open
2229        *	this is reached from SimApp dealing with the clock frequency, which it should
2230        *	not be doing.  For now, don't write a message.
2231        */
2232       CALL_UTIL (fprintf)(stderr, "__collector_log_write COL_ERROR_LOG_OPEN: %s", buf);
2233 #endif
2234     }
2235   else
2236     rc = __collector_write_string (log_hndl, bufptr, sz);
2237   if (allocated_sz)
2238     __collector_freeCSize (__collector_heap, (void *) bufptr, allocated_sz);
2239   return rc;
2240 }
2241 
2242 static void
log_close()2243 log_close ()
2244 {
2245   log_enabled = 0;
2246   log_initted = 0;
2247   __collector_delete_handle (log_hndl);
2248   log_hndl = NULL;
2249 }
2250 
2251 /*============================================================*/
2252 /*
2253  * Routines for handling the overview file
2254  */
2255 static void
ovw_open()2256 ovw_open ()
2257 {
2258   CALL_UTIL (strlcpy)(ovw_name, __collector_exp_dir_name, sizeof (ovw_name));
2259   CALL_UTIL (strlcat)(ovw_name, "/", sizeof (ovw_name));
2260   CALL_UTIL (strlcat)(ovw_name, SP_OVERVIEW_FILE, sizeof (ovw_name));
2261   int fd = CALL_UTIL (open)(ovw_name, O_WRONLY | O_CREAT | O_TRUNC,
2262 			    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
2263   if (fd < 0)
2264     {
2265       __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2266 			     SP_JCMD_CERROR, COL_ERROR_OVWOPEN, errno, ovw_name);
2267       return;
2268     }
2269   CALL_UTIL (close)(fd);
2270   sample_mode = 1;
2271 }
2272 
/* Convert the seconds/microseconds pair in TVAL to the seconds/nanoseconds
   pair in VALUE.  */
static __inline__ void
timeval_to_timespec (struct timeval *tval, struct timespec *value)
{
  /* whole seconds carry over unchanged */
  value->tv_sec = tval->tv_sec;
  /* microseconds -> nanoseconds */
  value->tv_nsec = tval->tv_usec * 1000;
}
2279 
/*
 * Resource usage.  /proc/<pid>/usage /proc/<pid>/lwp/<lwpid>/lwpusage
 *
 * One of these records is appended to the overview file, as raw bytes, for
 * every sample (see ovw_write).  ovw_write clears the whole record and then
 * fills in pr_lwpid, pr_count, pr_tstamp, pr_create, pr_rtime, pr_utime,
 * pr_stime and the counters pr_minf through pr_ictx; every other field is
 * written as zero.
 * NOTE(review): the layout presumably has to match what the reader of the
 * overview file expects -- confirm before changing any field.
 */
typedef struct prusage
{
  id_t        pr_lwpid;     /* lwp id.  0: process or defunct */
  int         pr_count;     /* number of contributing lwps */
  timestruc_t pr_tstamp;    /* current time stamp */
  timestruc_t pr_create;    /* process/lwp creation time stamp */
  timestruc_t pr_term;      /* process/lwp termination time stamp */
  timestruc_t pr_rtime;     /* total lwp real (elapsed) time */
  timestruc_t pr_utime;     /* user level cpu time */
  timestruc_t pr_stime;     /* system call cpu time */
  timestruc_t pr_ttime;     /* other system trap cpu time */
  timestruc_t pr_tftime;    /* text page fault sleep time */
  timestruc_t pr_dftime;    /* data page fault sleep time */
  timestruc_t pr_kftime;    /* kernel page fault sleep time */
  timestruc_t pr_ltime;     /* user lock wait sleep time */
  timestruc_t pr_slptime;   /* all other sleep time */
  timestruc_t pr_wtime;     /* wait-cpu (latency) time */
  timestruc_t pr_stoptime;  /* stopped time */
  timestruc_t filltime[6];  /* filler for future expansion */
  ulong_t     pr_minf;      /* minor page faults */
  ulong_t     pr_majf;      /* major page faults */
  ulong_t     pr_nswap;     /* swaps */
  ulong_t     pr_inblk;     /* input blocks */
  ulong_t     pr_oublk;     /* output blocks */
  ulong_t     pr_msnd;      /* messages sent */
  ulong_t     pr_mrcv;      /* messages received */
  ulong_t     pr_sigs;      /* signals received */
  ulong_t     pr_vctx;      /* voluntary context switches */
  ulong_t     pr_ictx;      /* involuntary context switches */
  ulong_t     pr_sysc;      /* system calls */
  ulong_t     pr_ioch;      /* chars read and written */
  ulong_t     filler[10];   /* filler for future expansion */
} prusage_t;
2316 
2317 static hrtime_t starttime = 0;
2318 
2319 static hrtime_t
ovw_write()2320 ovw_write ()
2321 {
2322   if (sample_mode == 0)
2323     return 0;
2324   int fd;
2325   int res;
2326   struct prusage usage;
2327   struct rusage rusage;
2328   hrtime_t hrt, delta;
2329 
2330   /* Fill in the prusage structure with info from getrusage() */
2331   hrt = collector_interface.getHiResTime ();
2332   if (starttime == 0)
2333     starttime = hrt;
2334   res = getrusage (RUSAGE_SELF, &rusage);
2335   if (res != 0)
2336     {
2337       (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2338 				    SP_JCMD_CERROR, COL_ERROR_OVWREAD, errno, ovw_name);
2339       return ( hrt);
2340     }
2341 
2342   CALL_UTIL (memset)(&usage, 0, sizeof (struct prusage));
2343   usage.pr_lwpid = getpid ();
2344   usage.pr_count = 1;
2345   usage.pr_tstamp.tv_sec = hrt / NANOSEC;
2346   usage.pr_tstamp.tv_nsec = hrt % NANOSEC;
2347   usage.pr_create.tv_sec = starttime / NANOSEC;
2348   usage.pr_create.tv_nsec = starttime % NANOSEC;
2349   delta = hrt - starttime;
2350   usage.pr_rtime.tv_sec = delta / NANOSEC;
2351   usage.pr_rtime.tv_nsec = delta % NANOSEC;
2352   timeval_to_timespec (&rusage.ru_utime, &usage.pr_utime);
2353   timeval_to_timespec (&rusage.ru_stime, &usage.pr_stime);
2354 
2355   /* make sure that user- and system cpu time are not negative */
2356   if (ts2hrt (usage.pr_utime) < 0)
2357     {
2358       usage.pr_utime.tv_sec = 0;
2359       usage.pr_utime.tv_nsec = 0;
2360     }
2361   if (ts2hrt (usage.pr_stime) < 0)
2362     {
2363       usage.pr_stime.tv_sec = 0;
2364       usage.pr_stime.tv_nsec = 0;
2365     }
2366 
2367   /* fill in other fields */
2368   usage.pr_minf = (ulong_t) rusage.ru_minflt;
2369   usage.pr_majf = (ulong_t) rusage.ru_majflt;
2370   usage.pr_nswap = (ulong_t) rusage.ru_nswap;
2371   usage.pr_inblk = (ulong_t) rusage.ru_inblock;
2372   usage.pr_oublk = (ulong_t) rusage.ru_oublock;
2373   usage.pr_msnd = (ulong_t) rusage.ru_msgsnd;
2374   usage.pr_mrcv = (ulong_t) rusage.ru_msgrcv;
2375   usage.pr_sigs = (ulong_t) rusage.ru_nsignals;
2376   usage.pr_vctx = (ulong_t) rusage.ru_nvcsw;
2377   usage.pr_ictx = (ulong_t) rusage.ru_nivcsw;
2378 
2379   fd = CALL_UTIL (open)(ovw_name, O_WRONLY | O_APPEND);
2380   if (fd < 0)
2381     {
2382       __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2383 			    SP_JCMD_CERROR, COL_ERROR_OVWOPEN, errno, ovw_name);
2384       return ( ts2hrt (usage.pr_tstamp));
2385     }
2386 
2387   CALL_UTIL (lseek)(fd, 0, SEEK_END);
2388   res = CALL_UTIL (write)(fd, &usage, sizeof (usage));
2389   CALL_UTIL (close)(fd);
2390   if (res != sizeof (usage))
2391     __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2392 			   SP_JCMD_CERROR, COL_ERROR_OVWWRITE, errno, ovw_name);
2393   return (hrt);
2394 }
2395 
2396 void
__collector_dlog(int tflag,int level,char * format,...)2397 __collector_dlog (int tflag, int level, char *format, ...)
2398 {
2399   if ((tflag & SP_DUMP_FLAG) == 0)
2400     {
2401       if (level > __collector_tracelevel)
2402 	return;
2403     }
2404   else if ((tflag & collector_debug_opt) == 0)
2405     return;
2406 
2407   /* In most cases this allocation should suffice */
2408   int bufsz = CALL_UTIL (strlen)(format) + 128;
2409   char *buf = (char*) alloca (bufsz);
2410   char *p = buf;
2411   int left = bufsz;
2412   if ((tflag & SP_DUMP_NOHEADER) == 0)
2413     {
2414       p += CALL_UTIL (snprintf)(p, left, "P%d,L%02u,t%02lu",
2415 				(int) getpid (),
2416 				(unsigned int) __collector_lwp_self (),
2417 				__collector_no_threads ? 0 : __collector_thr_self ());
2418       left = bufsz - (p - buf);
2419       if (tflag)
2420 	{
2421 	  hrtime_t ts = GETRELTIME ();
2422 	  p += CALL_UTIL (snprintf)(p, left, " %u.%09u ", (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
2423 	}
2424       else
2425 	p += CALL_UTIL (snprintf)(p, left, ": ");
2426       left = bufsz - (p - buf);
2427     }
2428 
2429   va_list va;
2430   va_start (va, format);
2431   int nbufsz = CALL_UTIL (vsnprintf)(p, left, format, va);
2432   va_end (va);
2433 
2434   if (nbufsz >= left)
2435     {
2436       /* Allocate a new buffer */
2437       nbufsz += 1; /* add the terminating null byte */
2438       char *nbuf = (char*) alloca (nbufsz + (p - buf));
2439       __collector_memcpy (nbuf, buf, p - buf);
2440       p = nbuf + (p - buf);
2441 
2442       va_start (va, format);
2443       nbufsz = CALL_UTIL (vsnprintf)(p, nbufsz, format, va);
2444       va_end (va);
2445       buf = nbuf;
2446     }
2447   CALL_UTIL (write)(2, buf, CALL_UTIL (strlen)(buf));
2448 }
2449 
2450 /*============================================================*/
2451 #if ! ARCH(SPARC)   /* !sparc-Linux */
2452 /*
2453  * Routines for handling _exit and _Exit
2454  */
2455 /*------------------------------------------------------------- _exit */
2456 
/* Call the routine stored in __real_<x>.  The only routines wrapped in
   this section are _exit and _Exit, both of type void (int), so the
   pointer is converted to exactly that type: calling them through an
   int (*)() pointer would be a call through an incompatible function
   type.  */
#define CALL_REAL(x) (*(void (*) (int)) __real_##x)
/* True while __real_<x> has not been resolved yet.  */
#define NULL_PTR(x) ( __real_##x == NULL )

static void *__real__exit = NULL; /* libc only: _exit */
static void *__real__Exit = NULL; /* libc only: _Exit */
/* Export _exit and _Exit as weak aliases of the wrappers defined in
   this section.  */
void _exit () __attribute__ ((weak, alias ("__collector_exit")));
void _Exit () __attribute__ ((weak, alias ("__collector_Exit")));
2464 
2465 void
__collector_exit(int status)2466 __collector_exit (int status)
2467 {
2468   if (NULL_PTR (_exit))
2469     {
2470       __real__exit = dlsym (RTLD_NEXT, "_exit");
2471       if (__real__exit == NULL)
2472 	__real__exit = dlsym (RTLD_DEFAULT, "_exit");
2473     }
2474   TprintfT (DBG_LT1, "__collector_exit() interposing @0x%p __real__exit\n", __real__exit);
2475   __collector_terminate_expt ();
2476   TprintfT (DBG_LT1, "__collector_exit(): experiment terminated\n");
2477   CALL_REAL (_exit)(status); // this will exit the process
2478 }
2479 
2480 void
__collector_Exit(int status)2481 __collector_Exit (int status)
2482 {
2483   if (NULL_PTR (_Exit))
2484     {
2485       __real__Exit = dlsym (RTLD_NEXT, "_Exit");
2486       if (__real__Exit == NULL)
2487 	__real__Exit = dlsym (RTLD_DEFAULT, "_exit");
2488     }
2489   TprintfT (DBG_LT1, "__collector_Exit() interposing @0x%p __real__Exit\n", __real__Exit);
2490   __collector_terminate_expt ();
2491   TprintfT (DBG_LT1, "__collector_Exit(): experiment terminated\n");
2492   CALL_REAL (_Exit)(status); // this will exit the process
2493 }
2494 #endif /* !sparc-Linux */
2495