xref: /netbsd-src/external/gpl3/binutils/dist/gprofng/libcollector/unwind.c (revision cb63e24e8d6aae7ddac1859a9015f48b1d8bd90e)
1 /* Copyright (C) 2021-2024 Free Software Foundation, Inc.
2    Contributed by Oracle.
3 
4    This file is part of GNU Binutils.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, 51 Franklin Street - Fifth Floor, Boston,
19    MA 02110-1301, USA.  */
20 
21 #include "config.h"
22 #include <alloca.h>
23 #include <dlfcn.h>
24 #include <stdlib.h>
25 #include <signal.h>
26 #include <unistd.h>
27 #include <pthread.h>
28 
29 #include "gp-defs.h"
30 #include "collector.h"
31 #include "gp-experiment.h"
32 #include "memmgr.h"
33 #include "tsd.h"
34 
35 /* Get dynamic module interface*/
36 #include "collector_module.h"
37 
38 /* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */
39 #include "data_pckts.h"
40 
#if ARCH(SPARC)
/* ABI layout of a SPARC register-save area / stack frame.  Used when
   walking the chain of saved frame pointers and return addresses.  */
struct frame
{
  long fr_local[8];         /* saved locals */
  long fr_arg[6];           /* saved arguments [0 - 5] */
  struct frame *fr_savfp;   /* saved frame pointer */
  long fr_savpc;            /* saved program counter */
#if WSIZE(32)
  char *fr_stret;           /* struct return addr */
#endif
  long fr_argd[6];          /* arg dump area */
  long fr_argx[1];          /* array of args past the sixth */
};

#elif ARCH(Intel)
/* Minimal x86 frame as laid out by the conventional prologue
   (push %rbp; mov %rsp,%rbp): the saved frame pointer followed by the
   return address into the caller.  */
struct frame
{
  unsigned long fr_savfp;   /* caller's saved frame pointer */
  unsigned long fr_savpc;   /* return address into the caller */
};
#endif
62 
63 /* Set the debug trace level */
64 #define DBG_LT0 0
65 #define DBG_LT1	1
66 #define DBG_LT2	2
67 #define DBG_LT3	3
68 
69 int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL;
70 #define VM_NO_ACCESS        (-1)
71 #define VM_NOT_VM_MEMORY    (-2)
72 #define VM_NOT_X_SEGMENT    (-3)
73 
74 #define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end))
75 
76 /*
77  * Weed through all the arch dependent stuff to get the right definition
 78  * for 'pc' in the ucontext structure.  The system header files are a mess
 79  * when dealing with all the arches (just look for PC, R_PC, REG_PC).
80  *
81  */
82 
83 #if ARCH(SPARC)
84 
85 #define IN_BARRIER(x) \
86 	( barrier_hdl && \
87 	  (unsigned long)x >= barrier_hdl && \
88 	  (unsigned long)x < barrier_hdlx )
89 static unsigned long barrier_hdl = 0;
90 static unsigned long barrier_hdlx = 0;
91 
92 #if WSIZE(64)
93 #define STACK_BIAS 2047
94 #define IN_TRAP_HANDLER(x) \
95 	( misalign_hdl && \
96 	  (unsigned long)x >= misalign_hdl && \
97 	  (unsigned long)x < misalign_hdlx )
98 static unsigned long misalign_hdl = 0;
99 static unsigned long misalign_hdlx = 0;
100 #elif  WSIZE(32)
101 #define STACK_BIAS 0
102 #endif
103 
104 #if WSIZE(64)
105 #define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
106 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
107 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
108 #else
109 #define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
110 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
111 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
112 #endif
113 
114 #elif ARCH(Intel)
115 #include "opcodes/disassemble.h"
116 
117 static int
fprintf_func(void * arg ATTRIBUTE_UNUSED,const char * fmt ATTRIBUTE_UNUSED,...)118 fprintf_func (void *arg ATTRIBUTE_UNUSED, const char *fmt ATTRIBUTE_UNUSED, ...)
119 {
120   return 0;
121 }
122 
123 static int
fprintf_styled_func(void * arg ATTRIBUTE_UNUSED,enum disassembler_style st ATTRIBUTE_UNUSED,const char * fmt ATTRIBUTE_UNUSED,...)124 fprintf_styled_func (void *arg ATTRIBUTE_UNUSED,
125 		      enum disassembler_style st ATTRIBUTE_UNUSED,
126 		      const char *fmt ATTRIBUTE_UNUSED, ...)
127 {
128   return 0;
129 }
130 
131 /* Get LENGTH bytes from info's buffer, at target address memaddr.
132    Transfer them to myaddr.  */
133 static int
read_memory_func(bfd_vma memaddr,bfd_byte * myaddr,unsigned int length,disassemble_info * info)134 read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
135 		  disassemble_info *info)
136 {
137   unsigned int opb = info->octets_per_byte;
138   size_t end_addr_offset = length / opb;
139   size_t max_addr_offset = info->buffer_length / opb;
140   size_t octets = (memaddr - info->buffer_vma) * opb;
141   if (memaddr < info->buffer_vma
142       || memaddr - info->buffer_vma > max_addr_offset
143       || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset
144       || (info->stop_vma && (memaddr >= info->stop_vma
145 			     || memaddr + end_addr_offset > info->stop_vma)))
146     return -1;
147   memcpy (myaddr, info->buffer + octets, length);
148   return 0;
149 }
150 
151 static void
print_address_func(bfd_vma addr ATTRIBUTE_UNUSED,disassemble_info * info ATTRIBUTE_UNUSED)152 print_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
153 		    disassemble_info *info ATTRIBUTE_UNUSED) { }
154 
155 static asymbol *
symbol_at_address_func(bfd_vma addr ATTRIBUTE_UNUSED,disassemble_info * info ATTRIBUTE_UNUSED)156 symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
157 			disassemble_info *info ATTRIBUTE_UNUSED)
158 {
159   return NULL;
160 }
161 
162 static bfd_boolean
symbol_is_valid(asymbol * sym ATTRIBUTE_UNUSED,disassemble_info * info ATTRIBUTE_UNUSED)163 symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED,
164 		 disassemble_info *info ATTRIBUTE_UNUSED)
165 {
166   return TRUE;
167 }
168 
169 static void
memory_error_func(int status ATTRIBUTE_UNUSED,bfd_vma addr ATTRIBUTE_UNUSED,disassemble_info * info ATTRIBUTE_UNUSED)170 memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED,
171 		   disassemble_info *info ATTRIBUTE_UNUSED) { }
172 
173 
174 #if WSIZE(32)
175 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP])
176 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP])
177 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP])
178 
179 #elif WSIZE(64)
180 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP])
181 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP])
182 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP])
183 #endif /* WSIZE() */
184 
185 #elif ARCH(Aarch64)
186 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15])
187 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13])
188 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14])
189 #endif /* ARCH() */
190 
191 /*
192  * FILL_CONTEXT() for all platforms
193  * Could use getcontext() except:
194  * - it's not guaranteed to be async signal safe
195  * - it's a system call and not that lightweight
196  * - it's not portable as of POSIX.1-2008
197  * So we just use low-level mechanisms to fill in the few fields we need.
198  */
199 #if ARCH(SPARC)
200 #if WSIZE(32)
201 #define FILL_CONTEXT(context) \
202 	{ \
203 	greg_t fp; \
204 	__asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
205 	__asm__ __volatile__( "ta 3" ); \
206 	GET_SP(context) = fp; \
207 	GET_PC(context) = (greg_t)0; \
208 	}
209 
210 #elif WSIZE(64)
211 #define FILL_CONTEXT(context) \
212 	{ \
213 	    greg_t fp; \
214 	    __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
215 	    __asm__ __volatile__( "flushw" ); \
216 	    GET_SP(context) = fp; \
217 	    GET_PC(context) = (greg_t)0; \
218 	}
219 #endif /* WSIZE() */
220 
221 #elif ARCH(Intel)
222 #define FILL_CONTEXT(context) \
223 	{ \
224 	    context->uc_link = NULL; \
225 	    void *sp = __collector_getsp(); \
226 	    GET_SP(context) = (intptr_t)sp; \
227 	    GET_FP(context) = (intptr_t)__collector_getfp(); \
228 	    GET_PC(context) = (intptr_t)__collector_getpc(); \
229 	    context->uc_stack.ss_sp = sp; \
230 	    context->uc_stack.ss_size = 0x100000; \
231 	}
232 
233 #elif ARCH(Aarch64)
234 #if defined(__MUSL_LIBC)
235 typedef uint64_t __u64;
236 #endif
237 
238 #define FILL_CONTEXT(context) \
239     { CALL_UTIL (getcontext) (context);  \
240       context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
241     }
242 
243 #endif /* ARCH() */
244 
245 static int
getByteInstruction(unsigned char * p)246 getByteInstruction (unsigned char *p)
247 {
248   if (__collector_VM_ReadByteInstruction)
249     {
250       int v = __collector_VM_ReadByteInstruction (p);
251       if (v != VM_NOT_VM_MEMORY)
252 	return v;
253     }
254   return *p;
255 }
256 
257 struct DataHandle *dhndl = NULL;
258 
259 static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;
260 
261 /* To support two OpenMP API's we use a pointer
262  * to the actual function.
263  */
264 int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL;
265 int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL;
266 
267 #define DEFAULT_MAX_NFRAMES 256
268 static int max_native_nframes = DEFAULT_MAX_NFRAMES;
269 static int max_java_nframes = DEFAULT_MAX_NFRAMES;
270 
271 #define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long)          )
272 #define JAVA_FRAME_BYTES(nframes)   ( ((nframes)+1) * sizeof(long) * 2 + 16 )
273 #define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) )
274 
275 #define ROOT_UID	801425552975190205ULL
276 #define ROOT_UID_INV	92251691606677ULL
277 #define ROOT_IDX	13907816567264074199ULL
278 #define ROOT_IDX_INV	2075111ULL
279 #define	UIDTableSize	1048576
280 static volatile uint64_t *UIDTable = NULL;
281 static volatile int seen_omp = 0;
282 
283 static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode);
284 static FrameInfo compute_uid (Frame_packet *frp);
285 static int omp_no_walk = 0;
286 
287 #if ARCH(Intel)
288 #define ValTableSize    1048576
289 #define OmpValTableSize 65536
290 static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs
291 static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs
292 static struct WalkContext *OmpCurCtxs = NULL;
293 static struct WalkContext *OmpCtxs = NULL;
294 static uint32_t *OmpVals = NULL;
295 static unsigned long *OmpRAs = NULL;
296 static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend);
297 static int parse_x86_AVX_instruction (unsigned char *pc);
298 
299 struct WalkContext
300 {
301   unsigned long pc;
302   unsigned long sp;
303   unsigned long fp;
304   unsigned long ln;
305   unsigned long sbase; /* stack boundary */
306   unsigned long tbgn;  /* current memory segment start */
307   unsigned long tend;  /* current memory segment end */
308 };
309 #endif
310 
311 #if defined(DEBUG) && ARCH(Intel)
312 #include <execinfo.h>
313 
314 static void
dump_stack(int nline)315 dump_stack (int nline)
316 {
317   if ((__collector_tracelevel & SP_DUMP_STACK) == 0)
318     return;
319 
320   enum Constexpr { MAX_SIZE = 1024 };
321   void *array[MAX_SIZE];
322   size_t sz = backtrace (array, MAX_SIZE);
323   char **strings = backtrace_symbols (array, sz);
324   DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz);
325   for (int i = 0; i < sz; i++)
326     DprintfT (SP_DUMP_STACK, "  %3d:  %p %s\n", i, array[i],
327 	     strings[i] ? strings[i] : "???");
328 }
329 
330 #define dump_targets(nline, ntrg, targets) \
331     if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \
332 	for(int i = 0; i < ntrg; i++) \
333 	     DprintfT (SP_DUMP_UNWIND, "  %2d: 0x%lx\n", i, (long) targets[i])
334 #else
335 #define dump_stack(x)
336 #define dump_targets(nline, ntrg, targets)
337 #endif
338 
339 void
__collector_ext_unwind_key_init(int isPthread,void * stack)340 __collector_ext_unwind_key_init (int isPthread, void * stack)
341 {
342   void * ptr = __collector_tsd_get_by_key (unwind_key);
343   if (ptr == NULL)
344     {
345       TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n");
346       return;
347     }
348   if (isPthread)
349     {
350       size_t stack_size = 0;
351       void *stack_addr = 0;
352       pthread_t pthread = pthread_self ();
353       pthread_attr_t attr;
354       int err = pthread_getattr_np (pthread, &attr);
355       TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", pthread, err);
356       if (err == 0)
357 	{
358 	  err = pthread_attr_getstack (&attr, &stack_addr, &stack_size);
359 	  if (err == 0)
360 	    stack_addr = (char*) stack_addr + stack_size;
361 	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n",
362 		    (long) stack_size, stack_addr, err);
363 	  err = pthread_attr_destroy (&attr);
364 	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err);
365 	}
366       *(void**) ptr = stack_addr;
367     }
368   else
369     *(void**) ptr = stack;  // cloned thread
370 }
371 
/* One-time initialization of the unwind subsystem.  Allocates the UID
   hash table and per-architecture caches, reads stack-depth limits
   from the environment, resolves optional runtime symbols (JVM read
   hook, SPARC trap/barrier handlers), and when RECORD is nonzero
   creates the frame-info output handle.  Also creates the TSD key
   used to cache each thread's end-of-stack.  On allocation failure
   the whole experiment is terminated.  */
void
__collector_ext_unwind_init (int record)
{
  int sz = UIDTableSize * sizeof (*UIDTable);
  UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1);
  if (UIDTable == NULL)
    {
      /* Cannot operate without the UID cache; abort the experiment.  */
      __collector_terminate_expt ();
      return;
    }
  CALL_UTIL (memset)((void*) UIDTable, 0, sz);

  /* Optional override of the Java stack depth limit; clamped to
     [5, MAX_STACKDEPTH].  */
  char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_java_nframes = n;
	}
    }

  /* Same override for the native stack depth limit.  */
  str = CALL_UTIL (getenv)("GPROFNG_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr = str;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_native_nframes = n;
	}
    }

  TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d  GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n",
	    max_native_nframes, max_java_nframes);
  omp_no_walk = 1;

  /* Pick up the JVM's byte-read hook if a JVM is loaded.  */
  if (__collector_VM_ReadByteInstruction == NULL)
    __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction");

#if ARCH(SPARC)
#if WSIZE(64)
  /* Locate the misalignment trap handler so the unwinder can special-
     case frames inside it; fall back to a fixed-size range if the end
     symbol is absent.  */
  misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler");
  misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end");
  if (misalign_hdlx == 0)
    misalign_hdlx = misalign_hdl + 292;
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;   /* need both ends of the range, else disable */
#else
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;   /* need both ends of the range, else disable */
#endif /* WSIZE() */

#elif ARCH(Intel)
  /* PC-classification caches for the x86 unwinder.  */
  sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP);
  AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK);
  AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL))
    {
      /* Caches used to avoid full stack walks for OpenMP/MPI events.  */
      sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
      if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
	{
	  TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n");
	  __collector_terminate_expt ();
	  return;
	}
    }
#endif /* ARCH() */

  if (record)
    {
      /* Open the frame-info output stream and announce it in the log.  */
      dhndl = __collector_create_handle (SP_FRINFO_FILE);
      __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE);
    }

  /* Per-thread slot holding the thread's end-of-stack address.  */
  unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL);
  if (unwind_key == COLLECTOR_TSD_INVALID_KEY)
    {
      TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n");
      __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created</%s>\n",
			     SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT);
      return;
    }
  TprintfT (0, "unwind_init() completed normally\n");
  return;
}
479 
480 void
__collector_ext_unwind_close()481 __collector_ext_unwind_close ()
482 {
483   __collector_delete_handle (dhndl);
484   dhndl = NULL;
485 }
486 
487 void*
__collector_ext_return_address(unsigned level)488 __collector_ext_return_address (unsigned level)
489 {
490   if (NULL == UIDTable)  //unwind not initialized yet
491     return NULL;
492   unsigned size = (level + 4) * sizeof (long); // need to strip __collector_get_return_address and its caller
493   ucontext_t context;
494   FILL_CONTEXT ((&context));
495   char* buf = (char*) alloca (size);
496   if (buf == NULL)
497     {
498       TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size);
499       return NULL;
500     }
501   int sz = stack_unwind (buf, size, NULL, NULL, &context, 0);
502   if (sz < (level + 3) * sizeof (long))
503     {
504       TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz);
505       return NULL;
506     }
507   long *lbuf = (long*) buf;
508   TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]);
509   return (void *) (lbuf[level + 2]);
510 }
511 /*
512  *  Collector interface method getFrameInfo
513  */
/*
 *  Collector interface method getFrameInfo.
 *
 *  Build a Frame_packet for the event described by MODE/ARG and return
 *  its unique id.  Depending on the low 16 bits of MODE, ARG is either
 *  a stack pointer (FRINFO_FROM_STACK*), a ucontext (FRINFO_FROM_UC),
 *  or a caller-supplied array of frame words (FRINFO_FROM_ARRAY).
 *  The packet may contain a Java section (when Java profiling is
 *  active), a native section, and/or the raw array section; it is then
 *  hashed and possibly written out by compute_uid().  Returns 0 on
 *  invalid arguments.  TS is currently unused here.
 */
FrameInfo
__collector_get_frame_info (hrtime_t ts, int mode, void *arg)
{
  ucontext_t *context = NULL;
  void *bptr = NULL;
  CM_Array *array = NULL;

  int unwind_mode = 0;
  int do_walk = 1;

  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  int bmode = mode & 0xffff;   /* base mode without flag bits */
  int pseudo_context = 0;
  if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK)
    {
      /* ARG is a stack address; unwind from the current context.  */
      bptr = arg;
      context = (ucontext_t*) alloca (sizeof (ucontext_t));
      FILL_CONTEXT (context);
      unwind_mode |= bmode;
    }
  else if (bmode == FRINFO_FROM_UC)
    {
      context = (ucontext_t*) arg;
      if (context == NULL)
	return (FrameInfo) 0;
      /* A zero SP marks a synthesized context with no real stack.  */
      if (GET_SP (context) == 0)
	pseudo_context = 1;
    }
  else if (bmode == FRINFO_FROM_ARRAY)
    {
      array = (CM_Array*) arg;
      if (array == NULL || array->length <= 0)
	return (FrameInfo) 0;
    }
  else
    return (FrameInfo) 0;

  /* Size the packet buffer: native frames always, Java frames only
     when a usable context is present.  */
  int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes);
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    max_frame_size += JAVA_FRAME_BYTES (max_java_nframes);

  Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size);
  frpckt->type = FRAME_PCKT;
  frpckt->hsize = sizeof (Frame_packet);

  char *d = (char*) (frpckt + 1);   /* write cursor into the payload */
  int size = max_frame_size;        /* bytes remaining in the payload */

#define MIN(a,b) ((a)<(b)?(a):(b))
#if defined(GPROFNG_JAVA_PROFILING)
  /* get Java info */
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    {
      /* use only 2/3 of the buffer and leave the rest for the native stack */
      int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes));
      if (tmpsz > 0)
	{
	  int sz = __collector_ext_jstack_unwind (d, tmpsz, context);
	  d += sz;
	  size -= sz;
	}
    }
#endif

  /* get native stack */
  if (context)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
#if ARCH(Intel)
      if (omp_no_walk == 0)
	do_walk = 1;
#endif
      if (do_walk == 0)
	unwind_mode |= FRINFO_NO_WALK;

      int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes));
      if (tmpsz > 0)
	{
	  sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode);
	  d += sz;
	  size -= sz;
	}
      /* Header is filled in after the fact, once the size is known.  */
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* create a stack image from user data */
  if (array && array->length > 0)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
      sz = array->length;
      if (sz > size)
	sz = size;  // YXXX should we mark this with truncation frame?
      __collector_memcpy (d, array->bytes, sz);
      d += sz;
      size -= sz;
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* Compute the total size */
  frpckt->tsize = d - (char*) frpckt;
  FrameInfo uid = compute_uid (frpckt);
  return uid;
}
626 
/* Compute a 64-bit unique id (and hash index) for the frame packet FRP
   by folding every stack word into two multiplicative hashes, one per
   info section and one for the whole packet.  The id is looked up in
   UIDTable at three index positions; if found at any, the packet has
   been recorded before and only the uid is returned.  Otherwise the
   sections are compressed (a recognized suffix is replaced by its
   8-byte uid) and the packet is written to the frame-info stream.  */
FrameInfo
compute_uid (Frame_packet *frp)
{
  uint64_t idxs[LAST_INFO];   /* per-section table indices, by kind */
  uint64_t uid = ROOT_UID;    /* whole-packet uid accumulator */
  uint64_t idx = ROOT_IDX;    /* whole-packet index accumulator */

  Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize);
  char *end = (char*) frp + frp->tsize;
  for (;;)
    {
      /* Stop at the packet end or on a malformed section header.  */
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;

      /* Start with a different value to avoid matching with uid */
      uint64_t uidt = 1;
      uint64_t idxt = 1;
      long *ptr = (long*) ((char*) cinfo + cinfo->hsize);
      long *bnd = (long*) ((char*) cinfo + sizeof (Common_info));
      TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize);
      /* Fold the section's words from the innermost frame outward, so
         common stack suffixes hash to the same running value.  */
      while (ptr > bnd)
	{
	  long val = *(--ptr);
	  tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val);
	  uidt = (uidt + val) * ROOT_UID;
	  idxt = (idxt + val) * ROOT_IDX;
	  uid = (uid + val) * ROOT_UID;
	  idx = (idx + val) * ROOT_IDX;
	}
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  cinfo->uid = uidt;
	  idxs[cinfo->kind] = idxt;
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  tprintf (DBG_LT2, "\n");

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;
  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;

  /* Already recorded: no need to write the packet again.  */
  if (found1 || found2 || found3)
    return (FrameInfo) uid;
  frp->uid = uid;

  /* Compress info's */
  cinfo = (Common_info*) ((char*) frp + frp->hsize);
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  long *ptr = (long*) ((char*) cinfo + sizeof (Common_info));
	  long *bnd = (long*) ((char*) cinfo + cinfo->hsize);
	  uint64_t uidt = cinfo->uid;
	  uint64_t idxt = idxs[cinfo->kind];
	  int found = 0;
	  int first = 1;
	  /* Walk outer frames off the hash (using the inverse
	     multipliers) looking for a suffix already in the table.  */
	  while (ptr < bnd - 1)
	    {
	      int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	      if (UIDTable[idx1] == uidt)
		{
		  found = 1;
		  break;
		}
	      else if (first)
		{
		  first = 0;
		  UIDTable[idx1] = uidt;
		}
	      long val = *ptr++;
	      uidt = uidt * ROOT_UID_INV - val;
	      idxt = idxt * ROOT_IDX_INV - val;
	    }
	  if (found)
	    {
	      /* Replace the known suffix with its 8-byte uid (unless
		 the whole section matched) and close the gap.  */
	      char *d = (char*) ptr;
	      char *s = (char*) bnd;
	      if (!first)
		{
		  int i;
		  for (i = 0; i<sizeof (uidt); i++)
		    {
		      *d++ = (char) uidt;
		      uidt = uidt >> 8;
		    }
		}
	      int delta = s - d;
	      while (s < end)
		*d++ = *s++;
	      cinfo->kind |= COMPRESSED_INFO;
	      cinfo->hsize -= delta;
	      frp->tsize -= delta;
	      end -= delta;
	    }
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  __collector_write_packet (dhndl, (CM_Packet*) frp);
  return (FrameInfo) uid;
}
753 
/* Compute and record a unique id for the caller-supplied word array
   ARG, chained to a previously returned id SUID (0 for none).  Uses
   the same multiplicative hash and three-slot UIDTable lookup as
   compute_uid(); a new Uid_packet is written only for ids not seen
   before, with a known suffix compressed to its 8-byte link uid.
   Returns (FrameInfo)-1 if ARG is not long-aligned.  */
FrameInfo
__collector_getUID (CM_Array *arg, FrameInfo suid)
{
  /* Both the length and the data pointer must be long-aligned.  */
  if (arg->length % sizeof (long) != 0 ||
      (long) arg->bytes % sizeof (long) != 0)
    return (FrameInfo) - 1;
  if (arg->length == 0)
    return suid;

  uint64_t uid = suid ? suid : 1;
  uint64_t idx = suid ? suid : 1;
  long *ptr = (long*) ((char*) arg->bytes + arg->length);
  long *bnd = (long*) (arg->bytes);
  /* Fold words from the end backwards, continuing from SUID.  */
  while (ptr > bnd)
    {
      long val = *(--ptr);
      uid = (uid + val) * ROOT_UID;
      idx = (idx + val) * ROOT_IDX;
    }

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;

  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;
  if (found1 || found2 || found3)
    return (FrameInfo) uid;

  /* Not seen before: build a Uid_packet holding the raw words.  */
  int sz = sizeof (Uid_packet) + arg->length;
  if (suid)
    sz += sizeof (suid);   /* room for a possible link uid */
  Uid_packet *uidp = alloca (sz);
  uidp->tsize = sz;
  uidp->type = UID_PCKT;
  uidp->flags = 0;
  uidp->uid = uid;

  /* Compress */
  ptr = (long*) (arg->bytes);
  bnd = (long*) ((char*) arg->bytes + arg->length);
  long *dst = (long*) (uidp + 1);
  uint64_t uidt = uid;
  uint64_t idxt = idx;
  uint64_t luid = suid; /* link uid */

  while (ptr < bnd)
    {

      long val = *ptr++;
      *dst++ = val;

      /* NOTE(review): (bnd - ptr) counts longs while sizeof (uidt)
	 is bytes; this keeps at least 8 words (not 8 bytes) ahead of
	 the end before attempting suffix matching — confirm intent.  */
      if ((bnd - ptr) > sizeof (uidt))
	{
	  /* Peel VAL off the hash and probe for a known suffix.  */
	  uidt = uidt * ROOT_UID_INV - val;
	  idxt = idxt * ROOT_IDX_INV - val;
	  int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	  if (UIDTable[idx1] == uidt)
	    {
	      luid = uidt;
	      break;
	    }
	}
    }
  if (luid)
    {
      /* Terminate the packet with the 8-byte link uid instead of the
	 remaining (already recorded) words.  */
      char *d = (char*) dst;
      for (int i = 0; i<sizeof (luid); i++)
	{
	  *d++ = (char) luid;
	  luid = luid >> 8;
	}
      uidp->flags |= COMPRESSED_INFO;
      uidp->tsize = d - (char*) uidp;
    }
  __collector_write_packet (dhndl, (CM_Packet*) uidp);

  return (FrameInfo) uid;
}
851 
852 int
__collector_getStackTrace(void * buf,int size,void * bptr,void * eptr,void * arg)853 __collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg)
854 {
855   if (arg == (void*) __collector_omp_stack_trace)
856     seen_omp = 1;
857   int do_walk = 1;
858   if (arg == NULL || arg == (void*) __collector_omp_stack_trace)
859     {
860       do_walk = (arg == (void*) __collector_omp_stack_trace && omp_no_walk) ? 0 : 1;
861       ucontext_t *context = (ucontext_t*) alloca (sizeof (ucontext_t));
862       FILL_CONTEXT (context);
863       arg = context;
864     }
865   int unwind_mode = 0;
866   if (do_walk == 0)
867     unwind_mode |= FRINFO_NO_WALK;
868   return stack_unwind (buf, size, bptr, eptr, arg, unwind_mode);
869 }
870 
871 #if ARCH(SPARC)
872 /*
873  * These are important data structures taken from the header files reg.h and
874  * ucontext.h. They are used for the stack trace algorithm explained below.
875  *
876  *	typedef struct ucontext {
877  * 		u_long		uc_flags;
878  * 		struct ucontext	*uc_link;
879  * 		usigset_t   	uc_sigmask;
880  * 		stack_t 	uc_stack;
881  * 		mcontext_t 	uc_mcontext;
882  * 		long		uc_filler[23];
883  * 	} ucontext_t;
884  *
885  *	#define	SPARC_MAXREGWINDOW	31
886  *
887  *	struct	rwindow {
888  *		greg_t	rw_local[8];
889  *		greg_t	rw_in[8];
890  *	};
891  *
892  *	#define	rw_fp	rw_in[6]
893  *	#define	rw_rtn	rw_in[7]
894  *
895  *	struct gwindows {
896  *		int		wbcnt;
897  *		int		*spbuf[SPARC_MAXREGWINDOW];
898  *		struct rwindow	wbuf[SPARC_MAXREGWINDOW];
899  *	};
900  *
901  *	typedef struct gwindows	gwindows_t;
902  *
903  *	typedef struct {
904  *		gregset_t	gregs;
905  *		gwindows_t	*gwins;
906  *		fpregset_t	fpregs;
907  *		long		filler[21];
908  *	} mcontext_t;
909  *
910  * The stack would look like this when SIGPROF occurrs.
911  *
912  *	------------------------- <- high memory
913  *	|			|
914  *	|			|
915  *	-------------------------
916  *	|			|
917  *	------------------------- <- fp' <-|
918  *	|			|	   |
919  *		:	:	 	   |
920  *	|			|	   |
921  *	-------------------------	   |
922  *	|	fp		|----------|
923  *	|			|
924  *	------------------------- <- sp'
925  *	|			|		             |	|
926  *	| 	gwins		| <- saved stack pointers &  |  |
927  *	|			|    register windows	     |  |- mcontext
928  *	-------------------------			     |  |
929  *	|	gregs		| <- saved registers	     |  |
930  *	-------------------------			     |
931  *	|			|			     |- ucontext
932  *	------------------------- <- ucp (ucontext pointer)  |
933  *	|			|				|
934  *	|			|				|- siginfo
935  *	------------------------- <- sip (siginfo pointer)	|
936  *	|			|
937  *	------------------------- <- sp
938  *
939  * Then the signal handler is called with:
940  *	handler( signo, sip, uip );
941  * When gwins is null, all the stack frames are saved in the user stack.
942  * In that case we can find sp' from gregs and walk the stack for a backtrace.
943  * However, if gwins is not null we will have a more complicated case.
944  * Wbcnt(in gwins) tells you how many saved register windows are valid.
945  * This is important because the kernel does not allocate the entire array.
946  * And the top most frame is saved in the lowest index element. The next
947  * paragraph explains the possible causes.
948  *
949  * There are two routines in the kernel to flush out user register windows.
950  *	flush_user_windows and flush_user_windows_to_stack
951  * The first routine will not cause a page fault. Therefore if the user
952  * stack is not in memory, the register windows will be saved to the pcb.
 * This can happen when the kernel is trying to deliver a signal and
 * the user stack got swapped out. The kernel will then build a new context for
 * the signal handler and the saved register windows will
 * be copied to the ucontext as shown above. On the other hand,
957  * flush_user_windows_to_stack can cause a page fault, and if it failed
958  * then there is something wrong (stack overflow, misalign).
 * The first saved register window does not necessarily correspond to the
 * first stack frame. So the current stack pointer must be compared with
961  * the stack pointers in spbuf to find a match.
962  *
963  * We will also follow the uc_link field in ucontext to trace also nested
964  * signal stack frames.
965  *
966  */
967 
968 /* Dealing with trap handlers.
969  * When a user defined trap handler is invoked the return address
970  * (or actually the address of an instruction that raised the trap)
971  * is passed to the trap handler in %l6, whereas saved %o7 contains
972  * garbage. First, we need to find out if a particular pc belongs
973  * to the trap handler, and if so, take the %l6 value from the stack rather
974  * than %o7 from either the stack or the register.
975  * There are three possible situations represented
976  * by the following stacks:
977  *
978  *   MARKER		MARKER			MARKER
979  *   trap handler pc	__func pc before 'save'	__func pc after 'save'
980  *   %l6		%o7 from reg		%o7 (garbage)
981  *   ...		%l6			trap handler pc
982  *			...			%l6
983  *						...
984  * where __func is a function called from the trap handler.
985  *
986  * Currently this is implemented to only deal with __misalign_trap_handler
987  * set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
988  * macro shows it. A general solution is postponed.
989  */
990 
991 /* Special handling of unwind through the parallel loop barrier code:
992  *
993  *  The library defines two symbols, __mt_EndOfTask_Barrier_ and
994  *	__mt_EndOfTask_Barrier_Dummy_ representing the first word of
 *	the barrier synchronization code, and the first word following
996  *	it.  Whenever the leaf PC is between these two symbols,
997  *	the unwind code is special-cased as follows:
998  *	The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
999  *	function, so its return address is in a register, not saved on
1000  *	the stack.
1001  *
1002  *    MARKER
1003  *    __mt_EndOfTask_Barrier_ PC -- the leaf PC
1004  *    loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
1005  *	    this address is taken from the %O0 register
1006  *    {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
1007  *     ...
1008  *
1009  *  With this trick, the analyzer will show the time in the barrier
1010  *	attributed to the loop at the end of which the barrier synchronization
1011  *	is taking place.  That loop body routine, will be shown as called
1012  *	from the function from which it was extracted, which will be shown
1013  *	as called from the real caller, either the slave or master library routine.
1014  */
1015 
1016 /*
1017  * These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
1018  *
1019  * Note that 0x82 is ASI_PNF.  See
1020  *   http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
1021  *   ASI  address space identifier; PNF  primary no fault
1022  */
1023 
/* Load an int from an address using the no-fault ASI 0x82 (ASI_PNF;
   see the note above).  If the address is illegal, the load silently
   yields 0 instead of faulting the process -- note that a legal
   address containing 0 is therefore indistinguishable from a fault. */
static int
SPARC_no_fault_load_int (void *addr)
{
  int val;
  __asm__ __volatile__(
		       "lda [%1] 0x82, %0\n\t"
		       : "=r" (val)
		       : "r" (addr)
		       );

  return val;
}
1039 
1040 /* check if an address is invalid
1041  *
1042  * A no-fault load of an illegal address still faults, but it does so silently to the calling process.
1043  * It returns a 0, but so could a load of a legal address.
1044  * So, we time the load.  A "fast" load must be a successful load.
1045  * A "slow" load is probably a fault.
1046  * Since it could also be a cache/TLB miss or other abnormality,
1047  * it's safest to retry a slow load.
1048  * The cost of trying a valid address should be some nanosecs.
1049  * The cost of trying an invalid address up to 10 times could be some microsecs.
1050  */
#if 0
/* Currently unused: probe whether addr is readable by timing a no-fault
   load.  Per the comment above, a slow load is probably a fault, but it
   could also be a cache/TLB miss, so a slow result is retried up to 10
   times before the address is declared invalid.  */
static
int invalid_SPARC_addr(void *addr)
{
    long t1, t2;
    int i;

    for (i=0; i<10; i++) {
      __asm__ __volatile__(
	"rd %%tick, %0\n\t"
	"lduba [%2] 0x82, %%g0\n\t"
	"rd %%tick, %1\n\t"
	: "=r" (t1), "=r" (t2)
	: "r" (addr) );
      if ( (t2 - t1) < 100 )
	return 0;
    }
    return 1;
}
#endif
1071 
1072 /*
1073  * The standard SPARC procedure-calling convention is that the
1074  * calling PC (for determining the return address when the procedure
1075  * is finished) is placed in register %o7.  A called procedure
1076  * typically executes a "save" instruction that shifts the register
1077  * window, and %o7 becomes %i7.
1078  *
1079  * Optimized leaf procedures do not shift the register window.
1080  * They assume the return address will remain %o7.  So when
1081  * we process a leaf PC, we walk instructions to see if there
1082  * is a call, restore, or other instruction that would indicate
1083  * we can IGNORE %o7 because this is NOT a leaf procedure.
1084  *
1085  * If a limited instruction walk uncovers no such hint, we save
1086  * not only the PC but the %o7 value as well... just to be safe.
1087  * Later, in DBE post-processing of the call stacks, we decide
1088  * whether any recorded %o7 value should be used as a caller
1089  * frame or should be discarded.
1090  */
1091 
1092 #define IS_ILLTRAP(x) (((x) & 0xc1c00000) == 0)
1093 #define IS_SAVE(x)    (((x) & 0xc1f80000) == 0x81e00000)
1094 #define IS_MOVO7R(x)  (((x) & 0xc1f8201f) == 0x8160000f)
1095 #define IS_MOVRO7(x)  (((x) & 0xfff82000) == 0x9f600000)
1096 #define IS_ORRG0O7(x) (((x) & 0xff78201f) == 0x9e100000)
1097 #define IS_ORG0RO7(x) (((x) & 0xff7fe000) == 0x9e100000)
1098 #define IS_ORG0O7R(x) (((x) & 0xc17fe01f) == 0x8010000f)
1099 #define IS_ORO7G0R(x) (((x) & 0xc17fe01f) == 0x8013c000)
1100 #define IS_RESTORE(x) (((x) & 0xc1f80000) == 0x81e80000)
1101 #define IS_RET(x)     ((x) == 0x81c7e008)
1102 #define IS_RETL(x)    ((x) == 0x81c3e008)
1103 #define IS_RETURN(x)  (((x) & 0xc1f80000) == 0x81c80000)
1104 #define IS_BRANCH(x)  ((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
1105 #define IS_CALL(x)    (((x) & 0xc0000000) == 0x40000000)
1106 #define IS_LDO7(x)    (((x) & 0xfff80000) == 0xde000000)
1107 
1108 static long pagesize = 0;
1109 
/* Record the leaf frame of a SPARC backtrace into lbuf.
 *
 * lbuf/lsize: output array of addresses and its capacity (in longs);
 * ind: next free slot; context: the interrupted ucontext.
 * Returns the updated index.
 *
 * Walks up to 20 instructions forward from the leaf pc looking for a
 * hint (save, restore, ret/retl/return, moves to or from %o7, ...)
 * that decides whether %o7 still holds this frame's return address.
 * If the walk is inconclusive, pc and %o7 are both recorded behind
 * SP_LEAF_CHECK_MARKER and the decision is deferred to DBE
 * post-processing (see the comment block above).
 */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);

  /* omazur: TBR START -- not used */
  /* Parallel-loop barrier: record the leaf pc and the loop body address
     taken from %o0 (see the __mt_EndOfTask_Barrier_ comment above).  */
  if (IN_BARRIER (pc))
    {
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  /* In a trap handler %o7 is garbage (see comment above); record pc only.  */
  if (IN_TRAP_HANDLER (pc))
    {
      if (ind < lsize)
	lbuf[ind++] = pc;
      return ind;
    }
#endif
  /* Scan forward from pc for instructions that reveal whether this is
     a leaf routine.  */
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
	break;
      else if (IS_SAVE (instr))
	{
	  /* Not yet shifted the register window: %o7 is the return address.  */
	  if (ind < lsize)
	    lbuf[ind++] = pc;
	  if (o7 && ind < lsize)
	    lbuf[ind++] = o7;
	  return ind;
	}
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
	break;
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
	{
	  /* %o7 is (re)written from rs2; take that register's value instead.  */
	  int rs2 = (instr & 0x1f) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_ORRG0O7 (instr))
	{
	  int rs2 = ((instr & 0x7c000) >> 14) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_RESTORE (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETURN (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RET (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETL (instr))
	{
	  /* process delay slot */
	  instr = *instrp++;
	  if (IS_RESTORE (instr))
	    o7 = 0;
	  break;
	}
      else if (IS_BRANCH (instr))
	{
	  /* A branch ends the forward walk; scan backwards instead for
	     evidence that %o7 was clobbered or reloaded.  */
	  unsigned *backbegin = ((unsigned *) pc - 1);
	  unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
	  while (backbegin > backend)
	    {
	      // 21920143 stack unwind: SPARC process_leaf backtracks too far
	      /*
	       * We've already dereferenced backbegin+1.
	       * So if backbegin is on the same page, we're fine.
	       * If we've gone to a different page, possibly things are not fine.
	       * We don't really know how to test that.
	       * Let's just assume the worst:  that dereferencing backbegin would segv.
	       * We won't know if we're in a leaf function or not.
	       */
	      if (pagesize == 0)
		pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
	      if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
		break;
	      unsigned backinstr = *backbegin--;
	      if (IS_LDO7 (backinstr))
		{
		  o7 = 0;
		  break;
		}
	      else if (IS_ILLTRAP (backinstr))
		break;
	      else if (IS_RETURN (backinstr))
		break;
	      else if (IS_RET (backinstr))
		break;
	      else if (IS_RETL (backinstr))
		break;
	      else if (IS_CALL (backinstr))
		break;
	      else if (IS_SAVE (backinstr))
		{
		  o7 = 0;
		  break;
		}
	    }
	  break;
	}
      else if (IS_CALL (instr))
	o7 = 0;
    }

#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("<event kind=\"%s\" id=\"%d\">time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x</event>\n",
      //       SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif

  /* Inconclusive: record both pc and %o7 behind a marker so the DBE can
     decide later whether %o7 is a real caller frame.  */
  if (o7)
    {
      if (ind < lsize)
	lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}
1263 
1264 #if WSIZE(64)
// detect signal handler (64-bit SPARC only, per the surrounding #if WSIZE(64))
/* Detect a Linux SPARC __rt_sigreturn_stub trampoline at tpc and, if
 * found, recover the interrupted pc/fp pair from the rt_signal_frame so
 * the walk can continue past the signal handler.  On a match, records
 * tpc/pc/pc1 into lbuf (subject to the bptr boundary) and advances *pfp
 * to the frame saved in the signal context.  Returns the updated index.
 */
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
		   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc) // the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind;  // the address is not aligned

  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
	break;
      tpc += 4;
    }

  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;

  // We are looking for __rt_sigreturn_stub with the instruction
  //     0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       *     struct sparc_stackf     ss;
       *     siginfo_t               info;
       *     struct pt_regs          regs;
       *     ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       *     unsigned long u_regs[16];
       *     unsigned long tstate;
       *     unsigned long tpc;
       *     unsigned long tnpc;
       *     ....};
       * pc and fp registers have offsets of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offset is 136.
       */
      greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136));
      greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120));
      (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112));
      if (pc && pc1)
	{
	  /* Frame below the bptr boundary: seed slot 0 with the saved pc.  */
	  if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2)
	    {
	      lbuf[0] = pc1;
	      if (ind == 0)
		ind++;
	    }
	  if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr)
	    {
	      if (ind < lsize)
		lbuf[ind++] = (unsigned long) tpc;
	      if (ind < lsize)
		lbuf[ind++] = pc;
	      if (ind < lsize)
		lbuf[ind++] = pc1;
	    }
	}
      DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp));
    }
  return ind;
}
1338 #endif
1339 
1340 /*
1341  * int stack_unwind( char *buf, int size, ucontext_t *context )
1342  *	This routine looks into the mcontext and
1343  *	trace stack frames to record return addresses.
1344  */
1345 int
stack_unwind(char * buf,int size,void * bptr,void * eptr,ucontext_t * context,int mode)1346 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
1347 {
1348   /*
1349    * trace the stack frames from user stack.
1350    * We are assuming that the frame pointer and return address
1351    * are null when we are at the top level.
1352    */
1353   long *lbuf = (long*) buf;
1354   int lsize = size / sizeof (long);
1355   struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */
1356   greg_t pc; /* program counter */
1357   int extra_frame = 0;
1358   if ((mode & 0xffff) == FRINFO_FROM_STACK)
1359     extra_frame = 1;
1360 
1361   int ind = 0;
1362   if (bptr == NULL)
1363     ind = process_leaf (lbuf, ind, lsize, context);
1364 
1365   int extra_frame = 0;
1366   if ((mode & 0xffff) == FRINFO_FROM_STACK)
1367     extra_frame = 1;
1368   int ind = 0;
1369   if (bptr == NULL)
1370     ind = process_leaf (lbuf, ind, lsize, context);
1371 
1372   while (fp)
1373     {
1374       if (ind >= lsize)
1375 	break;
1376       fp = (struct frame *) ((char *) fp + STACK_BIAS);
1377       if (eptr && fp >= (struct frame *) eptr)
1378 	{
1379 	  ind = ind >= 2 ? ind - 2 : 0;
1380 	  break;
1381 	}
1382 #if WSIZE(64) // detect signal handler
1383       unsigned char * tpc = ((unsigned char*) (fp->fr_savpc));
1384       struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS);
1385       int old_ind = ind;
1386       ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame);
1387       if (ind != old_ind)
1388 	{
1389 	  pc = (greg_t) tpc;
1390 	  fp = tfp;
1391 	}
1392       else
1393 #endif
1394 	{
1395 #if WSIZE(64)
1396 	  if (IN_TRAP_HANDLER (lbuf[ind - 1]))
1397 	    pc = fp->fr_local[6];
1398 	  else
1399 	    pc = fp->fr_savpc;
1400 #else
1401 	  pc = fp->fr_savpc;
1402 #endif
1403 	  fp = fp->fr_savfp;
1404 	  if (pc)
1405 	    {
1406 	      if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2)
1407 		{
1408 		  lbuf[0] = pc;
1409 		  if (ind == 0)
1410 		    ind++;
1411 		}
1412 	      if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr)
1413 		lbuf[ind++] = pc;
1414 	    }
1415 	}
1416 
1417       /* 4616238: _door_return may have a frame that has non-zero
1418        * saved stack pointer and zero pc
1419        */
1420       if (pc == (greg_t) NULL)
1421 	break;
1422     }
1423 
1424   if (ind >= lsize)
1425     { /* truncated stack handling */
1426       ind = lsize - 1;
1427       lbuf[ind++] = SP_TRUNC_STACK_MARKER;
1428     }
1429   return ind * sizeof (long);
1430 }
1431 
1432 #elif ARCH(Intel)
1433 
1434 /* get __NR_<syscall_name> constants */
1435 #include <syscall.h>
1436 
1437 /*
1438  * From uts/intel/ia32/os/sendsig.c:
1439  *
1440  * An amd64 signal frame looks like this on the stack:
1441  *
1442  * old %rsp:
1443  *		<128 bytes of untouched stack space>
1444  *		<a siginfo_t [optional]>
1445  *		<a ucontext_t>
1446  *		<siginfo_t *>
1447  *		<signal number>
1448  * new %rsp:	<return address (deliberately invalid)>
1449  *
1450  * The signal number and siginfo_t pointer are only pushed onto the stack in
1451  * order to allow stack backtraces.  The actual signal handling code expects the
1452  * arguments in registers.
1453  *
1454  * An i386 SVR4/ABI signal frame looks like this on the stack:
1455  *
1456  * old %esp:
1457  *		<a siginfo32_t [optional]>
1458  *		<a ucontext32_t>
1459  *		<pointer to that ucontext32_t>
1460  *		<pointer to that siginfo32_t>
1461  *		<signo>
1462  * new %esp:	<return address (deliberately invalid)>
1463  */
1464 
1465 #if WSIZE(32)
1466 #define OPC_REG(x)      ((x)&0x7)
1467 #define MRM_REGD(x)     (((x)>>3)&0x7)
1468 #define MRM_REGS(x)     ((x)&0x7)
1469 #define RED_ZONE        0
1470 #elif WSIZE(64)
1471 #define OPC_REG(x)      (B|((x)&0x7))
1472 #define MRM_REGD(x)     (R|(((x)>>3)&0x7))
1473 #define MRM_REGS(x)     (B|((x)&0x7))
1474 #define RED_ZONE        16
1475 #endif
1476 #define MRM_EXT(x)      (((x)>>3)&0x7)
1477 #define MRM_MOD(x)      ((x)&0xc0)
1478 
1479 #define RAX             0
1480 #define RDX             2
1481 #define RSP             4
1482 #define RBP             5
1483 
/* Per-candidate state for the speculative x86 code walk.  Several of
 * these contexts are processed in parallel (buf[MAXCTX] in the walker);
 * each tracks one possible interpretation of the instruction stream.
 */
struct AdvWalkContext
{
  unsigned char *pc;            /* current instruction being analysed */
  unsigned long *sp;            /* simulated stack pointer */
  unsigned long *sp_safe;       /* lowest stack address safe to dereference
				   (sp - RED_ZONE at the start of the walk) */
  unsigned long *fp;            /* simulated frame pointer */
  unsigned long *fp_sav;        /* NOTE(review): saved frame pointer value -- confirm against walker code */
  unsigned long *fp_loc;        /* NOTE(review): stack slot where fp was saved -- confirm */
  unsigned long rax;            /* NOTE(review): tracked %rax value -- confirm */
  unsigned long rdx;            /* NOTE(review): tracked %rdx value -- confirm */
  unsigned long ra_sav;         /* saved return address, used when sp == ra_loc
				   (see process_return_real) */
  unsigned long *ra_loc;        /* stack slot holding the saved return address */
  unsigned long regs[16];       /* tracked general registers; 0 = unknown (see getRegVal) */
  int tidx;         /* targets table index */
  uint32_t cval;    /* cache value */
};
1500 
1501 static unsigned long
getRegVal(struct AdvWalkContext * cur,int r,int * undefRez)1502 getRegVal (struct AdvWalkContext *cur, int r, int *undefRez)
1503 {
1504   if (cur->regs[r] == 0)
1505     {
1506       if (r == RBP)
1507 	{
1508 	  tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx  cur->pc=0x%lx\n",
1509 		   (unsigned long) cur->fp, (unsigned long) cur->pc);
1510 	  return (unsigned long) cur->fp;
1511 	}
1512       *undefRez = 1;
1513     }
1514   tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx  cur->pc=0x%lx\n",
1515 	   r, (unsigned long) cur->regs[r], (unsigned long) cur->pc);
1516   return cur->regs[r];
1517 }
1518 
1519 static unsigned char *
check_modrm(unsigned char * pc)1520 check_modrm (unsigned char *pc)
1521 {
1522   unsigned char modrm = *pc++;
1523   unsigned char mod = MRM_MOD (modrm);
1524   if (mod == 0xc0)
1525     return pc;
1526   unsigned char regs = modrm & 0x07;
1527   if (regs == RSP)
1528     {
1529       if (mod == 0x40)
1530 	return pc + 2;  // SIB + disp8
1531       if (mod == 0x80)
1532 	return pc + 5;  // SIB + disp32
1533       return pc + 1;    // SIB
1534     }
1535   if (mod == 0x0)
1536     {
1537       if (regs == RBP)
1538 	pc += 4; // disp32
1539     }
1540   else if (mod == 0x40)
1541     pc += 1; /* byte */
1542   else if (mod == 0x80)
1543     pc += 4; /* word */
1544   return pc;
1545 }
1546 
/* Read a signed immediate of width w bytes (1, 2, or anything else
 * meaning 4) from the instruction stream at pc, sign-extended to int.
 */
static int
read_int (unsigned char *pc, int w)
{
  int res;
  switch (w)
    {
    case 1:
      res = *((char *) pc);
      break;
    case 2:
      res = *((short *) pc);
      break;
    default:
      res = *((int *) pc);
      break;
    }
  return res;
}
1556 
/* Return codes for the x86 return-address recovery routines */
enum
{
  RA_FAILURE = 0,   /* no valid return address could be determined */
  RA_SUCCESS,       /* walk context advanced to the caller's frame */
  RA_END_OF_STACK,  /* a zero return address: top of stack reached */
  RA_SIGRETURN,     /* NOTE(review): presumably a sigreturn trampoline was found -- handled later in the file */
  RA_RT_SIGRETURN   /* NOTE(review): presumably an rt_sigreturn trampoline was found -- handled later in the file */
};
1566 
1567 /* Cache value encodings */
1568 static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */
1569 static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */
1570 
1571 
1572 #define MAXCTX         16
1573 #define MAXTRGTS       64
1574 #define MAXJMPREG       2
1575 #define MAXJMPREGCTX    3
1576 
1577 #define DELETE_CURCTX()  __collector_memcpy (cur, buf + (--nctx), sizeof (*cur))
1578 
1579 /**
1580  * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK
1581  * @param wctx
1582  * @return
1583  */
1584 static int
cache_get(struct WalkContext * wctx)1585 cache_get (struct WalkContext *wctx)
1586 {
1587   unsigned long addr;
1588   if (AddrTable_RA_FROMFP != NULL)
1589     {
1590       uint64_t idx = wctx->pc % ValTableSize;
1591       addr = AddrTable_RA_FROMFP[ idx ];
1592       if (addr == wctx->pc)
1593 	{ // Found in AddrTable_RA_FROMFP
1594 	  unsigned long *sp = NULL;
1595 	  unsigned long fp = wctx->fp;
1596 	  /* validate fp before use */
1597 	  if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
1598 	    return RA_FAILURE;
1599 	  sp = (unsigned long *) fp;
1600 	  fp = *sp++;
1601 	  unsigned long ra = *sp++;
1602 	  unsigned long tbgn = wctx->tbgn;
1603 	  unsigned long tend = wctx->tend;
1604 	  if (ra < tbgn || ra >= tend)
1605 	    if (!__collector_check_segment (ra, &tbgn, &tend, 0))
1606 	      return RA_FAILURE;
1607 	  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
1608 	  if (npc == 0)
1609 	    return RA_FAILURE;
1610 	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc);
1611 	  wctx->pc = npc;
1612 	  wctx->sp = (unsigned long) sp;
1613 	  wctx->fp = fp;
1614 	  wctx->tbgn = tbgn;
1615 	  wctx->tend = tend;
1616 	  return RA_SUCCESS;
1617 	}
1618     }
1619   if (NULL == AddrTable_RA_EOSTCK)
1620     return RA_FAILURE;
1621   uint64_t idx = wctx->pc % ValTableSize;
1622   addr = AddrTable_RA_EOSTCK[ idx ];
1623   if (addr != wctx->pc)
1624     return RA_FAILURE;
1625   DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__);
1626   return RA_END_OF_STACK;
1627 }
1628 /**
1629  * Save pc in RA_FROMFP or RA_EOSTCK cache depending on val
1630  * @param wctx
1631  */
1632 static void
cache_put(struct WalkContext * wctx,const uint32_t val)1633 cache_put (struct WalkContext *wctx, const uint32_t val)
1634 {
1635   if (RA_FROMFP == val)
1636     {
1637       // save pc in RA_FROMFP cache
1638       if (NULL != AddrTable_RA_FROMFP)
1639 	{
1640 	  uint64_t idx = wctx->pc % ValTableSize;
1641 	  AddrTable_RA_FROMFP[ idx ] = wctx->pc;
1642 	  if (NULL != AddrTable_RA_EOSTCK)
1643 	    if (AddrTable_RA_EOSTCK[ idx ] == wctx->pc)
1644 	      // invalidate pc in RA_EOSTCK cache
1645 	      AddrTable_RA_EOSTCK[ idx ] = 0;
1646 	}
1647       return;
1648     }
1649   if (RA_EOSTCK == val)
1650     {
1651       // save pc in RA_EOSTCK cache
1652       if (NULL != AddrTable_RA_EOSTCK)
1653 	{
1654 	  uint64_t idx = wctx->pc % ValTableSize;
1655 	  AddrTable_RA_EOSTCK[ idx ] = wctx->pc;
1656 	  if (NULL != AddrTable_RA_FROMFP)
1657 	    {
1658 	      if (AddrTable_RA_FROMFP[ idx ] == wctx->pc)
1659 		// invalidate pc in RA_FROMFP cache
1660 		AddrTable_RA_FROMFP[ idx ] = 0;
1661 	    }
1662 	}
1663       return;
1664     }
1665 }
1666 
/* Commit one return step from a speculative walk context into the real
 * WalkContext: pop the return address at cur->sp, validate it against
 * the current (or any mapped) text segment, and update
 * wctx->{pc,sp,fp,tbgn,tend}.  When cache_on is nonzero, end-of-stack
 * and trusted-frame-pointer results are recorded via cache_put.
 * Returns RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE.
 */
static int
process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on)
{
  if ((unsigned long) cur->sp >= wctx->sbase ||
      (unsigned long) cur->sp < wctx->sp)
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n",
		cur->sp, wctx->sp, wctx->sbase);
      return RA_FAILURE;
    }

  unsigned long ra;
  if (cur->sp == cur->ra_loc)
    {
      /* sp points exactly at the tracked return-address slot.  */
      ra = cur->ra_sav;
      cur->sp++;
    }
  else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
    ra = *cur->sp++;
  else
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe);
      return RA_FAILURE;
    }
  /* A zero return address marks the top of the stack.  */
  if (ra == 0)
    {
      if (cache_on)
	cache_put (wctx, RA_EOSTCK);
      wctx->pc = ra;
      wctx->sp = (unsigned long) cur->sp;
      wctx->fp = (unsigned long) cur->fp;
      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__);
      return RA_END_OF_STACK;
    }

  unsigned long tbgn = wctx->tbgn;
  unsigned long tend = wctx->tend;
  if (ra < tbgn || ra >= tend)
    {
      if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n",
		    ra, wctx->tbgn, wctx->tend);
	  return RA_FAILURE;
	}
    }

  if (cur->cval == RA_FROMFP)
    {
      /* Only cache when the ra actually came from a standard fp-linked frame.  */
      if (wctx->fp == (unsigned long) (cur->sp - 2))
	{
	  if (cache_on)
	    cache_put (wctx, RA_FROMFP);
	}
      else
	cur->cval = 0;
    }

  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
  if (npc == 0)
    {
      if (cur->cval == RA_FROMFP)
	{
	  /* We have another evidence that we can trust this RA */
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc);
	  wctx->pc = ra;
	}
      else
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n");
	  return RA_FAILURE;
	}
    }
  else
    wctx->pc = npc;
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = (unsigned long) cur->fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;
  return RA_SUCCESS;
}
1748 
/* Convenience wrapper around process_return_real with caching enabled.  */
static int
process_return (struct WalkContext *wctx, struct AdvWalkContext *cur)
{
  const int use_cache = 1;
  return process_return_real (wctx, cur, use_cache);
}
1754 
/* Record in the OMP caches the result (val) of unwinding from
 * wctx_pc_save to wctx, keyed on wctx_pc_save->pc, together with the
 * return address found on the stack.  The four cache tables are lazily
 * allocated on first use when omp_no_walk is set; if any allocation
 * fails, caching is silently disabled.
 */
static void
omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save,
	       struct WalkContext *wctx, uint32_t val)
{
  if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL))
    {
      size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
    }
  if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
    return;

#define USE_18434988_OMP_CACHE_WORKAROUND
#ifndef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx = wctx_pc_save->pc * ROOT_IDX;
  OmpVals[ idx % OmpValTableSize ] = val;
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
#endif
  /* Locate the stack slot holding the return address: below the new sp
     for end-of-stack, otherwise via the saved frame pointer if it looks
     valid, else below the new sp.  */
  unsigned long *sp = NULL;
  unsigned long fp = wctx_pc_save->fp;
  int from_fp = 0;
  if (val == RA_END_OF_STACK)
    {
      sp = (unsigned long *) (wctx->sp);
      sp--;
      TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp);
    }
  else
    {
      if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp))
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, sp=%p\n", sp);
	}
      else
	{
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp);
	  sp = (unsigned long *) fp;
	  from_fp = 1;
	}
    }

  if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase))
    return;

  unsigned long ra = *sp++;
  if (from_fp)
    {
      /* The fp-derived ra is not in the text segment: fall back to sp.  */
      unsigned long tbgn = wctx_pc_save->tbgn;
      unsigned long tend = wctx_pc_save->tend;
      if (ra < tbgn || ra >= tend)
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  ra = *sp++;
	}
    }
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
  /* Write OmpRAs last: a zero slot acts as a lock against readers seeing
     a partially updated entry (bug 18434988).  */
  uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX;
  uint64_t idx2 = (idx1 + val) * ROOT_IDX;
  uint64_t idx3 = (idx2 + val) * ROOT_IDX;
  uint64_t idx4 = (idx3 + val) * ROOT_IDX;
  OmpRAs [ idx4 % OmpValTableSize ] = 0; // lock
  OmpVals[ idx1 % OmpValTableSize ] = val;
  __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
  OmpRAs [ idx4 % OmpValTableSize ] = ra;
#else
  idx = (idx + val) * ROOT_IDX;
  OmpRAs[ idx % OmpValTableSize ] = ra;
#endif
  TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc);
}
1838 
1839 /*
1840  *  See bug 17166877 - malloc_internal unwind failure.
1841  *  Sometimes there are several calls right after ret, like:
1842  *      leave
1843  *      ret
1844  *      call xxx
1845  *      call xxxx
1846  *      call xxxxx
1847  *  If they are also jump targets, we should better not
1848  *  create new jump context for those, since they may
1849  *  end up into some other function.
1850  */
/* Return 1 when npc points at a call opcode (0xe8) belonging to a run
 * of at least 3 consecutive 5-byte calls that directly follows a
 * "leave; ret" (0xc9 0xc3) sequence -- see bug 17166877 above.  Such
 * call slots should not get their own jump context.
 */
static int
is_after_ret (unsigned char * npc)
{
  enum { CALL_OP = 0xe8, CALL_LEN = 5, MAX_STEPS = 10, MIN_CALLS = 3 };
  if (*npc != CALL_OP)
    return 0;
  /* Walk backwards over the run of call instructions.  */
  unsigned char *first = npc;
  int total = 1;
  for (int back = 0; back < MAX_STEPS && *(first - CALL_LEN) == CALL_OP; back++)
    {
      first -= CALL_LEN;
      total++;
    }
  /* The run must be immediately preceded by "leave; ret".  */
  if (*(first - 1) != 0xc3 || *(first - 2) != 0xc9)
    return 0;
  /* Count the calls that follow the starting one as well.  */
  unsigned char *last = npc;
  for (int fwd = 0; fwd < MAX_STEPS && *(last + CALL_LEN) == CALL_OP; fwd++)
    {
      last += CALL_LEN;
      total++;
    }
  return total >= MIN_CALLS;
}
1880 
1881 static int
find_i386_ret_addr(struct WalkContext * wctx,int do_walk)1882 find_i386_ret_addr (struct WalkContext *wctx, int do_walk)
1883 {
1884   if (wctx->sp == 0)
1885     // Some artificial contexts may have %sp set to 0. See SETFUNCTIONCONTEXT()
1886     return RA_FAILURE;
1887 
1888   /* Check cached values */
1889   int retc = cache_get (wctx);
1890   if (retc != RA_FAILURE)
1891     return retc;
1892 
1893   /* An attempt to perform code analysis for call stack tracing */
1894   unsigned char opcode;
1895   unsigned char extop;
1896   unsigned char extop2;
1897   unsigned char modrm;
1898   int imm8; /* immediate operand, byte */
1899   int immv; /* immediate operand, word(2) or doubleword(4) */
1900   int reg; /* register code */
1901 
1902   /* Buffer for branch targets (analysis stoppers) */
1903   unsigned char *targets[MAXTRGTS];
1904   int ntrg = 0; /* number of entries in the table */
1905   targets[ntrg++] = (unsigned char*) wctx->pc;
1906   targets[ntrg++] = (unsigned char*) - 1;
1907 
1908   struct AdvWalkContext buf[MAXCTX];
1909   struct AdvWalkContext *cur = buf;
1910   CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur));
1911 
1912   cur->pc = (unsigned char*) wctx->pc;
1913   cur->sp = (unsigned long*) wctx->sp;
1914   cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */
1915   cur->fp = (unsigned long*) wctx->fp;
1916   cur->tidx = 1;
1917   DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc);
1918 
1919   int nctx = 1; /* number of contexts being processed */
1920   int cnt = 8192; /* number of instructions to analyse */
1921 
1922   /*
1923    * The basic idea of our x86 stack unwind is that we don't know
1924    * if we can trust the frame-pointer register.  So we walk
1925    * instructions to find a return instruction, at which point
1926    * we know the return address is on the top of the stack, etc.
1927    *
1928    * A severe challenge to walking x86 instructions is when we
1929    * encounter "jmp *(reg)" instructions, where we are expected
1930    * to jump to the (unknown-to-us) contents of a register.
1931    *
1932    * The "jmp_reg" code here attempts to keep track of the
1933    * context for such a jump, deferring any handling of such
1934    * a difficult case.  We continue with other contexts, hoping
1935    * that some other walk will take us to a return instruction.
1936    *
1937    * If no other walk helps, we return to "jmp_reg" contexts.
1938    * While we don't know the jump target, it is possible that the
1939    * bytes immediately following the jmp_reg instruction represent
1940    * one possible target, as might be the case when a "switch"
1941    * statement is compiled.
1942    *
1943    * Unfortunately, the bytes following a "jmp_reg" instruction might
1944    * instead be a jump target from somewhere else -- execution might
1945    * never "fall through" from the preceding "jmp_reg".  Those bytes
1946    * might not even be instructions at all.  There are many uses of
1947    * jmp_reg instructions beyond just compiling switch statements.
1948    *
1949    * So walking the bytes after a "jmp_reg" instruction can lead
1950    * to bugs and undefined behavior, including SEGV and core dump.
1951    *
1952    * We currently do not really understand the "jmp_reg" code below.
1953    */
1954   int jmp_reg_switch_mode = 0;
1955   int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case
1956   int total_num_jmp_reg = 0; // number of total jmp *reg met
1957   struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case
1958   struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases
1959   struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases
1960 
1961   int cur_jmp_reg_switch = 0; // current switch table
1962   int num_jmp_reg_switch = 0; // number of switch table
1963   int jmp_reg_switch_case = 0; // case number in current switch table
1964   unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case
1965   unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleteing context of jump target
1966   unsigned char * jmp_reg_switch_base = NULL; // start pc for checking offsets
1967   int max_jmp_reg_switch_case = 2;
1968 #if WSIZE(32)
1969   int max_switch_pc_offset = 512;
1970 #else // WSIZE(64)
1971   int max_switch_pc_offset = 1024;
1972 #endif
1973   int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG
1974   int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions
1975 
1976 
1977   int save_ctx = 0; // flag to save walk context in the cache to speed up unwind
1978   struct WalkContext wctx_pc_save;
1979   if (do_walk == 0)
1980     // do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first
1981     __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
1982 
1983 startWalk:
1984   if (do_walk == 0)
1985     { // try to resolve RA from stack frame pointer
1986       if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
1987 	{
1988 	  do_walk = 1;
1989 	  goto startWalk;
1990 	}
1991       // before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext))
1992       uint64_t idx = wctx->pc * ROOT_IDX;
1993       uint32_t val = OmpVals[idx % OmpValTableSize];
1994       idx = (idx + val) * ROOT_IDX;
1995 #ifdef USE_18434988_OMP_CACHE_WORKAROUND
1996       // Check ra: if it is 0 - then cache is invalid
1997       uint64_t idx4;
1998       idx4 = (idx + val) * ROOT_IDX;
1999       idx4 = (idx4 + val) * ROOT_IDX;
2000       if (0 == OmpRAs[ idx4 % OmpValTableSize ])  // Invalid cache
2001 	goto checkFP;
2002 #endif
2003       struct WalkContext saved_ctx;
2004       __collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2005       if (wctx->pc == saved_ctx.pc
2006 	  && wctx->sp == saved_ctx.sp
2007 	  && wctx->fp == saved_ctx.fp
2008 	  && wctx->tbgn == saved_ctx.tbgn
2009 	  && wctx->tend == saved_ctx.tend)
2010 	{ // key match, RA may be valid
2011 	  idx = (idx + val) * ROOT_IDX;
2012 	  unsigned long *sp = NULL;
2013 	  unsigned long fp = wctx->fp;
2014 	  int from_fp = 0;
2015 	  if (val == RA_END_OF_STACK)
2016 	    {
2017 	      DprintfT (SP_DUMP_UNWIND, "find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc);
2018 	      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2019 	      return val;
2020 	    }
2021 	  else
2022 	    {
2023 	      if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
2024 		{
2025 		  TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc);
2026 		  sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2027 		  sp--;
2028 		  if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2029 		    {
2030 		      goto checkFP;
2031 		    }
2032 		  unsigned long ra = *sp;
2033 		  uint64_t idx2 = (idx + val) * ROOT_IDX;
2034 		  if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2035 		    {
2036 		      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2037 		      TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val);
2038 		      return val;
2039 		    }
2040 		  TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val);
2041 		  goto checkFP;
2042 		}
2043 	      sp = (unsigned long *) fp;
2044 	      from_fp = 1;
2045 	    }
2046 
2047 	  uint64_t idx2 = (idx + val) * ROOT_IDX;
2048 	  unsigned long ra = *sp++;
2049 	  if (from_fp)
2050 	    {
2051 	      unsigned long tbgn = wctx->tbgn;
2052 	      unsigned long tend = wctx->tend;
2053 	      if (ra < tbgn || ra >= tend)
2054 		{
2055 		  sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2056 		  sp--;
2057 		  //if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) {
2058 		  // The check above was replaced with the check below,
2059 		  // because we do not know why "- 16" and "- sizeof(*sp)" was used.
2060 		  if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2061 		    goto checkFP;
2062 		  else
2063 		    ra = *sp;
2064 		}
2065 	    }
2066 	  if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2067 	    {
2068 	      TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc);
2069 	      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2070 	      return val;
2071 	    }
2072 	}
2073       goto checkFP;
2074     }
2075   else
2076     {
2077       CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2078       CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2079     }
2080   while (cnt--)
2081     {
2082       if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1))
2083 	{ // no context available, try jmp switch mode
2084 	  int i = 0;
2085 	  if (num_jmp_reg == expected_num_jmp_reg)
2086 	    jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode
2087 	  DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n",
2088 		    num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode);
2089 	  // the ideal asm of switch is
2090 	  //   jmp reg
2091 	  //   ...//case 1
2092 	  //   ret
2093 	  //   ...//case 2
2094 	  //   ret
2095 	  //   ...//etc
2096 	  if (jmp_reg_switch_mode == 0)
2097 	    {
2098 	      num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg
2099 	      jmp_reg_switch_mode = 1; // begin switch mode
2100 	      for (i = 0; i < num_jmp_reg_switch; i++)
2101 		{
2102 		  if (jmp_reg_switch_ctx[i] == NULL)
2103 		    jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i]));
2104 		  if (jmp_reg_switch_ctx[i] != NULL)
2105 		    { // backup jmp_reg_ctx
2106 		      __collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i]));
2107 		      cur_jmp_reg_switch = 0; // reset the current switch table
2108 		      jmp_reg_switch_case = 0; // reset the case number in current switch table
2109 		    }
2110 		}
2111 	      if (jmp_reg_switch_backup_ctx == NULL)
2112 		{ // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA
2113 		  jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx));
2114 		  if (jmp_reg_switch_backup_ctx != NULL)
2115 		    __collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur));
2116 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n");
2117 		}
2118 	    }
2119 	  if (jmp_reg_switch_mode == 1)
2120 	    { // in the process of trying switch cases
2121 	      if (cur_jmp_reg_switch == num_jmp_reg_switch)
2122 		{
2123 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n");
2124 		  if (jmp_reg_switch_backup_ctx != NULL)
2125 		    __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2126 		  int rc = process_return_real (wctx, cur, 0);
2127 		  if (rc == RA_SUCCESS)
2128 		    {
2129 		      if (save_ctx)
2130 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2131 		      return rc;
2132 		    }
2133 		  break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP
2134 		}
2135 	      unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc;
2136 	      if (jmp_reg_switch_case == 0)
2137 		// first switch case
2138 		npc = check_modrm (npc); // pc next to "jmp reg" instruction
2139 	      else if (jmp_reg_switch_pc != NULL)
2140 		npc = jmp_reg_switch_pc; // // pc next to "ret" instruction of previous case
2141 	      else
2142 		{
2143 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, jmp_reg_switch_pc=%p\n",
2144 			    jmp_reg_switch_case, jmp_reg_switch_pc);
2145 		  break; //goto checkFP
2146 		}
2147 	      jmp_reg_switch_base = npc;
2148 	      struct AdvWalkContext *new = buf + nctx;
2149 	      nctx += 1;
2150 	      __collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new));
2151 	      new->pc = npc;
2152 	      cur = new; /* advance the new context first */
2153 	      jmp_reg_switch_pc = NULL;
2154 	      jmp_reg_switch_case++;
2155 	      if (jmp_reg_switch_case == max_jmp_reg_switch_case)
2156 		{ // done many cases, change to another switch table
2157 		  cur_jmp_reg_switch++;
2158 		  jmp_reg_switch_case = 0;
2159 		}
2160 	    }
2161 	  num_jmp_reg = 0;
2162 	}
2163       if (jmp_reg_switch_mode == 1)
2164 	{ // when processing switch cases, check pc each time
2165 	  unsigned long tbgn = wctx->tbgn;
2166 	  unsigned long tend = wctx->tend;
2167 	  if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend)
2168 	    {
2169 	      DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc));
2170 	      break;
2171 	    }
2172 	  if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset)
2173 	    {
2174 	      DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n");
2175 	      if (jmp_reg_switch_backup_ctx != NULL)
2176 		__collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2177 	      int rc = process_return_real (wctx, cur, 0);
2178 	      if (rc == RA_SUCCESS)
2179 		{
2180 		  if (save_ctx)
2181 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2182 		  return rc;
2183 		}
2184 	      break; // limit the walk offset after jmp reg instruction, got checkFP
2185 	    }
2186 	}
2187 
2188       if (nctx == 0)
2189 	break;
2190 //      dump_targets (__LINE__, ntrg, targets);
2191       while (cur->pc > targets[cur->tidx])
2192 	cur->tidx += 1;
2193       if (cur->pc == targets[cur->tidx])
2194 	{
2195 	  /* Stop analysis. Delete context. */
2196 	  if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old)
2197 	    {
2198 	      if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL)
2199 		{
2200 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n",
2201 			    __LINE__, cur->pc, jmp_reg_switch_pc, nctx);
2202 		  jmp_reg_switch_pc = cur->pc; // save cp before delete context, may be used as a start of switch case
2203 		  jmp_reg_switch_pc_old = jmp_reg_switch_pc;
2204 		}
2205 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__);
2206 	      DELETE_CURCTX ();
2207 	      if (cur >= buf + nctx)
2208 		cur = buf;
2209 	      continue;
2210 	    }
2211 	  if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old)
2212 	    jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old
2213 	}
2214 
2215       /* let's walk the next x86 instruction */
2216       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n",
2217 	       __LINE__, (long) (cur - buf), (unsigned long) cur->pc,
2218 	       (int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2],
2219 	       (int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5],
2220 	       (int) cur->pc[6], (unsigned long) cur->sp);
2221       int v = 4; /* Operand size */
2222       int a = 4; /* Address size */
2223       /* int W = 0;	   REX.W bit */
2224 #if WSIZE(64)
2225       int R = 0; /* REX.R bit */
2226 #endif
2227       int X = 0; /* REX.X bit */
2228       int B = 0; /* REX.B bit */
2229       /* Check prefixes */
2230       int done = 0;
2231       while (!done)
2232 	{
2233 	  opcode = *cur->pc++;
2234 	  switch (opcode)
2235 	    {
2236 	    case 0x66: /* opd size override */
2237 	      v = 2;
2238 	      break;
2239 	    case 0x67: /*addr size override */
2240 	      a = 2;
2241 	      break;
2242 #if WSIZE(64)
2243 	    case 0x40: /* REX */
2244 	    case 0x41:
2245 	    case 0x42:
2246 	    case 0x43:
2247 	    case 0x44:
2248 	    case 0x45:
2249 	    case 0x46:
2250 	    case 0x47:
2251 	    case 0x48:
2252 	    case 0x49:
2253 	    case 0x4a:
2254 	    case 0x4b:
2255 	    case 0x4c:
2256 	    case 0x4d:
2257 	    case 0x4e:
2258 	    case 0x4f:
2259 	      B = (opcode & 0x1) ? 8 : 0;
2260 	      X = (opcode & 0x2) ? 8 : 0;
2261 	      R = (opcode & 0x4) ? 8 : 0;
2262 	      if (opcode & 0x8)  /* 64 bit operand size */
2263 		v = 8;
2264 	      opcode = *cur->pc++;
2265 	      done = 1;
2266 	      break;
2267 #endif
2268 	    default:
2269 	      done = 1;
2270 	      break;
2271 	    }
2272 	}
2273       int z = (v == 8) ? 4 : v;
2274       switch (opcode)
2275 	{
2276 	case 0x0: /* add Eb,Gb */
2277 	case 0x01: /* add Ev,Gv */
2278 	case 0x02: /* add Gb,Eb */
2279 	case 0x03: /* add Gv,Ev */
2280 	  cur->pc = check_modrm (cur->pc);
2281 	  break;
2282 	case 0x04: /* add %al,Ib */
2283 	  cur->pc += 1;
2284 	  break;
2285 	case 0x05: /* add %eax,Iz */
2286 	  cur->pc += z;
2287 	  break;
2288 	case 0x06: /* push es */
2289 	  cur->sp -= 1;
2290 	  break;
2291 	case 0x07: /* pop es */
2292 	  cur->sp += 1;
2293 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2294 	    cur->sp_safe = cur->sp - RED_ZONE;
2295 	  break;
2296 	case 0x08: /* or Eb,Gb */
2297 	case 0x09: /* or Ev,Gv */
2298 	case 0x0a: /* or Gb,Eb */
2299 	case 0x0b: /* or Gv,Ev */
2300 	  cur->pc = check_modrm (cur->pc);
2301 	  break;
2302 	case 0x0c: /* or %al,Ib */
2303 	  cur->pc += 1;
2304 	  break;
2305 	case 0x0d: /* or %eax,Iz */
2306 	  cur->pc += z;
2307 	  break;
2308 	case 0x0e: /* push cs */
2309 	  cur->sp -= 1;
2310 	  break;
2311 	case 0x0f: /* two-byte opcodes */
2312 	  extop = *cur->pc++;
2313 	  switch (extop)
2314 	    { /* RTM or HLE */
2315 	    case 0x01:
2316 	      extop2 = *cur->pc;
2317 	      switch (extop2)
2318 		{
2319 		case 0xd5: /* xend */
2320 		case 0xd6: /* xtest */
2321 		  cur->pc++;
2322 		  break;
2323 		default:
2324 		  break;
2325 		}
2326 	      break;
2327 	    case 0x03:
2328 	      cur->pc = check_modrm (cur->pc);
2329 	      break;
2330 	    case 0x0b:
2331 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. opcode=0x%02x\n",
2332 		       __LINE__, (int) opcode);
2333 	      DELETE_CURCTX ();
2334 	      break;
2335 	    case 0x05: /* syscall */
2336 	    case 0x34: /* sysenter */
2337 	      if (cur->rax == __NR_exit)
2338 		{
2339 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2340 			   __LINE__, (int) opcode);
2341 		  DELETE_CURCTX ();
2342 		  break;
2343 		}
2344 	      else if (cur->rax == __NR_rt_sigreturn)
2345 		{
2346 		  if (jmp_reg_switch_mode == 1)
2347 		    {
2348 		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n",
2349 			       __LINE__, (int) opcode);
2350 		      goto checkFP;
2351 		    }
2352 		  wctx->sp = (unsigned long) cur->sp;
2353 		  if (save_ctx)
2354 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
2355 		  return RA_RT_SIGRETURN;
2356 		}
2357 #if WSIZE(32)
2358 	      else if (cur->rax == __NR_sigreturn)
2359 		{
2360 		  if (jmp_reg_switch_mode == 1)
2361 		    {
2362 		      DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n");
2363 		      goto checkFP;
2364 		    }
2365 		  wctx->sp = (unsigned long) cur->sp;
2366 		  if (save_ctx)
2367 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
2368 		  return RA_SIGRETURN;
2369 		}
2370 #endif
2371 	      /* Check for Linus' trick in the vsyscall page */
2372 	      while (*cur->pc == 0x90)  /* nop */
2373 		cur->pc++;
2374 	      if (*cur->pc == 0xeb)  /* jmp imm8 */
2375 		cur->pc += 2;
2376 	      break;
2377 	    case 0x0d: /* nop Ev */
2378 	      cur->pc = check_modrm (cur->pc);
2379 	      break;
2380 	    case 0x10: /* xmm Vq,Wq */
2381 	    case 0x11:
2382 	    case 0x12:
2383 	    case 0x13:
2384 	    case 0x14:
2385 	    case 0x15:
2386 	    case 0x16:
2387 	    case 0x17:
2388 	      cur->pc = check_modrm (cur->pc);
2389 	      break;
2390 	    case 0x18: /* prefetch */
2391 	      cur->pc = check_modrm (cur->pc);
2392 	      break;
2393 	    case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. ) is parsing as repz nop edx */
2394 	      cur->pc += 2;
2395 	      break;
2396 	    case 0x1f: /* nop Ev */
2397 	      cur->pc = check_modrm (cur->pc);
2398 	      break;
2399 	    case 0x28: /* xmm Vq,Wq */
2400 	    case 0x29:
2401 	    case 0x2a:
2402 	    case 0x2b:
2403 	    case 0x2c:
2404 	    case 0x2d:
2405 	    case 0x2e:
2406 	    case 0x2f:
2407 	      cur->pc = check_modrm (cur->pc);
2408 	      break;
2409 	    case 0x30: /* wrmsr */
2410 	    case 0x31: /* rdtsc */
2411 	    case 0x32: /* rdmsr */
2412 	    case 0x33: /* rdpmc */
2413 	      break;
2414 	      /* case 0x34: sysenter (see above) */
2415 	    case 0x38: case 0x3a:
2416 	      extop2 = *cur->pc++;
2417 	      cur->pc = check_modrm (cur->pc);
2418 	      // 21275311 Unwind failure in native stack for java application running on jdk8
2419 	      // Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte.
2420 	      if (extop == 0x3a)
2421 		cur->pc++;
2422 	      break;
2423 	    case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */
2424 	    case 0x44: case 0x45: case 0x46: case 0x47:
2425 	    case 0x48: case 0x49: case 0x4a: case 0x4b:
2426 	    case 0x4c: case 0x4d: case 0x4e: case 0x4f:
2427 	      cur->pc = check_modrm (cur->pc);
2428 	      break;
2429 	    case 0x50: case 0x51: case 0x52: case 0x53:
2430 	    case 0x54: case 0x55: case 0x56: case 0x57:
2431 	    case 0x58: case 0x59: case 0x5a: case 0x5b:
2432 	    case 0x5c: case 0x5d: case 0x5e: case 0x5f:
2433 	    case 0x60: case 0x61: case 0x62: case 0x63:
2434 	    case 0x64: case 0x65: case 0x66: case 0x67:
2435 	    case 0x68: case 0x69: case 0x6a: case 0x6b:
2436 	    case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2437 	      cur->pc = check_modrm (cur->pc);
2438 	      break;
2439 	    case 0x70: case 0x71: case 0x72: case 0x73:
2440 	      cur->pc = check_modrm (cur->pc) + 1;
2441 	      break;
2442 	    case 0x74: case 0x75: case 0x76:
2443 	      cur->pc = check_modrm (cur->pc);
2444 	      break;
2445 	    case 0x77:
2446 	      break;
2447 	    case 0x7c: case 0x7d: case 0x7e: case 0x7f:
2448 	      cur->pc = check_modrm (cur->pc);
2449 	      break;
2450 	    case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */
2451 	    case 0x84: case 0x85: case 0x86: case 0x87:
2452 	    case 0x88: case 0x89: case 0x8a: case 0x8b:
2453 	    case 0x8c: case 0x8d: case 0x8e: case 0x8f:
2454 	      immv = read_int (cur->pc, z);
2455 	      cur->pc += z;
2456 	      if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2457 		{
2458 		  int tidx = 0;
2459 		  unsigned char *npc = cur->pc + immv;
2460 		  if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
2461 		    {
2462 		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2463 			       __LINE__, (int) opcode);
2464 		      DELETE_CURCTX ();
2465 		      break;
2466 		    }
2467 		  if (is_after_ret (npc))
2468 		    break;
2469 		  while (npc > targets[tidx])
2470 		    tidx += 1;
2471 		  if (npc != targets[tidx])
2472 		    {
2473 		      if (ntrg < MAXTRGTS)
2474 			{
2475 			  for (int i = 0; i < nctx; i++)
2476 			    if (buf[i].tidx >= tidx)
2477 			      buf[i].tidx++;
2478 
2479 			  /* insert a new target */
2480 			  for (int i = ntrg; i > tidx; i--)
2481 			    targets[i] = targets[i - 1];
2482 			  ntrg += 1;
2483 			  targets[tidx++] = npc;
2484 			}
2485 		      else
2486 			DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n",
2487 				  __LINE__, ntrg);
2488 		      struct AdvWalkContext *new = buf + nctx;
2489 		      nctx += 1;
2490 		      __collector_memcpy (new, cur, sizeof (*new));
2491 		      new->pc = npc;
2492 		      new->tidx = tidx;
2493 		      cur = new; /* advance the new context first */
2494 		      continue;
2495 		    }
2496 		}
2497 	      else
2498 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n",
2499 			  __LINE__, ntrg);
2500 	      break;
2501 	    case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */
2502 	    case 0x94: case 0x95: case 0x96: case 0x97:
2503 	    case 0x98: case 0x99: case 0x9a: case 0x9b:
2504 	    case 0x9c: case 0x9d: case 0x9e: case 0x9f:
2505 	      cur->pc = check_modrm (cur->pc);
2506 	      break;
2507 	    case 0xa0: /* push fs */
2508 	      cur->sp -= 1;
2509 	      break;
2510 	    case 0xa1: /* pop fs */
2511 	      cur->sp += 1;
2512 	      if (cur->sp - RED_ZONE > cur->sp_safe)
2513 		cur->sp_safe = cur->sp - RED_ZONE;
2514 	      break;
2515 	    case 0xa2: /* cpuid */
2516 	      break;
2517 	    case 0xa3: /* bt Ev,Gv */
2518 	      cur->pc = check_modrm (cur->pc);
2519 	      break;
2520 	    case 0xa4: /* shld Ev,Gv,Ib */
2521 	      cur->pc = check_modrm (cur->pc);
2522 	      cur->pc += 1;
2523 	      break;
2524 	    case 0xa5: /* shld Ev,Gv,%cl */
2525 	      cur->pc = check_modrm (cur->pc);
2526 	      break;
2527 	    case 0xa8: /* push gs */
2528 	      cur->sp -= 1;
2529 	      break;
2530 	    case 0xa9: /* pop gs */
2531 	      cur->sp += 1;
2532 	      if (cur->sp - RED_ZONE > cur->sp_safe)
2533 		cur->sp_safe = cur->sp - RED_ZONE;
2534 	      break;
2535 	    case 0xaa: /* rsm */
2536 	      break;
2537 	    case 0xab: /* bts Ev,Gv */
2538 	      cur->pc = check_modrm (cur->pc);
2539 	      break;
2540 	    case 0xac: /* shrd Ev,Gv,Ib */
2541 	      cur->pc = check_modrm (cur->pc);
2542 	      cur->pc += 1;
2543 	      break;
2544 	    case 0xad: /* shrd Ev,Gv,%cl */
2545 	      cur->pc = check_modrm (cur->pc);
2546 	      break;
2547 	    case 0xae: /* group15 */
2548 	      cur->pc = check_modrm (cur->pc);
2549 	      break;
2550 	    case 0xaf: /* imul Gv,Ev */
2551 	      cur->pc = check_modrm (cur->pc);
2552 	      break;
2553 	    case 0xb1: /* cmpxchg Ev,Gv */
2554 	      cur->pc = check_modrm (cur->pc);
2555 	      break;
2556 	    case 0xb3:
2557 	    case 0xb6: /* movzx Gv,Eb */
2558 	    case 0xb7: /* movzx Gv,Ew */
2559 	      cur->pc = check_modrm (cur->pc);
2560 	      break;
2561 	    case 0xba: /* group8 Ev,Ib */
2562 	      cur->pc = check_modrm (cur->pc);
2563 	      cur->pc += 1;
2564 	      break;
2565 	    case 0xbb: /* btc Ev,Gv */
2566 	    case 0xbc: /* bsf Gv,Ev */
2567 	    case 0xbd: /* bsr Gv,Ev */
2568 	      cur->pc = check_modrm (cur->pc);
2569 	      break;
2570 	    case 0xbe: /* movsx Gv,Eb */
2571 	    case 0xbf: /* movsx Gv,Ew */
2572 	      cur->pc = check_modrm (cur->pc);
2573 	      break;
2574 	    case 0xc0: /* xadd Eb,Gb */
2575 	    case 0xc1: /* xadd Ev,Gv */
2576 	      cur->pc = check_modrm (cur->pc);
2577 	      break;
2578 	    case 0xc2: /* cmpps V,W,Ib */
2579 	      cur->pc = check_modrm (cur->pc);
2580 	      cur->pc += 1;
2581 	      break;
2582 	    case 0xc3: /* movnti M,G */
2583 	      cur->pc = check_modrm (cur->pc);
2584 	      break;
2585 	    case 0xc6: /* shufps V,W,Ib */
2586 	      cur->pc = check_modrm (cur->pc);
2587 	      cur->pc += 1;
2588 	      break;
2589 	    case 0xc7: /* RDRAND */
2590 	      cur->pc = check_modrm (cur->pc);
2591 	      break;
2592 	    case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */
2593 	    case 0xcc: case 0xcd: case 0xce: case 0xcf:
2594 	      break;
2595 	    case 0xd0: case 0xd1: case 0xd2: case 0xd3:
2596 	    case 0xd4: case 0xd5: case 0xd6: case 0xd7:
2597 	    case 0xd8: case 0xd9: case 0xda: case 0xdb:
2598 	    case 0xdc: case 0xdd: case 0xde: case 0xdf:
2599 	    case 0xe0: case 0xe1: case 0xe2: case 0xe3:
2600 	    case 0xe4: case 0xe5: case 0xe6: case 0xe7:
2601 	    case 0xe8: case 0xe9: case 0xea: case 0xeb:
2602 	    case 0xec: case 0xed: case 0xee: case 0xef:
2603 	    case 0xf0: case 0xf1: case 0xf2: case 0xf3:
2604 	    case 0xf4: case 0xf5: case 0xf6: case 0xf7:
2605 	    case 0xf8: case 0xf9: case 0xfa: case 0xfb:
2606 	    case 0xfc: case 0xfd: case 0xfe: case 0xff:
2607 	      cur->pc = check_modrm (cur->pc);
2608 	      break;
2609 	    default:
2610 	      if (jmp_reg_switch_mode == 1 && extop == 0x0b)
2611 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n",
2612 			  __LINE__, (int) extop, jmp_reg_switch_mode);
2613 	      else
2614 		{
2615 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n",
2616 			    __LINE__, (int) extop, jmp_reg_switch_mode);
2617 		  DELETE_CURCTX ();
2618 		}
2619 	      break;
2620 	    }
2621 	  break;
2622 	case 0x10: /* adc Eb,Gb */
2623 	case 0x11: /* adc Ev,Gv */
2624 	case 0x12: /* adc Gb,Eb */
2625 	case 0x13: /* adc Gv,Ev */
2626 	  cur->pc = check_modrm (cur->pc);
2627 	  break;
2628 	case 0x14: /* adc %al,Ib */
2629 	  cur->pc += 1;
2630 	  break;
2631 	case 0x15: /* adc %eax,Iz */
2632 	  cur->pc += z;
2633 	  break;
2634 	case 0x16: /* push ss */
2635 	  cur->sp -= 1;
2636 	  break;
2637 	case 0x17: /* pop ss */
2638 	  cur->sp += 1;
2639 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2640 	    cur->sp_safe = cur->sp - RED_ZONE;
2641 	  break;
2642 	case 0x18: /* sbb Eb,Gb */
2643 	case 0x19: /* sbb Ev,Gv */
2644 	case 0x1a: /* sbb Gb,Eb */
2645 	case 0x1b: /* sbb Gv,Ev */
2646 	  cur->pc = check_modrm (cur->pc);
2647 	  break;
2648 	case 0x1c: /* sbb %al,Ib */
2649 	  cur->pc += 1;
2650 	  break;
2651 	case 0x1d: /* sbb %eax,Iz */
2652 	  cur->pc += z;
2653 	  break;
2654 	case 0x1e: /* push ds */
2655 	  cur->sp -= 1;
2656 	  break;
2657 	case 0x1f: /* pop ds */
2658 	  cur->sp += 1;
2659 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2660 	    cur->sp_safe = cur->sp - RED_ZONE;
2661 	  break;
2662 	case 0x20: /* and Eb,Gb */
2663 	case 0x21: /* and Ev,Gv */
2664 	case 0x22: /* and Gb,Eb */
2665 	case 0x23: /* and Gv,Ev */
2666 	  cur->pc = check_modrm (cur->pc);
2667 	  break;
2668 	case 0x24: /* and %al,Ib */
2669 	  cur->pc += 1;
2670 	  break;
2671 	case 0x25: /* and %eax,Iz */
2672 	  cur->pc += z;
2673 	  break;
2674 	case 0x26: /* seg=es prefix */
2675 	  break;
2676 	case 0x27: /* daa */
2677 	  break;
2678 	case 0x28: /* sub Eb,Gb */
2679 	case 0x29: /* sub Ev,Gv */
2680 	case 0x2a: /* sub Gb,Eb */
2681 	case 0x2b: /* sub Gv,Ev */
2682 	  cur->pc = check_modrm (cur->pc);
2683 	  break;
2684 	case 0x2c: /* sub %al,Ib */
2685 	  cur->pc += 1;
2686 	  break;
2687 	case 0x2d: /* sub %eax,Iz */
2688 	  cur->pc += z;
2689 	  break;
2690 	case 0x2e: /* seg=cs prefix */
2691 	  break;
2692 	case 0x2f: /* das */
2693 	  break;
2694 	case 0x30: /* xor Eb,Gb */
2695 	case 0x31: /* xor Ev,Gv */
2696 	case 0x32: /* xor Gb,Eb */
2697 	case 0x33: /* xor Gv,Ev */
2698 	  cur->pc = check_modrm (cur->pc);
2699 	  break;
2700 	case 0x34: /* xor %al,Ib */
2701 	  cur->pc += 1;
2702 	  break;
2703 	case 0x35: /* xor %eax,Iz */
2704 	  cur->pc += z;
2705 	  break;
2706 	case 0x36: /* seg=ss prefix */
2707 	  break;
2708 	case 0x37: /* aaa */
2709 	  break;
2710 	case 0x38: /* cmp Eb,Gb */
2711 	case 0x39: /* cmp Ev,Gv */
2712 	case 0x3a: /* cmp Gb,Eb */
2713 	case 0x3b: /* cmp Gv,Ev */
2714 	  cur->pc = check_modrm (cur->pc);
2715 	  break;
2716 	case 0x3c: /* cmp %al,Ib */
2717 	  cur->pc += 1;
2718 	  break;
2719 	case 0x3d: /* cmp %eax,Iz */
2720 	  cur->pc += z;
2721 	  break;
2722 	case 0x3e: /* seg=ds prefix */
2723 	  break;
2724 	case 0x3f: /* aas */
2725 	  break;
2726 #if WSIZE(32)
2727 	case 0x40: /* inc %eax */
2728 	case 0x41: /* inc %ecx */
2729 	case 0x42: /* inc %edx */
2730 	case 0x43: /* inc %ebx */
2731 	  break;
2732 	case 0x44: /* inc %esp */
2733 	  /* Can't be a valid stack pointer - delete context */
2734 	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__);
2735 	  DELETE_CURCTX ();
2736 	  break;
2737 	case 0x45: /* inc %ebp */
2738 	case 0x46: /* inc %esi */
2739 	case 0x47: /* inc %edi */
2740 	case 0x48: /* dec %eax */
2741 	case 0x49: /* dec %ecx */
2742 	case 0x4a: /* dec %edx */
2743 	case 0x4b: /* dec %ebx */
2744 	  break;
2745 	case 0x4c: /* dec %esp */
2746 	  /* Can't be a valid stack pointer - delete context */
2747 	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__);
2748 	  DELETE_CURCTX ();
2749 	  break;
2750 	case 0x4d: /* dec %ebp */
2751 	case 0x4e: /* dec %esi */
2752 	case 0x4f: /* dec %edi */
2753 	  break;
2754 #endif
	case 0x50: /* push %eax */
	case 0x51: /* push %ecx */
	case 0x52: /* push %edx */
	case 0x53: /* push %ebx */
	case 0x54: /* push %esp */
	case 0x55: /* push %ebp */
	case 0x56: /* push %esi */
	case 0x57: /* push %edi */
	  /* Simulate a one-word register push: the walk's stack pointer
	     moves down by one word (cur->sp counts in words, not bytes).  */
	  cur->sp -= 1;
	  reg = OPC_REG (opcode);
	  if (reg == RBP)
	    {
#if 0
	      /* Don't do this check yet. Affects tail calls. */
	      /* avoid other function's prologue */
	      if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) ||
		  (cur->pc[0] == 0x8b && cur->pc[1] == 0xec))
		{
		  /* mov %esp,%ebp */
		  DELETE_CURCTX ();
		  break;
		}
#endif
	      /* 'push %ebp': remember where the frame pointer was spilled
		 (fp_loc) and its value at that moment (fp_sav), so a later
		 matching pop/mov/leave can restore cur->fp exactly.  Only
		 the first spill is recorded.  */
	      if (cur->fp_loc == NULL)
		{
		  cur->fp_loc = cur->sp;
		  cur->fp_sav = cur->fp;
		}
	    }
	  break;
	case 0x58: /* pop %eax */
	case 0x59: /* pop %ecx */
	case 0x5a: /* pop %edx */
	case 0x5b: /* pop %ebx */
	case 0x5c: /* pop %esp */
	case 0x5d: /* pop %ebp */
	case 0x5e: /* pop %esi */
	case 0x5f: /* pop %edi */
	  /* Simulate a register pop: load the word at the simulated stack
	     pointer (only if it lies in the region we know is readable,
	     i.e. [sp_safe, sbase)), then advance sp by one word below.  */
	  reg = OPC_REG (opcode);
	  cur->regs[reg] = 0;
	  if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase))
	    cur->regs[reg] = *cur->sp;
	  DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n",
		   __LINE__, reg, (unsigned long) cur->regs[reg]);
	  if (reg == RDX)
	    {
	      /* Track %edx separately: the 'movl %edx,(%esp)' handling
		 elsewhere in this switch uses cur->rdx as a candidate
		 saved return address (JVM-generated code).  */
	      if (cur->sp >= cur->sp_safe &&
		  (unsigned long) cur->sp < wctx->sbase)
		cur->rdx = *cur->sp;
	    }
	  else if (reg == RBP)
	    {
	      /* 'pop %ebp': if this pops the slot where we recorded the
		 frame-pointer spill, restore the recorded value; otherwise
		 take the frame pointer from stack memory when readable.  */
	      if (cur->fp_loc == cur->sp)
		{
		  cur->fp = cur->fp_sav;
		  cur->fp_loc = NULL;
		}
	      else if (cur->sp >= cur->sp_safe &&
		       (unsigned long) cur->sp < wctx->sbase)
		cur->fp = (unsigned long*) (*cur->sp);
	    }
	  else if (reg == RSP)
	    {
	      /* f.e. JVM I2CAdapter */
	      /* 'pop %esp' switches stacks: accept the new sp only if it
		 stays between the current sp and fp; otherwise abandon
		 instruction analysis and fall back to the frame-pointer
		 walk (checkFP).  */
	      if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
		{
		  unsigned long *nsp = (unsigned long*) (*cur->sp);
		  if (nsp >= cur->sp && nsp <= cur->fp)
		    {
		      cur->sp = nsp;
		    }
		  else
		    {
		      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address, opcode=0x%02x\n",
			       __LINE__, opcode);
		      goto checkFP;
		    }
		}
	      else
		{
		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode=0x%02x\n",
			    __LINE__, opcode);
		  goto checkFP;
		}
	      /* sp already replaced wholesale; skip the +1 below.  */
	      break;
	    }
	  cur->sp += 1;
	  /* Keep sp_safe tracking the highest address known readable,
	     leaving RED_ZONE words of slack below the current sp.  */
	  if (cur->sp - RED_ZONE > cur->sp_safe)
	    {
	      cur->sp_safe = cur->sp - RED_ZONE;
	    }
	  break;
2847 	case 0x60: /* pusha(d) */
2848 	  cur->sp -= 8;
2849 	  break;
2850 	case 0x61: /* popa(d) */
2851 	  cur->sp += 8;
2852 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2853 	    cur->sp_safe = cur->sp - RED_ZONE;
2854 	  break;
2855 	case 0x62: /* group AVX, 4-bytes EVEX prefix */
2856 	  {
2857 	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
2858 	    int len = parse_x86_AVX_instruction (pc);
2859 	    if (len < 4)
2860 	      {
2861 		DELETE_CURCTX ();
2862 	      }
2863 	    else
2864 	      {
2865 		pc += len;
2866 		cur->pc = pc;
2867 	      }
2868 	  }
2869 	  break;
2870 	case 0x63: /* arpl Ew,Gw (32) movsxd Gv,Ev (64)*/
2871 	  cur->pc = check_modrm (cur->pc);
2872 	  break;
2873 	case 0x64: /* seg=fs prefix */
2874 	case 0x65: /* seg=gs prefix */
2875 	  break;
2876 	case 0x66: /* opd size override */
2877 	case 0x67: /* addr size override */
2878 	  break;
2879 	case 0x68: /* push Iz */
2880 	  cur->sp = (unsigned long*) ((long) cur->sp - z);
2881 	  cur->pc += z;
2882 	  break;
2883 	case 0x69: /* imul Gv,Ev,Iz */
2884 	  cur->pc = check_modrm (cur->pc);
2885 	  cur->pc += z;
2886 	  break;
2887 	case 0x6a: /* push Ib */
2888 	  cur->sp = (unsigned long*) ((long) cur->sp - v);
2889 	  cur->pc += 1;
2890 	  break;
2891 	case 0x6b: /* imul Gv,Ev,Ib */
2892 	  cur->pc = check_modrm (cur->pc);
2893 	  cur->pc += 1;
2894 	  break;
2895 	case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2896 	  cur->pc = check_modrm (cur->pc);
2897 	  break;
2898 	case 0x70: /* jo Jb */
2899 	case 0x71: /* jno Jb */
2900 	case 0x72: /* jb Jb */
2901 	case 0x73: /* jnb Jb */
2902 	case 0x74: /* jz Jb */
2903 	case 0x75: /* jnz Jb */
2904 	case 0x76: /* jna Jb */
2905 	case 0x77: /* ja Jb */
2906 	case 0x78: /* js Jb */
2907 	case 0x79: /* jns Jb */
2908 	case 0x7a: /* jp Jb */
2909 	case 0x7b: /* jnp Jb */
2910 	case 0x7c: /* jl Jb */
2911 	case 0x7d: /* jge Jb */
2912 	case 0x7e: /* jle Jb */
2913 	case 0x7f: /* jg Jb */
2914 	  imm8 = *(char*) cur->pc++;
2915 	  if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2916 	    {
2917 	      int tidx = 0;
2918 	      unsigned char *npc = cur->pc + imm8;
2919 	      if (is_after_ret (npc))
2920 		break;
2921 	      while (npc > targets[tidx])
2922 		tidx += 1;
2923 	      if (npc != targets[tidx])
2924 		{
2925 		  if (ntrg < MAXTRGTS)
2926 		    {
2927 		      for (int i = 0; i < nctx; i++)
2928 			if (buf[i].tidx >= tidx)
2929 			  buf[i].tidx++;
2930 
2931 		      /* insert a new target */
2932 		      for (int i = ntrg; i > tidx; i--)
2933 			targets[i] = targets[i - 1];
2934 		      ntrg += 1;
2935 		      targets[tidx++] = npc;
2936 		    }
2937 		  else
2938 		    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg(%d)=max\n", __LINE__, ntrg);
2939 		  struct AdvWalkContext *new = buf + nctx;
2940 		  nctx += 1;
2941 		  __collector_memcpy (new, cur, sizeof (*new));
2942 		  new->pc = npc;
2943 		  new->tidx = tidx;
2944 		  cur = new; /* advance the new context first */
2945 		  continue;
2946 		}
2947 	    }
2948 	  else
2949 	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx(%d)=max\n", __LINE__, nctx);
2950 	  break;
2951 	case 0x80: /* group1 Eb,Ib */
2952 	  cur->pc = check_modrm (cur->pc);
2953 	  cur->pc += 1;
2954 	  break;
2955 	case 0x81: /* group1 Ev,Iz */
2956 	  modrm = *cur->pc;
2957 	  if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2958 	    {
2959 	      int immz = read_int (cur->pc + 1, z);
2960 	      extop = MRM_EXT (modrm);
2961 	      if (extop == 0) /* add  imm32,%esp */
2962 		cur->sp = (unsigned long*) ((long) cur->sp + immz);
2963 	      else if (extop == 4) /* and imm32,%esp */
2964 		cur->sp = (unsigned long*) ((long) cur->sp & immz);
2965 	      else if (extop == 5) /* sub imm32,%esp */
2966 		cur->sp = (unsigned long*) ((long) cur->sp - immz);
2967 	      if (cur->sp - RED_ZONE > cur->sp_safe)
2968 		cur->sp_safe = cur->sp - RED_ZONE;
2969 	    }
2970 	  cur->pc = check_modrm (cur->pc);
2971 	  cur->pc += z;
2972 	  break;
2973 	case 0x82: /* group1 Eb,Ib */
2974 	  cur->pc = check_modrm (cur->pc);
2975 	  cur->pc += 1;
2976 	  break;
2977 	case 0x83: /* group1 Ev,Ib */
2978 	  modrm = *cur->pc;
2979 	  if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2980 	    {
2981 	      imm8 = (char) cur->pc[1]; /* sign extension */
2982 	      extop = MRM_EXT (modrm);
2983 	      if (extop == 0) /* add  imm8,%esp */
2984 		cur->sp = (unsigned long*) ((long) cur->sp + imm8);
2985 	      else if (extop == 4) /* and imm8,%esp */
2986 		  cur->sp = (unsigned long*) ((long) cur->sp & imm8);
2987 	      else if (extop == 5) /* sub imm8,%esp */
2988 		cur->sp = (unsigned long*) ((long) cur->sp - imm8);
2989 	      if (cur->sp - RED_ZONE > cur->sp_safe)
2990 		cur->sp_safe = cur->sp - RED_ZONE;
2991 	    }
2992 	  cur->pc = check_modrm (cur->pc);
2993 	  cur->pc += 1;
2994 	  break;
2995 	case 0x84: /* test Eb,Gb */
2996 	case 0x85: /* test Ev,Gv */
2997 	case 0x86: /* xchg Eb,Gb */
2998 	case 0x87: /* xchg Ev,Gv */
2999 	  cur->pc = check_modrm (cur->pc);
3000 	  break;
3001 	case 0x88: /* mov Eb,Gb */
3002 	  cur->pc = check_modrm (cur->pc);
3003 	  break;
3004 	case 0x89: /* mov Ev,Gv */
3005 	  modrm = *cur->pc;
3006 	  if (MRM_MOD (modrm) == 0xc0)
3007 	    {
3008 	      if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3009 		/* movl %esp,%ebp */
3010 		cur->fp = cur->sp;
3011 	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3012 		{ /* mov %ebp,%esp */
3013 		  cur->sp = cur->fp;
3014 		  if (cur->sp - RED_ZONE > cur->sp_safe)
3015 		    cur->sp_safe = cur->sp - RED_ZONE;
3016 		  if (wctx->fp == (unsigned long) cur->sp)
3017 		    cur->cval = RA_FROMFP;
3018 		}
3019 	    }
3020 	  else if (MRM_MOD (modrm) == 0x80)
3021 	    {
3022 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3023 		{
3024 		  if (cur->pc[1] == 0x24)
3025 		    { /* mov %ebp,disp32(%esp) - JVM */
3026 		      immv = read_int (cur->pc + 2, 4);
3027 		      cur->fp_loc = (unsigned long*) ((char*) cur->sp + immv);
3028 		      cur->fp_sav = cur->fp;
3029 		    }
3030 		}
3031 	    }
3032 	  else if (MRM_MOD (modrm) == 0x40)
3033 	    {
3034 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3035 		{
3036 		  if (cur->pc[1] == 0x24 && cur->pc[2] == 0x0)
3037 		    { /* movl %edx,0(%esp) */
3038 		      cur->ra_loc = cur->sp;
3039 		      cur->ra_sav = cur->rdx;
3040 		    }
3041 		}
3042 	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3043 		{
3044 		  if (cur->pc[1] == 0x24)
3045 		    { /* mov %ebp,disp8(%esp) - JVM */
3046 		      imm8 = ((char*) (cur->pc))[2];
3047 		      cur->fp_loc = (unsigned long*) ((char*) cur->sp + imm8);
3048 		      cur->fp_sav = cur->fp;
3049 		    }
3050 		}
3051 	    }
3052 	  else if (MRM_MOD (modrm) == 0x0)
3053 	    {
3054 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3055 		{
3056 		  if (cur->pc[1] == 0x24)
3057 		    { /* mov %ebp,(%esp) */
3058 		      cur->fp_loc = cur->sp;
3059 		      cur->fp_sav = cur->fp;
3060 		    }
3061 		}
3062 	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3063 		{
3064 		  if (cur->pc[1] == 0x24)
3065 		    { /* movl %edx,(%esp) */
3066 		      cur->ra_loc = cur->sp;
3067 		      cur->ra_sav = cur->rdx;
3068 		    }
3069 		}
3070 	    }
3071 	  cur->pc = check_modrm (cur->pc);
3072 	  break;
3073 	case 0x8a: /* mov Gb,Eb */
3074 	  cur->pc = check_modrm (cur->pc);
3075 	  break;
3076 	case 0x8b: /* mov Gv,Ev */
3077 	  modrm = *cur->pc;
3078 	  if (MRM_MOD (modrm) == 0xc0)
3079 	    {
3080 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3081 		/* mov %esp,%ebp */
3082 		cur->fp = cur->sp;
3083 	      else if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3084 		{ /* mov %ebp,%esp */
3085 		  cur->sp = cur->fp;
3086 		  if (cur->sp - RED_ZONE > cur->sp_safe)
3087 		    cur->sp_safe = cur->sp - RED_ZONE;
3088 		  if (wctx->fp == (unsigned long) cur->sp)
3089 		    cur->cval = RA_FROMFP;
3090 		}
3091 	    }
3092 	  else if (MRM_MOD (modrm) == 0x80)
3093 	    {
3094 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3095 		{
3096 		  if (cur->pc[1] == 0x24)
3097 		    { /* mov disp32(%esp),%ebp */
3098 		      immv = read_int (cur->pc + 2, 4);
3099 		      unsigned long *ptr = (unsigned long*) ((char*) cur->sp + immv);
3100 		      if (cur->fp_loc == ptr)
3101 			{
3102 			  cur->fp = cur->fp_sav;
3103 			  cur->fp_loc = NULL;
3104 			}
3105 		      else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3106 			cur->fp = (unsigned long*) (*ptr);
3107 		    }
3108 		}
3109 	    }
3110 	  else if (MRM_MOD (modrm) == 0x40)
3111 	    {
3112 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3113 		{
3114 		  if (cur->pc[1] == 0x24)
3115 		    { /* mov disp8(%esp),%ebp - JVM */
3116 		      imm8 = ((char*) (cur->pc))[2];
3117 		      unsigned long *ptr = (unsigned long*) ((char*) cur->sp + imm8);
3118 		      if (cur->fp_loc == ptr)
3119 			{
3120 			  cur->fp = cur->fp_sav;
3121 			  cur->fp_loc = NULL;
3122 			}
3123 		      else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3124 			cur->fp = (unsigned long*) (*ptr);
3125 		    }
3126 		}
3127 	    }
3128 	  else if (MRM_MOD (modrm) == 0x0)
3129 	    {
3130 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3131 		{
3132 		  if (cur->pc[1] == 0x24)
3133 		    { /* mov (%esp),%ebp */
3134 		      if (cur->fp_loc == cur->sp)
3135 			{
3136 			  cur->fp = cur->fp_sav;
3137 			  cur->fp_loc = NULL;
3138 			}
3139 		      else if (cur->sp >= cur->sp_safe &&
3140 			       (unsigned long) cur->sp < wctx->sbase)
3141 			cur->fp = (unsigned long*) *cur->sp;
3142 		    }
3143 		}
3144 	    }
3145 	  cur->pc = check_modrm (cur->pc);
3146 	  break;
3147 	case 0x8c: /* mov Mw,Sw */
3148 	  cur->pc = check_modrm (cur->pc);
3149 	  break;
3150 	case 0x8d: /* lea Gv,M */
3151 	  modrm = *cur->pc;
3152 	  if (MRM_REGD (modrm) == RSP)
3153 	    {
3154 	      unsigned char *pc = cur->pc;
3155 	      // Mez: need to use always regs[RSP/RBP] instead cur->sp(or fp):
3156 	      cur->regs[RSP] = (unsigned long) cur->sp;
3157 	      cur->regs[RBP] = (unsigned long) cur->fp;
3158 	      cur->pc++;
3159 	      int mod = (modrm >> 6) & 3;
3160 	      int r_m = modrm & 7;
3161 	      long val = 0;
3162 	      int undefRez = 0;
3163 	      if (mod == 0x3)
3164 		val = getRegVal (cur, MRM_REGS (modrm), &undefRez);
3165 	      else if (r_m == 4)
3166 		{ // SP or R12. Decode SIB-byte.
3167 		  int sib = *cur->pc++;
3168 		  int scale = 1 << (sib >> 6);
3169 		  int index = X | ((sib >> 3) & 7);
3170 		  int base = B | (sib & 7);
3171 		  if (mod == 0)
3172 		    {
3173 		      if ((base & 7) == 5)
3174 			{ // BP or R13
3175 			  if (index != 4) // SP
3176 			    val += getRegVal (cur, index, &undefRez) * scale;
3177 			  val += read_int (cur->pc, 4);
3178 			  cur->pc += 4;
3179 			}
3180 		      else
3181 			{
3182 			  val += getRegVal (cur, base, &undefRez);
3183 			  if (index != 4) // SP
3184 			    val += getRegVal (cur, index, &undefRez) * scale;
3185 			}
3186 		    }
3187 		  else
3188 		    {
3189 		      val += getRegVal (cur, base, &undefRez);
3190 		      if (index != 4) // SP
3191 			val += getRegVal (cur, index, &undefRez) * scale;
3192 		      if (mod == 1)
3193 			{
3194 			  val += read_int (cur->pc, 1);
3195 			  cur->pc++;
3196 			}
3197 		      else
3198 			{ // mod == 2
3199 			  val += read_int (cur->pc, 4);
3200 			  cur->pc += 4;
3201 			}
3202 		    }
3203 		}
3204 	      else if (mod == 0)
3205 		{
3206 		  if (r_m == 5)
3207 		    { // BP or R13
3208 		      val += read_int (cur->pc, 4);
3209 		      cur->pc += 4;
3210 		    }
3211 		  else
3212 		    val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3213 		}
3214 	      else
3215 		{ // mod == 1 || mod == 2
3216 		  val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3217 		  if (mod == 1)
3218 		    {
3219 		      val += read_int (cur->pc, 1);
3220 		      cur->pc++;
3221 		    }
3222 		  else
3223 		    { // mod == 2
3224 		      val += read_int (cur->pc, 4);
3225 		      cur->pc += 4;
3226 		    }
3227 		}
3228 	      if (undefRez)
3229 		{
3230 		  DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx val=0x%lx\n",
3231 			   __LINE__, (unsigned long) cur->pc, (unsigned long) val);
3232 		  goto checkFP;
3233 		}
3234 	      cur->regs[MRM_REGD (modrm)] = val;
3235 	      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cur->pc=0x%lx val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3236 		       __LINE__, (unsigned long) cur->pc, (unsigned long) val,
3237 		       (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3238 	      if (cur->pc != check_modrm (pc))
3239 		DprintfT (SP_DUMP_UNWIND, "stack_unwind%d ERROR: cur->pc=0x%lx != check_modrm(0x%lx)=0x%lx\n",
3240 			 __LINE__, (unsigned long) cur->pc, (unsigned long) pc,
3241 			 (unsigned long) check_modrm (pc));
3242 	      if (MRM_REGD (modrm) == RSP)
3243 		{
3244 		  if (!isInside ((unsigned long) val, wctx->sp, wctx->sbase))
3245 		    {
3246 		      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx opcode=0x%02x val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3247 			       __LINE__, (unsigned long) cur->pc, opcode, (unsigned long) val,
3248 			       (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3249 		      goto checkFP;
3250 		    }
3251 		  cur->sp = (unsigned long *) val;
3252 		  if (cur->sp - RED_ZONE > cur->sp_safe)
3253 		    cur->sp_safe = cur->sp - RED_ZONE;
3254 		}
3255 	    }
3256 	  else
3257 	    cur->pc = check_modrm (cur->pc);
3258 	  break;
3259 	case 0x8e: /* mov Sw,Ew */
3260 	  cur->pc = check_modrm (cur->pc);
3261 	  break;
3262 	case 0x8f: /* pop Ev */
3263 	  cur->pc = check_modrm (cur->pc);
3264 	  cur->sp += 1;
3265 	  if (cur->sp - RED_ZONE > cur->sp_safe)
3266 	    cur->sp_safe = cur->sp - RED_ZONE;
3267 	  break;
3268 	case 0x90: /* nop */
3269 	  break;
3270 	case 0x91: /* xchg %eax,%ecx */
3271 	case 0x92: /* xchg %eax,%edx */
3272 	case 0x93: /* xchg %eax,%ebx */
3273 	case 0x94: /* xchg %eax,%esp XXXX */
3274 	case 0x95: /* xchg %eax,%ebp XXXX */
3275 	case 0x96: /* xchg %eax,%esi */
3276 	case 0x97: /* xchg %eax,%edi */
3277 	  break;
3278 	case 0x98: /* cbw/cwde */
3279 	case 0x99: /* cwd/cwq */
3280 	  break;
3281 	case 0x9a: /* callf Ap */
3282 	  if (jmp_reg_switch_mode == 1)
3283 	    {
3284 	      struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3285 	      __collector_memcpy (tmpctx, cur, sizeof (*cur));
3286 	      int rc = process_return (wctx, tmpctx);
3287 	      if (rc != RA_FAILURE)
3288 		{
3289 		  if (save_ctx)
3290 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3291 		  return rc;
3292 		}
3293 	    }
3294 	  cur->pc += 2 + a;
3295 	  break;
3296 	case 0x9b: /* fwait */
3297 	case 0x9c: /* pushf Fv */
3298 	case 0x9d: /* popf Fv */
3299 	case 0x9e: /* sahf */
3300 	case 0x9f: /* lahf */
3301 	  break;
3302 	case 0xa0: /* mov al,Ob */
3303 	case 0xa1: /* mov eax,Ov */
3304 	case 0xa2: /* mov Ob,al */
3305 	case 0xa3: /* mov Ov,eax */
3306 	  cur->pc += a;
3307 	  break;
3308 	case 0xa4: /* movsb Yb,Xb */
3309 	case 0xa5: /* movsd Yv,Xv */
3310 	case 0xa6: /* cmpsb Yb,Xb */
3311 	case 0xa7: /* cmpsd Xv,Yv */
3312 	  break;
3313 	case 0xa8: /* test al,Ib */
3314 	  cur->pc += 1;
3315 	  break;
3316 	case 0xa9: /* test eax,Iz */
3317 	  cur->pc += z;
3318 	  break;
3319 	case 0xaa: /* stosb Yb,%al */
3320 	case 0xab: /* stosd Yv,%eax */
3321 	case 0xac: /* lodsb %al,Xb */
3322 	case 0xad: /* lodsd %eax,Xv */
3323 	case 0xae: /* scasb %al,Yb */
3324 	case 0xaf: /* scasd %eax,Yv */
3325 	  break;
3326 	case 0xb0: /* mov %al,Ib */
3327 	case 0xb1: /* mov %cl,Ib */
3328 	case 0xb2: /* mov %dl,Ib */
3329 	case 0xb3: /* mov %bl,Ib */
3330 	case 0xb4: /* mov %ah,Ib */
3331 	case 0xb5: /* mov %ch,Ib */
3332 	case 0xb6: /* mov %dh,Ib */
3333 	case 0xb7: /* mov %bh,Ib */
3334 	  cur->pc += 1;
3335 	  break;
3336 	case 0xb8: /* mov Iv,%eax */
3337 	case 0xb9: /* mov Iv,%ecx */
3338 	case 0xba: /* mov Iv,%edx */
3339 	case 0xbb: /* mov Iv,%ebx */
3340 	case 0xbc: /* mov Iv,%esp */
3341 	case 0xbd: /* mov Iv,%rbp */
3342 	case 0xbe: /* mov Iv,%esi */
3343 	case 0xbf: /* mov Iv,%edi */
3344 	  reg = OPC_REG (opcode);
3345 	  if (reg == RAX)
3346 	    cur->rax = read_int (cur->pc, v);
3347 	  cur->pc += v;
3348 	  break;
3349 	case 0xc0: /* group2 Eb,Ib */
3350 	case 0xc1: /* group2 Ev,Ib */
3351 	  cur->pc = check_modrm (cur->pc) + 1;
3352 	  break;
	case 0xc2: /* ret Iw */
	  /* In the dynamic linker we may see that
	   * the actual return address is at sp+immv,
	   * while sp points to the resolved address.
	   */
	  {
	    /* 'ret imm16' also releases imm16 bytes of callee-popped
	       arguments; on success advance the caller's sp by immv.  */
	    immv = read_int (cur->pc, 2);
	    int rc = process_return (wctx, cur);
	    if (rc != RA_FAILURE)
	      {
		/* Under jmp-register-switch mode a found return address is
		   not trusted here; revert to the frame-pointer fallback.  */
		if (jmp_reg_switch_mode == 1)
		  {
		    DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__);
		    goto checkFP;
		  }
		wctx->sp += immv;
		if (save_ctx)
		  omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
		return rc;
	      }
	    /* No valid return address at this simulated sp: this walk
	       context is a dead end.  */
	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc2.\n", __LINE__);
	    DELETE_CURCTX ();
	  }
	  break;
	case 0xc3: /* ret */
	  {
	    /* Plain 'ret': try to accept the word at the simulated sp as
	       the return address.  Success ends the whole walk.  */
	    int rc = process_return (wctx, cur);
	    if (rc != RA_FAILURE)
	      {
		if (save_ctx)
		  omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
		return rc;
	      }
	    /* Remember where the failing ret was, for jmp-switch handling.  */
	    if (jmp_reg_switch_mode == 1)
	      jmp_reg_switch_pc = cur->pc;
	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc3.\n", __LINE__);
	    DELETE_CURCTX ();
	  }
	  break;
3392 	case 0xc4: /* group AVX, 3-bytes VEX prefix */
3393 	  {
3394 	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3395 	    int len = parse_x86_AVX_instruction (pc);
3396 	    if (len < 3)
3397 	      DELETE_CURCTX ();
3398 	    else
3399 	      {
3400 		pc += len;
3401 		cur->pc = pc;
3402 	      }
3403 	  }
3404 	  break;
3405 	case 0xc5: /* group AVX, 2-bytes VEX prefix */
3406 	  {
3407 	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3408 	    int len = parse_x86_AVX_instruction (pc);
3409 	    if (len < 2)
3410 	      DELETE_CURCTX ();
3411 	    else
3412 	      {
3413 		pc += len;
3414 		cur->pc = pc;
3415 	      }
3416 	  }
3417 	  break;
3418 	case 0xc6:
3419 	  modrm = *cur->pc;
3420 	  if (modrm == 0xf8) /* xabort */
3421 	    cur->pc += 2;
3422 	  else /* mov Eb,Ib */
3423 	    cur->pc = check_modrm (cur->pc) + 1;
3424 	  break;
3425 	case 0xc7:
3426 	  modrm = *cur->pc;
3427 	  if (modrm == 0xf8) /* xbegin */
3428 	    cur->pc += v + 1;
3429 	  else
3430 	    { /* mov Ev,Iz */
3431 	      extop = MRM_EXT (modrm);
3432 	      if (extop != 0)
3433 		{
3434 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xc7\n", __LINE__);
3435 		  goto checkFP;
3436 		}
3437 	      if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RAX)
3438 		cur->rax = read_int (cur->pc + 1, z);
3439 	      cur->pc = check_modrm (cur->pc) + z;
3440 	    }
3441 	  break;
	case 0xc8: /* enter Iw,Ib */
	  /* Skip the 3 immediate bytes only.  NOTE(review): 'enter'
	     allocates Iw bytes of frame and pushes %ebp, but neither sp
	     nor fp is adjusted here — presumably such frames are later
	     unwound via the 'leave'/fp path; confirm intended.  */
	  cur->pc += 3;
	  break;
	case 0xc9: /* leave */
	  /* 'leave' == mov %ebp,%esp; pop %ebp — simulated inline.  */
	  /* mov %ebp,%esp */
	  cur->sp = cur->fp;
	  /* pop %ebp */
	  if (cur->fp_loc == cur->sp)
	    {
	      /* Popping the recorded spill slot: restore the saved fp.  */
	      cur->fp = cur->fp_sav;
	      cur->fp_loc = NULL;
	    }
	  else if (cur->sp >= cur->sp_safe &&
		   (unsigned long) cur->sp < wctx->sbase)
	    {
	      /* Otherwise read the saved fp from stack memory; if this is
		 the frame the original context pointed at, mark the
		 return address as recovered via the frame pointer.  */
	      cur->fp = (unsigned long*) (*cur->sp);
	      if (wctx->fp == (unsigned long) cur->sp)
		cur->cval = RA_FROMFP;
	    }
	  cur->sp += 1;
	  if (cur->sp - RED_ZONE > cur->sp_safe)
	    cur->sp_safe = cur->sp - RED_ZONE;
	  break;
3465 	case 0xca: /* retf Iw */
3466 	  cur->pc += 2; /* XXXX process return */
3467 	  break;
3468 	case 0xcb: /* retf */
3469 	  break; /* XXXX process return */
3470 	case 0xcc: /* int 3 */
3471 	  break;
3472 	case 0xcd: /* int Ib */
3473 	  if (*cur->pc == 0x80)
3474 	    {
3475 	      if (cur->rax == __NR_exit)
3476 		{
3477 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xcd.\n", __LINE__);
3478 		  DELETE_CURCTX ();
3479 		  break;
3480 		}
3481 	      else if (cur->rax == __NR_rt_sigreturn)
3482 		{
3483 		  if (jmp_reg_switch_mode == 1)
3484 		    {
3485 		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0xcd\n",
3486 				__LINE__);
3487 		      goto checkFP;
3488 		    }
3489 		  wctx->sp = (unsigned long) cur->sp;
3490 		  if (save_ctx)
3491 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
3492 		  return RA_RT_SIGRETURN;
3493 		}
3494 #if WSIZE(32)
3495 	      else if (cur->rax == __NR_sigreturn)
3496 		{
3497 		  if (jmp_reg_switch_mode == 1)
3498 		    {
3499 		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode = 0xc2\n",
3500 				__LINE__);
3501 		      goto checkFP;
3502 		    }
3503 		  wctx->sp = (unsigned long) cur->sp;
3504 		  if (save_ctx)
3505 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
3506 		  return RA_SIGRETURN;
3507 		}
3508 #endif
3509 	    }
3510 	  cur->pc += 1;
3511 	  break;
3512 	case 0xce: /* into */
3513 	case 0xcf: /* iret */
3514 	  break;
3515 	case 0xd0: /* shift group2 Eb,1 */
3516 	case 0xd1: /* shift group2 Ev,1 */
3517 	case 0xd2: /* shift group2 Eb,%cl */
3518 	case 0xd3: /* shift group2 Ev,%cl */
3519 	  cur->pc = check_modrm (cur->pc);
3520 	  break;
3521 	case 0xd4: /* aam Ib */
3522 	  cur->pc += 1;
3523 	  break;
3524 	case 0xd5: /* aad Ib */
3525 	  cur->pc += 1;
3526 	  break;
3527 	case 0xd6: /* falc? */
3528 	  break;
3529 	case 0xd7:
3530 	  cur->pc = check_modrm (cur->pc);
3531 	  cur->pc++;
3532 	  break;
3533 	case 0xd8: /* esc instructions */
3534 	case 0xd9:
3535 	case 0xda:
3536 	case 0xdb:
3537 	case 0xdc:
3538 	case 0xdd:
3539 	case 0xde:
3540 	case 0xdf:
3541 	  cur->pc = check_modrm (cur->pc);
3542 	  break;
3543 	case 0xe0: /* loopne Jb */
3544 	case 0xe1: /* loope Jb */
3545 	case 0xe2: /* loop Jb */
3546 	case 0xe3: /* jcxz Jb */
3547 	  imm8 = *(char*) cur->pc++;
3548 	  if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
3549 	    {
3550 	      int tidx = 0;
3551 	      unsigned char *npc = cur->pc + imm8;
3552 	      if (is_after_ret (npc))
3553 		break;
3554 	      while (npc > targets[tidx])
3555 		tidx += 1;
3556 	      if (npc != targets[tidx])
3557 		{
3558 		  if (ntrg < MAXTRGTS)
3559 		    {
3560 		      for (int i = 0; i < nctx; i++)
3561 			if (buf[i].tidx >= tidx)
3562 			  buf[i].tidx++;
3563 		      /* insert a new target */
3564 		      for (int i = ntrg; i > tidx; i--)
3565 			targets[i] = targets[i - 1];
3566 		      ntrg += 1;
3567 		      targets[tidx++] = npc;
3568 		    }
3569 		  else
3570 		    DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3571 		  struct AdvWalkContext *new = buf + nctx;
3572 		  nctx += 1;
3573 		  __collector_memcpy (new, cur, sizeof (*new));
3574 		  new->pc = npc;
3575 		  new->tidx = tidx;
3576 		  cur = new; /* advance the new context first */
3577 		  continue;
3578 		}
3579 	    }
3580 	  else
3581 	    DprintfT (SP_DUMP_UNWIND, "unwind.c: nctx = max\n");
3582 	  break;
3583 	case 0xe4: case 0xe5:
3584 	  cur->pc = check_modrm (cur->pc);
3585 	  cur->pc++;
3586 	  break;
3587 	case 0xe6: case 0xe7:
3588 	  cur->pc++;
3589 	  cur->pc = check_modrm (cur->pc);
3590 	  break;
3591 	case 0xec: case 0xed: case 0xee: case 0xef:
3592 	  cur->pc = check_modrm (cur->pc);
3593 	  break;
3594 	case 0xe8: /* call Jz (f64) */
3595 	  {
3596 	    if (jmp_reg_switch_mode == 1)
3597 	      {
3598 		struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3599 		__collector_memcpy (tmpctx, cur, sizeof (*cur));
3600 		int rc = process_return (wctx, tmpctx);
3601 		if (rc != RA_FAILURE)
3602 		  {
3603 		    if (save_ctx)
3604 		      omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3605 		    return rc;
3606 		  }
3607 	      }
3608 	    int immz = read_int (cur->pc, z);
3609 	    if (immz == 0)
3610 	      /* special case in PIC code */
3611 	      cur->sp -= 1;
3612 	    cur->pc += z;
3613 	  }
3614 	  break;
3615 	case 0xe9: /* jump Jz */
3616 	  {
3617 	    int immz = read_int (cur->pc, z);
3618 	    unsigned char *npc = cur->pc + z + immz;
3619 	    if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
3620 	      {
3621 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3622 		DELETE_CURCTX ();
3623 		break;
3624 	      }
3625 	    int tidx = 0;
3626 	    while (npc > targets[tidx])
3627 	      tidx += 1;
3628 	    if (npc != targets[tidx])
3629 	      {
3630 		if (ntrg < MAXTRGTS)
3631 		  {
3632 		    for (int i = 0; i < nctx; i++)
3633 		      if (buf[i].tidx >= tidx)
3634 			buf[i].tidx++;
3635 		    /* insert a new target */
3636 		    for (int i = ntrg; i > tidx; i--)
3637 		      targets[i] = targets[i - 1];
3638 		    ntrg += 1;
3639 		    targets[tidx++] = npc;
3640 		  }
3641 		else
3642 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3643 		cur->pc = npc;
3644 		cur->tidx = tidx;
3645 		continue; /* advance this context first */
3646 	      }
3647 	    else
3648 	      {
3649 		/* Delete context */
3650 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3651 		DELETE_CURCTX ();
3652 	      }
3653 	  }
3654 	  break;
3655 	case 0xeb: /* jump imm8 */
3656 	  {
3657 	    imm8 = *(char*) cur->pc++;
3658 	    int tidx = 0;
3659 	    unsigned char *npc = cur->pc + imm8;
3660 	    while (npc > targets[tidx])
3661 	      tidx += 1;
3662 	    if (npc != targets[tidx])
3663 	      {
3664 		if (ntrg < MAXTRGTS)
3665 		  {
3666 		    for (int i = 0; i < nctx; i++)
3667 		      if (buf[i].tidx >= tidx)
3668 			buf[i].tidx++;
3669 		    /* insert a new target */
3670 		    for (int i = ntrg; i > tidx; i--)
3671 		      targets[i] = targets[i - 1];
3672 		    ntrg += 1;
3673 		    targets[tidx++] = npc;
3674 		  }
3675 		else
3676 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3677 		cur->pc = npc;
3678 		cur->tidx = tidx;
3679 		continue; /* advance this context first */
3680 	      }
3681 	    else
3682 	      {
3683 		/* Delete context */
3684 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xeb.\n", __LINE__);
3685 		DELETE_CURCTX ();
3686 	      }
3687 	  }
3688 	  break;
3689 	case 0xf0: /* lock prefix */
3690 	case 0xf2: /* repne prefix */
3691 	case 0xf3: /* repz prefix */
3692 	  break;
	case 0xf4: /* hlt */
	  /* Peek 3 bytes behind the current decode position for a 0x90
	     (nop) marker pattern.  NOTE(review): this backward read can
	     precede the first analyzed instruction — presumably always
	     mapped in practice (see bug note below); confirm.  */
	  extop2 = *(cur->pc - 3);
	  if (extop2 == 0x90)
	    {
	      // 17851712 occasional SEGV in find_i386_ret_addr in unwind.c during attach
	      if (save_ctx)
		omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
	      return RA_END_OF_STACK;
	    }
	  /* We see 'hlt' in _start. Stop analysis, revert to FP */
	  /* A workaround for the Linux main stack */
	  if (nctx > 1)
	    {
	      /* Other walk contexts remain; just drop this one.  */
	      DELETE_CURCTX ();
	      break;
	    }
	  if (cur->fp == 0)
	    {
	      /* Last context and no frame pointer: treat as end of stack,
		 cache the result, and clear the walk context.  */
	      if (jmp_reg_switch_mode == 1)
		{
		  DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xf4\n");
		  goto checkFP;
		}
	      cache_put (wctx, RA_EOSTCK);
	      wctx->pc = 0;
	      wctx->sp = 0;
	      wctx->fp = 0;
	      if (save_ctx)
		omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
	      return RA_END_OF_STACK;
	    }
	  /* A frame pointer exists: fall back to the fp-based walk.  */
	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xf4\n", __LINE__);
	  goto checkFP;
3728 	case 0xf5: /* cmc */
3729 	  break;
3730 	case 0xf6: /* group3 Eb */
3731 	  modrm = *cur->pc;
3732 	  extop = MRM_EXT (modrm);
3733 	  cur->pc = check_modrm (cur->pc);
3734 	  if (extop == 0x0) /* test Ib */
3735 	    cur->pc += 1;
3736 	  break;
3737 	case 0xf7: /* group3 Ev */
3738 	  modrm = *cur->pc;
3739 	  extop = MRM_EXT (modrm);
3740 	  cur->pc = check_modrm (cur->pc);
3741 	  if (extop == 0x0)  /* test Iz */
3742 	    cur->pc += z;
3743 	  break;
3744 	case 0xf8: /* clc */
3745 	case 0xf9: /* stc */
3746 	case 0xfa: /* cli */
3747 	case 0xfb: /* sti */
3748 	case 0xfc: /* cld */
3749 	case 0xfd: /* std */
3750 	  break;
3751 	case 0xfe: /* group4 */
3752 	  modrm = *cur->pc;
3753 	  extop = MRM_EXT (modrm);
3754 	  switch (extop)
3755 	    {
3756 	    case 0x0: /* inc Eb */
3757 	    case 0x1: /* dec Eb */
3758 	      cur->pc = check_modrm (cur->pc);
3759 	      break;
3760 	    case 0x7:
3761 	      cur->pc = check_modrm (cur->pc);
3762 	      break;
3763 	    default:
3764 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xfe %x\n",
3765 			__LINE__, extop);
3766 	      DELETE_CURCTX ();
3767 	      break;
3768 	    }
3769 	  break;
3770 	case 0xff: /* group5 */
3771 	  modrm = *cur->pc;
3772 	  extop = MRM_EXT (modrm);
3773 	  switch (extop)
3774 	    {
3775 	    case 0x0: /* inc Ev */
3776 	    case 0x1: /* dec Ev */
3777 	      cur->pc = check_modrm (cur->pc);
3778 	      break;
3779 	    case 0x2: /* calln Ev */
3780 	      if (jmp_reg_switch_mode == 1)
3781 		{
3782 		  struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3783 		  __collector_memcpy (tmpctx, cur, sizeof (*cur));
3784 		  int rc = process_return (wctx, tmpctx);
3785 		  if (rc != RA_FAILURE)
3786 		    {
3787 		      if (save_ctx)
3788 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3789 		      return rc;
3790 		    }
3791 		}
3792 	      cur->pc = check_modrm (cur->pc);
3793 	      break;
3794 	    case 0x3: /* callf Ep */
3795 	      if (jmp_reg_switch_mode == 1)
3796 		{
3797 		  struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3798 		  __collector_memcpy (tmpctx, cur, sizeof (*cur));
3799 		  int rc = process_return (wctx, tmpctx);
3800 		  if (rc != RA_FAILURE)
3801 		    {
3802 		      if (save_ctx)
3803 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3804 		      return rc;
3805 		    }
3806 		}
3807 	      cur->pc = check_modrm (cur->pc); /* XXXX */
3808 	      break;
3809 	    case 0x4: /* jumpn Ev */
3810 	      /* This instruction appears in PLT or
3811 	       * in tail call optimization.
3812 	       * In both cases treat it as return.
3813 	       * Save jump *(reg) - switch, etc, for later use when no ctx left
3814 	       */
3815 	      if (modrm == 0x25 || /* jumpn *disp32 */
3816 		  MRM_MOD (modrm) == 0x40 || /* jumpn byte(reg) */
3817 		  MRM_MOD (modrm) == 0x80) /* jumpn word(reg) */
3818 		{
3819 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: PLT or tail call: %p\n", cur->pc - 1);
3820 		  int rc = process_return (wctx, cur);
3821 		  if (rc != RA_FAILURE)
3822 		    {
3823 		      if (jmp_reg_switch_mode == 1 && total_num_jmp_reg < max_num_jmp_reg_seen)
3824 			{
3825 			  DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xff\n");
3826 			  goto checkFP;
3827 			}
3828 		      if (save_ctx)
3829 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3830 		      return rc;
3831 		    }
3832 		}
3833 	      else if (modrm != 0x24 /*ignore SIB*/) /* jumpn *(reg) or jumpn reg */
3834 		{
3835 		  // 22846120 stack unwind does not find caller of __memcpy_ssse3_back with B64 intel-Linux
3836 		  /*
3837 		   * For now, let's deal rather narrowly with this scenario.  If:
3838 		   * - we are in the middle of an "ff e2" instruction, and
3839 		   * - the next instruction is undefined ( 0f 0b == ud2 )
3840 		   * then test return.  (Might eventually have to broaden the scope
3841 		   * of this fix to other registers/etc.)
3842 		   */
3843 		  if (cur->pc[0] == 0xe2 && cur->pc[1] == 0x0f && cur->pc[2] == 0x0b)
3844 		    {
3845 		      int rc = process_return_real (wctx, cur, 0);
3846 		      if (rc == RA_SUCCESS)
3847 			{
3848 			  if (save_ctx)
3849 			    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3850 			  return rc;
3851 			}
3852 		    }
3853 
3854 		  // 22691241 shjsynprog, jsynprog core dump from find_i386_ret_addr
3855 		  /*
3856 		   * Here is another oddity.  Java 9 seems to emit dynamically generated
3857 		   * code where a code block ends with a "jmp *reg" and then padding to a
3858 		   * multiple-of-16 boundary and then a bunch of 0s.  In this case, let's
3859 		   * not continue to walk bytes since we would be walking off the end of
3860 		   * the instructions into ... something.  Treating them as instructions
3861 		   * can lead to unexpected results, including SEGV.
3862 		   */
3863 		  /*
3864 		   * While the general problem deserves a better solution, let's look
3865 		   * here only for one particular case:
3866 		   *    0xff 0xe7               jmp *reg
3867 		   *                            nop to bring us to a multiple-of-16 boundary
3868 		   *    0x0000000000000a00      something that does not look like an instruction
3869 		   *
3870 		   * A different nop might be used depending on how much padding is needed
3871 		   * to reach that multiple-of-16 boundary.  We've seen two:
3872 		   *    0x90                    one byte
3873 		   *    0x0f 0x1f 0x40 0x00     four bytes
3874 		   */
3875 		  // confirm the instruction is 0xff 0xe7
3876 		  if (cur->pc[0] == 0xe7)
3877 		    {
3878 		      // check for correct-length nop and find next 16-byte boundary
3879 		      int found_nop = 0;
3880 		      unsigned long long *boundary = 0;
3881 		      switch ((((unsigned long) (cur->pc)) & 0xf))
3882 			{
3883 			case 0xb: // look for 4-byte nop
3884 			  if (*((unsigned *) (cur->pc + 1)) == 0x00401f0f)
3885 			    found_nop = 1;
3886 			  boundary = (unsigned long long *) (cur->pc + 5);
3887 			  break;
3888 			case 0xe: // look for 1-byte nop
3889 			  if (cur->pc[1] == 0x90)
3890 			    found_nop = 1;
3891 			  boundary = (unsigned long long *) (cur->pc + 2);
3892 			  break;
3893 			default:
3894 			  break;
3895 			}
3896 
3897 		      // if nop is found, check what's at the boundary
3898 		      if (found_nop && *boundary == 0x000000000a00)
3899 			{
3900 			  DELETE_CURCTX ();
3901 			  break;
3902 			}
3903 		    }
3904 
3905 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: probably PLT or tail call or switch table: %p\n",
3906 			    cur->pc - 1);
3907 		  if (num_jmp_reg < expected_num_jmp_reg)
3908 		    {
3909 		      if (jmp_reg_ctx[num_jmp_reg] == NULL)
3910 			jmp_reg_ctx[num_jmp_reg] = (struct AdvWalkContext *) alloca (sizeof (*cur));
3911 		      if (jmp_reg_ctx[num_jmp_reg] != NULL)
3912 			__collector_memcpy (jmp_reg_ctx[num_jmp_reg], cur, sizeof (*cur));
3913 		    }
3914 		  if (num_jmp_reg < expected_num_jmp_reg ||
3915 		      (num_jmp_reg >= expected_num_jmp_reg &&
3916 		       jmp_reg_ctx[expected_num_jmp_reg - 1] != NULL &&
3917 		       cur->pc != jmp_reg_ctx[expected_num_jmp_reg - 1]->pc))
3918 		    {
3919 		      num_jmp_reg++;
3920 		      total_num_jmp_reg++;
3921 		    }
3922 		  if (jmp_reg_switch_mode == 1 && total_num_jmp_reg >= max_num_jmp_reg_seen)
3923 		    {
3924 		      int rc = process_return_real (wctx, cur, 0);
3925 		      if (rc == RA_SUCCESS)
3926 			{
3927 			  if (save_ctx)
3928 			    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3929 			  return rc;
3930 			}
3931 		    }
3932 		}
3933 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xff.\n", __LINE__);
3934 	      DELETE_CURCTX ();
3935 	      break;
3936 	    case 0x5: /* jmpf Ep */
3937 	      cur->pc = check_modrm (cur->pc); /* XXXX */
3938 	      break;
3939 	    case 0x6: /* push Ev */
3940 	      cur->pc = check_modrm (cur->pc);
3941 	      cur->sp -= 1;
3942 	      break;
3943 	    case 0x7:
3944 	      cur->pc = check_modrm (cur->pc); /* XXXX */
3945 	      if (jmp_reg_switch_mode == 1)
3946 		{
3947 		  int rc = process_return_real (wctx, cur, 0);
3948 		  if (rc == RA_SUCCESS)
3949 		    {
3950 		      if (save_ctx)
3951 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3952 		      return rc;
3953 		    }
3954 		}
3955 	      break;
3956 	    default:
3957 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xff %x\n",
3958 			__LINE__, (int) extop);
3959 	      DELETE_CURCTX ();
3960 	      break;
3961 	    }
3962 	  break;
3963 	default:
3964 	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x%x\n",
3965 		    __LINE__, (int) opcode);
3966 	  DELETE_CURCTX ();
3967 	  break;
3968 	}
3969 
3970       /* switch to next context */
3971       if (++cur >= buf + nctx)
3972 	cur = buf;
3973       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d switch context: cur=0x%lx(%ld)  nctx=%d  cnt=%d\n",
3974 	       __LINE__, (unsigned long) cur, (long) (cur - buf), (int) nctx, (int) cnt);
3975     }
3976 
3977 checkFP:
3978   Tprintf (DBG_LT3, "find_i386_ret_addr:%d checkFP: wctx=0x%lx fp=0x%lx ln=0x%lx pc=0x%lx sbase=0x%lx sp=0x%lx tbgn=0x%lx tend=0x%lx\n",
3979 	   __LINE__, (unsigned long) wctx, (unsigned long) wctx->fp,
3980 	   (unsigned long) wctx->ln, (unsigned long) wctx->pc, (unsigned long) wctx->sbase,
3981 	   (unsigned long) wctx->sp, (unsigned long) wctx->tbgn, (unsigned long) wctx->tend);
3982 
3983   if (jmp_reg_switch_mode == 1)
3984     { // not deal with switch cases not ending with ret
3985       if (jmp_reg_switch_backup_ctx != NULL)
3986 	__collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
3987       DprintfT (SP_DUMP_UNWIND, "stack_unwind jmp reg mode on: pc = 0x%lx cnt = %d, nctx = %d\n", wctx->pc, cnt, nctx);
3988     }
3989 
3990   unsigned long *cur_fp = cur->fp;
3991   unsigned long *cur_sp = cur->sp;
3992   if (do_walk == 0)
3993     __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
3994 
3995   /* Resort to the frame pointer */
3996   if (cur->fp_loc)
3997     cur->fp = cur->fp_sav;
3998   cur->sp = cur->fp;
3999   if ((unsigned long) cur->sp >= wctx->sbase ||
4000       (unsigned long) cur->sp < wctx->sp)
4001     {
4002       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d do_walk=%d cur->sp=0x%p out of range. wctx->sbase=0x%lx wctx->sp=0x%lx wctx->pc=0x%lx\n",
4003 		__LINE__, (int) do_walk, cur->sp, (unsigned long) wctx->sbase,
4004 		(unsigned long) wctx->sp, (unsigned long) wctx->pc);
4005       if (do_walk == 0)
4006 	{
4007 	  cur->sp = cur_sp;
4008 	  cur->fp = cur_fp;
4009 	  do_walk = 1;
4010 	  save_ctx = 1;
4011 	  goto startWalk;
4012 	}
4013       if (save_ctx)
4014 	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4015       return RA_FAILURE;
4016     }
4017 
4018   unsigned long fp = *cur->sp++;
4019   if (fp <= (unsigned long) cur->sp || fp >= wctx->sbase)
4020     {
4021       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d fp=0x%016llx out of range. cur->sp=%p wctx->sbase=0x%lx wctx->pc=0x%lx\n",
4022 	       __LINE__, (unsigned long long) fp, cur->sp,
4023 	       (unsigned long) wctx->sbase, (unsigned long) wctx->pc);
4024       if (do_walk == 0)
4025 	{
4026 	  cur->sp = cur_sp;
4027 	  cur->fp = cur_fp;
4028 	  do_walk = 1;
4029 	  save_ctx = 1;
4030 	  goto startWalk;
4031 	}
4032       if (save_ctx)
4033 	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4034       return RA_FAILURE;
4035     }
4036 
4037   unsigned long ra = *cur->sp++;
4038   if (ra == 0)
4039     {
4040       cache_put (wctx, RA_EOSTCK);
4041       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK wctx->pc = 0x%lx\n", __LINE__, wctx->pc);
4042       if (save_ctx)
4043 	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
4044       return RA_END_OF_STACK;
4045     }
4046 
4047   unsigned long tbgn = wctx->tbgn;
4048   unsigned long tend = wctx->tend;
4049   if (ra < tbgn || ra >= tend)
4050     {
4051       // We do not know yet if update_map_segments is really needed
4052       if (!__collector_check_segment (ra, &tbgn, &tend, 0))
4053 	{
4054 	  DprintfT (SP_DUMP_UNWIND, "unwind.c: __collector_check_segment fail. wctx->pc = 0x%lx\n", wctx->pc);
4055 	  if (do_walk == 0)
4056 	    {
4057 	      cur->sp = cur_sp;
4058 	      cur->fp = cur_fp;
4059 	      do_walk = 1;
4060 	      save_ctx = 1;
4061 	      goto startWalk;
4062 	    }
4063 	  if (save_ctx)
4064 	    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4065 	  return RA_FAILURE;
4066 	}
4067     }
4068 
4069   unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
4070   if (npc == 0)
4071     {
4072       DprintfT (SP_DUMP_UNWIND, "unwind.c: adjust_ret_addr fail. wctx->pc = 0x%lx\n", wctx->pc);
4073       if (do_walk == 0)
4074 	{
4075 	  cur->sp = cur_sp;
4076 	  cur->fp = cur_fp;
4077 	  do_walk = 1;
4078 	  save_ctx = 1;
4079 	  goto startWalk;
4080 	}
4081       if (save_ctx)
4082 	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4083       return RA_FAILURE;
4084     }
4085   wctx->pc = npc;
4086   wctx->sp = (unsigned long) cur->sp;
4087   wctx->fp = fp;
4088   wctx->tbgn = tbgn;
4089   wctx->tend = tend;
4090 
4091   if (save_ctx)
4092     {
4093       omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SUCCESS);
4094       DprintfT (SP_DUMP_UNWIND, "unwind.c: cache walk context. wctx_pc_save->pc = 0x%lx\n", wctx_pc_save.pc);
4095     }
4096   return RA_SUCCESS;
4097 }
4098 
4099 /*
4100  * We have the return address, but we would like to report to the user
4101  * the calling PC, which is the instruction immediately preceding the
4102  * return address.  Unfortunately, x86 instructions can have variable
4103  * length.  So we back up 8 bytes and try to figure out where the
4104  * calling PC starts.  (FWIW, call instructions are often 5-bytes long.)
4105  */
unsigned long
adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend)
{
  /* Given a return address 'ra', find the address of the call instruction
     that produced it, i.e. the calling PC we report to the user.
     'segoff' is the offset of 'ra' within its text segment (used so we
     never back up past the segment start); 'tend' is the segment end.
     Returns the calling PC, or 0 if no plausible call site was found.  */
  unsigned long npc = 0;
  /* Candidate call instructions here are at most 8 bytes long; try each
     possible length from the longest down to 2 bytes.  */
  int i = segoff < 8 ? segoff : 8;
  for (; i > 1; i--)
    {
      unsigned char *ptr = (unsigned char*) ra - i;
      int z = 4;        /* immediate (operand) size in bytes */
      int a = 4;        /* address size in bytes */
      int done = 0;
      int bVal;
      /* Skip instruction prefixes, tracking how they change the
	 operand and address sizes.  */
      while (!done)
	{
	  bVal = getByteInstruction (ptr);
	  if (bVal < 0)         /* unreadable byte */
	    return 0;
	  switch (bVal)
	    {
	    case 0x26: /* es: segment override */
	    case 0x36: /* ss: segment override */
#if WSIZE(64)
	      ptr += 1;
	      break;
#endif
	      /* In 32-bit mode fall through: treat like fs:/gs: below.  */
	    case 0x64: /* fs: segment override */
	    case 0x65: /* gs: segment override */
	      bVal = getByteInstruction (ptr + 1);
	      if (bVal < 0)
		return 0;
	      if (bVal == 0xe8)
		// a workaround for bug 16193041, assuming "call Jz" has no segment override prefix
	       done = 1;
	      else
		ptr += 1;
	      break;
	    case 0x66: /* operand-size override: 2-byte immediate */
	      z = 2;
	      ptr += 1;
	      break;
	    case 0x67: /* address-size override: 2-byte address */
	      a = 2;
	      ptr += 1;
	      break;
	    default:
	      done = 1;
	      break;
	    }
	}
#if WSIZE(64)
      /* Skip a REX prefix (0x40..0x4f); REX.W restores a 4-byte immediate
	 even after a 0x66 prefix.  */
      bVal = getByteInstruction (ptr);
      if (bVal < 0)
	return 0;
      if (bVal >= 0x40 && bVal <= 0x4f)
	{ /* XXXX not all REX codes applicable */
	  if (bVal & 0x8)
	    z = 4;
	  ptr += 1;
	}
#endif
      int opcode = getByteInstruction (ptr);
      if (opcode < 0)
	return 0;
      ptr++;
      /* Advance 'ptr' over the operands of the candidate call opcode;
	 any other opcode means this candidate length is not a call.  */
      switch (opcode)
	{
	case 0xe8: /* call Jz (f64) */
	  ptr += z;
	  break;
	case 0x9a: /* callf Ap */
	  ptr += 2 + a;
	  break;
	case 0xff: /* calln Ev , callf Ep */
	  {
	    int extop = MRM_EXT (*ptr);
	    if (extop == 2 || extop == 3)
	      ptr = check_modrm (ptr);
	  }
	  break;
	default:
	  continue;     /* not a call; try the next candidate length */
	}
      /* If decoding landed exactly on 'ra', we found a call instruction
	 of length 'i' ending at the return address.  */
      if ((unsigned long) ptr == ra)
	{
	  npc = ra - i;
	  break;
	}
    }
  if (npc == 0)
    {
      /* No call found.  Check whether 'ra' itself points at a known
	 kernel signal-return trampoline; if so, report it unchanged.  */
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // test __kernel_sigreturn or __kernel_rt_sigreturn
      if ((ra + 7 < tend && getByteInstruction (ptr) == 0x58
	   && getByteInstruction (ptr + 1) == 0xb8
	   && getByteInstruction (ptr + 6) == 0xcd
	   && getByteInstruction (ptr + 7) == 0x80) /* pop %eax; mov $NNNN, %eax; int */
	  || (ra + 7 < tend && getByteInstruction (ptr) == 0x58
	      && getByteInstruction (ptr + 1) == 0xb8
	      && getByteInstruction (ptr + 6) == 0x0f
	      && getByteInstruction (ptr + 7) == 0x05) /* pop %eax; mov $NNNN, %eax; syscall */
	  || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
	      && getByteInstruction (ptr + 5) == 0xcd
	      && getByteInstruction (ptr + 6) == 0x80) /* mov $NNNN, %eax; int */
	  || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
	      && getByteInstruction (ptr + 5) == 0x0f
	      && getByteInstruction (ptr + 6) == 0x05)) /* mov $NNNN, %eax; syscall */
#else //WSIZE(64)
      // test __restore_rt
      if (ra + 8 < tend && getByteInstruction (ptr) == 0x48
	  && getByteInstruction (ptr + 7) == 0x0f
	  && getByteInstruction (ptr + 8) == 0x05) /* mov $NNNNNNNN, %rax; syscall */
#endif
	{
	  npc = ra;
	}
    }
  /* Still nothing: look for JVM template-interpreter dispatch sequences
     (the "jmp *table(,reg,scale)" bytecode dispatch) seen in Java mode.
     The byte patterns below were observed for specific JDK releases.  */
  if (npc == 0 && __collector_java_mode
      && __collector_java_asyncgetcalltrace_loaded)
    { // detect jvm interpreter code for java user threads
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // up to J170
      /*
       * ff 24 9d e0 64 02 f5    jmp     *-0xafd9b20(,%ebx,4)
       * 8b 4e 01                movl    1(%esi),%ecx
       * f7 d1                   notl    %ecx
       * 8b 5d ec                movl    -0x14(%ebp),%ebx
       * c1 e1 02                shll    $2,%ecx
       * eb d8                   jmp     .-0x26 [ 0x92a ]
       * 83 ec 08                subl    $8,%esp || 8b 65 f8                movl    -8(%ebp),%esp
       * */
      if (ra - 20 >= (ra - segoff) && ((*ptr == 0x83 && *(ptr + 1) == 0xec) || (*ptr == 0x8b && *(ptr + 1) == 0x65))
	  && *(ptr - 2) == 0xeb
	  && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
	  && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
	  && *(ptr - 10) == 0xf7 && *(ptr - 9) == 0xd1
	  && *(ptr - 13) == 0x8b && *(ptr - 12) == 0x4e
	  && *(ptr - 20) == 0xff && *(ptr - 19) == 0x24 && *(ptr - 18) == 0x9d)
	{
	  npc = ra - 20;
	}
      // J180 J190
      // ff 24 9d ** ** ** **    jmp     *-0x*******(,%ebx,4)
      if (npc == 0
	  && ra - 7 >= (ra - segoff)
	  && *(ptr - 7) == 0xff
	  && *(ptr - 6) == 0x24
	  && *(ptr - 5) == 0x9d)
	{
	  npc = ra - 7;
	}
#else //WSIZE(64)
      // up to J170
      /*
       * 41 ff 24 da             jmp     *(%r10,%rbx,8)
       * 41 8b 4d 01             movl    1(%r13),%ecx
       * f7 d1                   notl    %ecx
       * 48 8b 5d d8             movq    -0x28(%rbp),%rbx
       * c1 e1 02                shll    $2,%ecx
       * eb cc                   jmp     .-0x32 [ 0xd23 ]
       * 48 8b 65 f0             movq    -0x10(%rbp),%rsp
       */
      if (ra - 19 >= (ra - segoff) && *ptr == 0x48 && ((*(ptr + 1) == 0x8b && *(ptr + 2) == 0x65) || (*(ptr + 1) == 0x83 && *(ptr + 2) == 0xec))
	  && *(ptr - 2) == 0xeb
	  && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
	  && *(ptr - 9) == 0x48 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
	  && *(ptr - 11) == 0xf7 && *(ptr - 10) == 0xd1
	  && *(ptr - 15) == 0x41 && *(ptr - 14) == 0x8b && *(ptr - 13) == 0x4d
	  && *(ptr - 19) == 0x41 && *(ptr - 18) == 0xff)
	npc = ra - 19;
      // J180 J190
      // 41 ff 24 da             jmp     *(%r10,%rbx,8)
      if (npc == 0
	  && ra - 4 >= (ra - segoff)
	  && *(ptr - 4) == 0x41
	  && *(ptr - 3) == 0xff
	  && *(ptr - 2) == 0x24
	  && *(ptr - 1) == 0xda)
	npc = ra - 4;
#endif
    }

  return npc;
}
4291 
4292 /*
4293  * Parses AVX instruction and returns its length.
4294  * Returns 0 if parsing failed.
4295  * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
4296  */
4297 static int
parse_x86_AVX_instruction(unsigned char * pc)4298 parse_x86_AVX_instruction (unsigned char *pc)
4299 {
4300   /*
4301    * VEX prefix has a two-byte form (0xc5) and a three byte form (0xc4).
4302    * If an instruction syntax can be encoded using the two-byte form,
4303    * it can also be encoded using the three byte form of VEX.
4304    * The latter increases the length of the instruction by one byte.
4305    * This may be helpful in some situations for code alignment.
4306    *
4307 		     Byte 0           Byte 1              Byte 2         Byte 3
4308      (Bit Position) 7      0     7 6 5   4    0     7   6  3   2   10
4309      3-byte VEX   [ 11000100 ] [ R X B | m-mmmm ] [ W | vvvv | L | pp ]
4310 		    7      0     7   6  3   2   10
4311      2-byte VEX   [ 11000101 ] [ R | vvvv | L | pp ]
4312 		    7      0     7 6 5  4 3 2 1 0     7 6 5 4 3 2 1 0     7  6 5  4  3 2 1 0
4313      4-byte EVEX  [ 01100010 ] [ R X B R1 0 0 m m ] [ W v v v v 1 p p ] [ z L1 L B1 V1 a a a ]
4314 
4315      R: REX.R in 1's complement (inverted) form
4316 	  0: Same as REX.R=1 (64-bit mode only)
4317 	  1: Same as REX.R=0 (must be 1 in 32-bit mode)
4318 
4319      X: REX.X in 1's complement (inverted) form
4320 	  0: Same as REX.X=1 (64-bit mode only)
4321 	  1: Same as REX.X=0 (must be 1 in 32-bit mode)
4322 
4323      B: REX.B in 1's complement (inverted) form
4324 	  0: Same as REX.B=1 (64-bit mode only)
4325 	  1: Same as REX.B=0 (Ignored in 32-bit mode).
4326 
4327      W: opcode specific (use like REX.W, or used for opcode
4328 	  extension, or ignored, depending on the opcode byte)
4329 
4330      m-mmmm:
4331 	  00000: Reserved for future use (will #UD)
4332 	  00001: implied 0F leading opcode byte
4333 	  00010: implied 0F 38 leading opcode bytes
4334 	  00011: implied 0F 3A leading opcode bytes
4335 	  00100-11111: Reserved for future use (will #UD)
4336 
4337      vvvv: a register specifier (in 1's complement form) or 1111 if unused.
4338 
4339      L: Vector Length
4340 	  0: scalar or 128-bit vector
4341 	  1: 256-bit vector
4342 
4343      pp: opcode extension providing equivalent functionality of a SIMD prefix
4344 	  00: None
4345 	  01: 66
4346 	  10: F3
4347 	  11: F2
4348    *
4349    * Example: 0xc5f877L vzeroupper
4350    * VEX prefix: 0xc5 0x77
4351    * Opcode: 0xf8
4352    *
4353    */
4354   int len = 0;
4355   disassemble_info dis_info;
4356   dis_info.arch = bfd_arch_i386;
4357   dis_info.mach = bfd_mach_x86_64;
4358   dis_info.flavour = bfd_target_unknown_flavour;
4359   dis_info.endian = BFD_ENDIAN_UNKNOWN;
4360   dis_info.endian_code = dis_info.endian;
4361   dis_info.octets_per_byte = 1;
4362   dis_info.disassembler_needs_relocs = FALSE;
4363   dis_info.fprintf_func = fprintf_func;
4364   dis_info.fprintf_styled_func = fprintf_styled_func;
4365   dis_info.stream = NULL;
4366   dis_info.disassembler_options = NULL;
4367   dis_info.read_memory_func = read_memory_func;
4368   dis_info.memory_error_func = memory_error_func;
4369   dis_info.print_address_func = print_address_func;
4370   dis_info.symbol_at_address_func = symbol_at_address_func;
4371   dis_info.symbol_is_valid = symbol_is_valid;
4372   dis_info.display_endian = BFD_ENDIAN_UNKNOWN;
4373   dis_info.symtab = NULL;
4374   dis_info.symtab_size = 0;
4375   dis_info.buffer_vma = 0;
4376   dis_info.buffer = pc;
4377   dis_info.buffer_length = 8;
4378 
4379   disassembler_ftype disassemble = print_insn_i386;
4380   if (disassemble == NULL)
4381     {
4382       DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction ERROR: unsupported disassemble\n");
4383       return 0;
4384     }
4385   len = disassemble (0, &dis_info);
4386   DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction: returned %d  pc: %p\n", len, pc);
4387   return len;
4388 }
4389 
4390 /*
4391  * In the Intel world, a stack frame looks like this:
4392  *
4393  * %fp0->|                               |
4394  *       |-------------------------------|
4395  *       |  Args to next subroutine      |
4396  *       |-------------------------------|-\
4397  * %sp0->|  One word struct-ret address  | |
4398  *       |-------------------------------|  > minimum stack frame (8 bytes)
4399  *       |  Previous frame pointer (%fp0)| |
4400  * %fp1->|-------------------------------|-/
4401  *       |  Local variables              |
4402  * %sp1->|-------------------------------|
4403  *
4404  */
4405 
int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  /* Walk the x86 stack described by 'context' and store the PC of each
     frame into 'buf' (capacity 'size' bytes, treated as an array of long).
     'bptr'/'eptr', when non-NULL, bound the range of stack addresses whose
     frames are recorded.  'mode' flags: FRINFO_NO_WALK disables the
     instruction-level walk inside find_i386_ret_addr; FRINFO_FROM_STACK
     (low 16 bits) requests an extra leading frame in slot 0.
     Returns the number of bytes stored into 'buf'.  */
  long *lbuf = (long*) buf;
  int lsize = size / sizeof (long);
  int ind = 0;
  int do_walk = 1;
  int extra_frame = 0;
  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  if ((mode & 0xffff) == FRINFO_FROM_STACK)
    extra_frame = 1;

  /*
   * trace the stack frames from user stack.
   * We are assuming that the frame pointer and return address
   * are null when we are at the top level.
   */
  struct WalkContext wctx;
  wctx.pc = GET_PC (context);
  wctx.sp = GET_SP (context);
  wctx.fp = GET_FP (context);
  wctx.ln = (unsigned long) context->uc_link;
  /* Stack base: prefer the per-thread value saved under unwind_key;
     otherwise assume the stack spans at most 1MB above the current sp.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > wctx.sp)
    wctx.sbase = *sbase;
  else
    {
      wctx.sbase = wctx.sp + 0x100000;
      if (wctx.sbase < wctx.sp)  /* overflow */
	wctx.sbase = (unsigned long) - 1;
    }
  // We do not know yet if update_map_segments is really needed
  __collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0);

  for (;;)
    {
      if (ind >= lsize || wctx.pc == 0)
	break;
      /* While still below 'bptr', keep refreshing slot 0 with the newest
	 PC so that the extra leading frame is the one nearest 'bptr'.  */
      if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	{
	  lbuf[0] = wctx.pc;
	  if (ind == 0)
	    {
	      ind++;
	      if (ind >= lsize)
		break;
	    }
	}
      /* Record this frame's PC once we are above 'bptr' (or unbounded).  */
      if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	{
	  lbuf[ind++] = wctx.pc;
	  if (ind >= lsize)
	    break;
	}

      /* Unwind to the caller frame, crossing signal frames as needed.  */
      for (;;)
	{
	  if (eptr != NULL && wctx.sp >= (unsigned long) eptr)
	    {
	      /* Walked past the requested end; drop the last two entries.  */
	      ind = ind >= 2 ? ind - 2 : 0;
	      goto exit;
	    }
	  int ret = find_i386_ret_addr (&wctx, do_walk);
	  DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d find_i386_ret_addr returns %d\n", __LINE__, ret);
	  if (ret == RA_FAILURE)
	    {
	      /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
	      goto exit;
	    }

	  if (ret == RA_END_OF_STACK)
	    goto exit;
#if WSIZE(32)
	  if (ret == RA_RT_SIGRETURN)
	    {
	      /* rt_sigreturn trampoline: the third word on the signal
		 frame is a pointer to the interrupted ucontext.  */
	      struct SigFrame
	      {
		unsigned long arg0;
		unsigned long arg1;
		unsigned long arg2;
	      } *sframe = (struct SigFrame*) wctx.sp;
	      ucontext_t *ncontext = (ucontext_t*) sframe->arg2;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= sframe->arg2 || nsp > sframe->arg2 + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break;
	    }
	  else if (ret == RA_SIGRETURN)
	    {
	      /* Non-RT signal frame: a struct sigcontext sits directly
		 on the stack; restore pc/sp/fp from it.  */
	      struct sigcontext *sctx = (struct sigcontext*) wctx.sp;
	      wctx.pc = sctx->eip;
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = sctx->esp;
	      wctx.fp = sctx->ebp;
	      break;
	    }
#elif WSIZE(64)
	  if (ret == RA_RT_SIGRETURN)
	    {
	      /* On 64-bit the ucontext of the interrupted context sits
		 directly at the current stack pointer.  */
	      ucontext_t *ncontext = (ucontext_t*) wctx.sp;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= wctx.sp || nsp > wctx.sp + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break;
	    }
#endif /* WSIZE() */
	  /* Same recording logic as the outer loop, applied to the frame
	     we just unwound to.  */
	  if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	    {
	      lbuf[0] = wctx.pc;
	      if (ind == 0)
		{
		  ind++;
		  if (ind >= lsize)
		    break;
		}
	    }
	  if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	    {
	      lbuf[ind++] = wctx.pc;
	      if (ind >= lsize)
		goto exit;
	    }
	}
    }

exit:
#if defined(DEBUG)
  if ((SP_DUMP_UNWIND & __collector_tracelevel) != 0)
    {
      DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d found %d frames\n\n", __LINE__, ind);
      for (int i = 0; i < ind; i++)
	DprintfT (SP_DUMP_UNWIND, "  %3d:  0x%lx\n", i, (unsigned long) lbuf[i]);
    }
#endif
  dump_stack (__LINE__);
  /* If the buffer filled up, overwrite the last slot with a marker
     indicating the stack was truncated.  */
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (unsigned long) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (long);
}
4577 
4578 #elif ARCH(Aarch64)
4579 
4580 static int
stack_unwind(char * buf,int size,void * bptr,void * eptr,ucontext_t * context,int mode)4581 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
4582 {
4583   if (buf && bptr && eptr && context && size + mode > 0)
4584     getByteInstruction ((unsigned char *) eptr);
4585   int ind = 0;
4586   __u64 *lbuf = (void *) buf;
4587   int lsize = size / sizeof (__u64);
4588   __u64 pc = context->uc_mcontext.pc;
4589   __u64 sp = context->uc_mcontext.sp;
4590   __u64 stack_base;
4591   unsigned long tbgn = 0;
4592   unsigned long tend = 0;
4593 
4594   unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
4595   if (sbase && *sbase > sp)
4596     stack_base = *sbase;
4597   else
4598     {
4599       stack_base = sp + 0x100000;
4600       if (stack_base < sp)  // overflow
4601 	stack_base = (__u64) -1;
4602     }
4603   DprintfT (SP_DUMP_UNWIND,
4604     "unwind.c:%d stack_unwind %2d pc=0x%llx  sp=0x%llx  stack_base=0x%llx\n",
4605     __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp,
4606     (unsigned long long) stack_base);
4607 
4608   while (sp && pc)
4609   {
4610     DprintfT (SP_DUMP_UNWIND,
4611 	"unwind.c:%d stack_unwind %2d pc=0x%llx  sp=0x%llx\n",
4612 	__LINE__, ind, (unsigned long long) pc, (unsigned long long) sp);
4613 //      Dl_info dlinfo;
4614 //      if (!dladdr ((void *) pc, &dlinfo))
4615 //	break;
4616 //      DprintfT (SP_DUMP_UNWIND, "%2d: %llx <%s+%llu> (%s)\n",
4617 //		ind, (unsigned long long) pc,
4618 //		dlinfo.dli_sname ? dlinfo.dli_sname : "(?)",
4619 //		(unsigned long long) pc - (unsigned long long) dlinfo.dli_saddr,
4620 //		dlinfo.dli_fname);
4621       lbuf[ind++] = pc;
4622       if (ind >= lsize || sp >= stack_base || (sp & 15) != 0)
4623 	break;
4624       if (pc < tbgn || pc >= tend)
4625 	if (!__collector_check_segment ((unsigned long) pc, &tbgn, &tend, 0))
4626 	  {
4627 	    DprintfT (SP_DUMP_UNWIND,
4628 		     "unwind.c:%d __collector_check_segment failed. sp=0x%lx\n",
4629 		      __LINE__, (unsigned long) sp);
4630 	    break;
4631 	  }
4632       pc = ((__u64 *) sp)[1];
4633       __u64 old_sp = sp;
4634       sp = ((__u64 *) sp)[0];
4635       if (sp < old_sp)
4636 	break;
4637     }
4638   if (ind >= lsize)
4639     {
4640       ind = lsize - 1;
4641       lbuf[ind++] = (__u64) SP_TRUNC_STACK_MARKER;
4642     }
4643   return ind * sizeof (__u64);
4644 }
4645 #endif /* ARCH() */
4646