xref: /netbsd-src/external/gpl3/binutils.old/dist/gprofng/libcollector/unwind.c (revision c42dbd0ed2e61fe6eda8590caa852ccf34719964)
1 /* Copyright (C) 2021 Free Software Foundation, Inc.
2    Contributed by Oracle.
3 
4    This file is part of GNU Binutils.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, 51 Franklin Street - Fifth Floor, Boston,
19    MA 02110-1301, USA.  */
20 
21 #include "config.h"
22 #include <alloca.h>
23 #include <dlfcn.h>
24 #include <stdlib.h>
25 #include <signal.h>
26 #include <unistd.h>
27 #include <pthread.h>
28 
29 #include "gp-defs.h"
30 #include "collector.h"
31 #include "gp-experiment.h"
32 #include "memmgr.h"
33 #include "tsd.h"
34 
35 /* Get dynamic module interface*/
36 #include "collector_module.h"
37 
38 /* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */
39 #include "data_pckts.h"
40 
#if ARCH(SPARC)
/* Layout of a SPARC stack frame as saved by the 'save' instruction.
   The unwinder follows the fr_savfp chain and reads fr_savpc at each
   step to reconstruct the call stack.  */
struct frame
{
  long fr_local[8];         /* saved locals */
  long fr_arg[6];           /* saved arguments [0 - 5] */
  struct frame *fr_savfp;   /* saved frame pointer */
  long fr_savpc;            /* saved program counter */
#if WSIZE(32)
  char *fr_stret;           /* struct return addr */
#endif
  long fr_argd[6];          /* arg dump area */
  long fr_argx[1];          /* array of args past the sixth */
};

#elif ARCH(Intel)
/* Minimal x86 frame image: saved frame pointer followed by the return
   address, as produced by the conventional push-%bp / mov-%sp,%bp
   prologue.  */
struct frame
{
  unsigned long fr_savfp;
  unsigned long fr_savpc;
};
#endif
62 
/* Set the debug trace level */
#define DBG_LT0 0
#define DBG_LT1	1
#define DBG_LT2	2
#define DBG_LT3	3

/* Optional hook into a Java VM for reading instruction bytes; resolved
   lazily via dlsym in __collector_ext_unwind_init and consulted by
   getByteInstruction.  */
int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL;
/* Status codes returned by the VM read hook.  */
#define VM_NO_ACCESS        (-1)
#define VM_NOT_VM_MEMORY    (-2)
#define VM_NOT_X_SEGMENT    (-3)

/* True when p lies in the half-open interval [bgn, end).  */
#define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end))
75 
/*
 * Weed through all the arch-dependent stuff to get the right definition
 * of 'pc' in the ucontext structure.  The system header files are a mess
 * when it comes to the different arches (just look for PC, R_PC, REG_PC).
 *
 */
82 
#if ARCH(SPARC)

/* True when pc 'x' lies inside the MT end-of-task barrier routine
   (addresses resolved at init via dlsym; zero when not present).  */
#define IN_BARRIER(x) \
	( barrier_hdl && \
	  (unsigned long)x >= barrier_hdl && \
	  (unsigned long)x < barrier_hdlx )
static unsigned long barrier_hdl = 0;
static unsigned long barrier_hdlx = 0;

#if WSIZE(64)
/* V9 stack pointers are biased by 2047 bytes.  */
#define STACK_BIAS 2047
/* True when pc 'x' lies inside __misalign_trap_handler (v9 only).  */
#define IN_TRAP_HANDLER(x) \
	( misalign_hdl && \
	  (unsigned long)x >= misalign_hdl && \
	  (unsigned long)x < misalign_hdlx )
static unsigned long misalign_hdl = 0;
static unsigned long misalign_hdlx = 0;
#elif  WSIZE(32)
#define STACK_BIAS 0
#endif

/* Accessors for the saved registers in a ucontext; the 64-bit mcontext
   uses mc_gregs/MC_* names, the 32-bit one gregs/REG_*.  */
#if WSIZE(64)
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
#else
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
#endif
113 
114 #elif ARCH(Intel)
115 #include "opcodes/disassemble.h"
116 
/* Disassembler printf-style callback: swallow all output.  The unwinder
   only drives the disassembler for instruction analysis, never for text.  */
static int
fprintf_func (void *arg, const char *fmt, ...)
{
  (void) arg;
  (void) fmt;
  return 0;
}
122 
/* Disassembler styled-printf callback: swallow all output.  */
static int
fprintf_styled_func (void *arg ATTRIBUTE_UNUSED,
		      enum disassembler_style st ATTRIBUTE_UNUSED,
		      const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}
130 
131 /* Get LENGTH bytes from info's buffer, at target address memaddr.
132    Transfer them to myaddr.  */
133 static int
read_memory_func(bfd_vma memaddr,bfd_byte * myaddr,unsigned int length,disassemble_info * info)134 read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
135 		  disassemble_info *info)
136 {
137   unsigned int opb = info->octets_per_byte;
138   size_t end_addr_offset = length / opb;
139   size_t max_addr_offset = info->buffer_length / opb;
140   size_t octets = (memaddr - info->buffer_vma) * opb;
141   if (memaddr < info->buffer_vma
142       || memaddr - info->buffer_vma > max_addr_offset
143       || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset
144       || (info->stop_vma && (memaddr >= info->stop_vma
145 			     || memaddr + end_addr_offset > info->stop_vma)))
146     return -1;
147   memcpy (myaddr, info->buffer + octets, length);
148   return 0;
149 }
150 
/* Disassembler callback: address printing is not needed; do nothing.  */
static void
print_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
		    disassemble_info *info ATTRIBUTE_UNUSED) { }
154 
/* Disassembler callback: no symbol table is available, so report that
   no symbol exists at any address.  */
static asymbol *
symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
			disassemble_info *info ATTRIBUTE_UNUSED)
{
  return NULL;
}
161 
/* Disassembler callback: treat every symbol as displayable.  */
static bfd_boolean
symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED,
		 disassemble_info *info ATTRIBUTE_UNUSED)
{
  return TRUE;
}
168 
/* Disassembler callback: ignore memory errors; failed reads are already
   signalled by read_memory_func's -1 return.  */
static void
memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED,
		   disassemble_info *info ATTRIBUTE_UNUSED) { }
172 
173 
/* Accessors for the saved pc/sp/fp in a Linux x86 ucontext.  */
#if WSIZE(32)
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP])

#elif WSIZE(64)
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP])
#endif /* WSIZE() */

#elif ARCH(Aarch64)
/* NOTE(review): indices 15/13/14 follow the 32-bit ARM convention
   (r15=pc, r13=sp, r14=lr).  On AArch64 the Linux mcontext exposes
   separate 'pc' and 'sp' fields and regs[] holds x0-x30 — verify these
   accessors against the target's actual ucontext layout.  */
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14])
#endif /* ARCH() */
190 
/*
 * FILL_CONTEXT() for all platforms
 * Could use getcontext() except:
 * - it's not guaranteed to be async signal safe
 * - it's a system call and not that lightweight
 * - it's not portable as of POSIX.1-2008
 * So we just use low-level mechanisms to fill in the few fields we need.
 */
#if ARCH(SPARC)
#if WSIZE(32)
/* 'ta 3' raises the flush-windows trap so the saved register windows
   land on the user stack and the frame chain can be walked.  */
#define FILL_CONTEXT(context) \
	{ \
	greg_t fp; \
	__asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
	__asm__ __volatile__( "ta 3" ); \
	GET_SP(context) = fp; \
	GET_PC(context) = (greg_t)0; \
	}

#elif WSIZE(64)
/* v9 'flushw' spills all active register windows to the stack.  */
#define FILL_CONTEXT(context) \
	{ \
	    greg_t fp; \
	    __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
	    __asm__ __volatile__( "flushw" ); \
	    GET_SP(context) = fp; \
	    GET_PC(context) = (greg_t)0; \
	}
#endif /* WSIZE() */

#elif ARCH(Intel)
/* Capture sp/fp/pc via the __collector_getsp/getfp/getpc helpers and
   describe a nominal 1MB stack segment starting at the current sp.  */
#define FILL_CONTEXT(context) \
	{ \
	    context->uc_link = NULL; \
	    void *sp = __collector_getsp(); \
	    GET_SP(context) = (intptr_t)sp; \
	    GET_FP(context) = (intptr_t)__collector_getfp(); \
	    GET_PC(context) = (intptr_t)__collector_getpc(); \
	    context->uc_stack.ss_sp = sp; \
	    context->uc_stack.ss_size = 0x100000; \
	}

#elif ARCH(Aarch64)
/* NOTE(review): this variant calls getcontext() despite the caveats
   listed above — confirm that is acceptable on this target.  */
#define FILL_CONTEXT(context) \
    { getcontext(context);  \
      context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
    }

#endif /* ARCH() */
240 
241 static int
getByteInstruction(unsigned char * p)242 getByteInstruction (unsigned char *p)
243 {
244   if (__collector_VM_ReadByteInstruction)
245     {
246       int v = __collector_VM_ReadByteInstruction (p);
247       if (v != VM_NOT_VM_MEMORY)
248 	return v;
249     }
250   return *p;
251 }
252 
/* Handle for writing frame-info packets to the experiment's SP_FRINFO_FILE.  */
struct DataHandle *dhndl = NULL;

/* TSD key holding each thread's end-of-stack address (see
   __collector_ext_unwind_key_init).  */
static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;

/* To support two OpenMP API's we use a pointer
 * to the actual function.
 */
int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL;
int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL;

/* Maximum recorded native/Java stack depths; overridable at init via
   the GPROFNG_[JAVA_]MAX_CALL_STACK_DEPTH environment variables.  */
#define DEFAULT_MAX_NFRAMES 256
static int max_native_nframes = DEFAULT_MAX_NFRAMES;
static int max_java_nframes = DEFAULT_MAX_NFRAMES;

/* Buffer-size estimates for a frame packet holding nframes entries.  */
#define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long)          )
#define JAVA_FRAME_BYTES(nframes)   ( ((nframes)+1) * sizeof(long) * 2 + 16 )
#define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) )

/* Multiplicative hash constants used to fold call stacks into 64-bit
   uids/indices; the *_INV values are used to walk the hash backwards
   during compression (see compute_uid).  */
#define ROOT_UID	801425552975190205ULL
#define ROOT_UID_INV	92251691606677ULL
#define ROOT_IDX	13907816567264074199ULL
#define ROOT_IDX_INV	2075111ULL
#define	UIDTableSize	1048576
/* Table of uids already written to the experiment.  Accessed with
   benign data races; see the comments in compute_uid.  */
static volatile uint64_t *UIDTable = NULL;
static volatile int seen_omp = 0;

static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode);
static FrameInfo compute_uid (Frame_packet *frp);
static int omp_no_walk = 0;

#if ARCH(Intel)
#define ValTableSize    1048576
#define OmpValTableSize 65536
static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs
static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs
/* Hash-indexed caches of walk contexts/values used on the OpenMP path.  */
static struct WalkContext *OmpCurCtxs = NULL;
static struct WalkContext *OmpCtxs = NULL;
static uint32_t *OmpVals = NULL;
static unsigned long *OmpRAs = NULL;
static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend);
static int parse_x86_AVX_instruction (unsigned char *pc);

/* State carried across one step of the x86 stack walk.  */
struct WalkContext
{
  unsigned long pc;
  unsigned long sp;
  unsigned long fp;
  unsigned long ln;
  unsigned long sbase; /* stack boundary */
  unsigned long tbgn;  /* current memory segment start */
  unsigned long tend;  /* current memory segment end */
};
#endif
306 
#if defined(DEBUG) && ARCH(Intel)
#include <execinfo.h>

/* Debug aid: print the collector's own native stack via backtrace(3)
   when SP_DUMP_STACK tracing is enabled.  nline identifies the call
   site.  NOTE(review): the array returned by backtrace_symbols is
   malloc'd and never freed here (debug-only path).  */
static void
dump_stack (int nline)
{
  if ((__collector_tracelevel & SP_DUMP_STACK) == 0)
    return;

  enum Constexpr { MAX_SIZE = 1024 };
  void *array[MAX_SIZE];
  size_t sz = backtrace (array, MAX_SIZE);
  char **strings = backtrace_symbols (array, sz);
  DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz);
  for (int i = 0; i < sz; i++)
    DprintfT (SP_DUMP_STACK, "  %3d:  %p %s\n", i, array[i],
	     strings[i] ? strings[i] : "???");
}

/* Debug aid: dump the candidate branch targets found by the walker.  */
#define dump_targets(nline, ntrg, targets) \
    if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \
	for(int i = 0; i < ntrg; i++) \
	     DprintfT (SP_DUMP_UNWIND, "  %2d: 0x%lx\n", i, (long) targets[i])
#else
#define dump_stack(x)
#define dump_targets(nline, ntrg, targets)
#endif
334 
335 void
__collector_ext_unwind_key_init(int isPthread,void * stack)336 __collector_ext_unwind_key_init (int isPthread, void * stack)
337 {
338   void * ptr = __collector_tsd_get_by_key (unwind_key);
339   if (ptr == NULL)
340     {
341       TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n");
342       return;
343     }
344   if (isPthread)
345     {
346       size_t stack_size = 0;
347       void *stack_addr = 0;
348       pthread_t pthread = pthread_self ();
349       pthread_attr_t attr;
350       int err = pthread_getattr_np (pthread, &attr);
351       TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", pthread, err);
352       if (err == 0)
353 	{
354 	  err = pthread_attr_getstack (&attr, &stack_addr, &stack_size);
355 	  if (err == 0)
356 	    stack_addr = (char*) stack_addr + stack_size;
357 	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n",
358 		    (long) stack_size, stack_addr, err);
359 	  err = pthread_attr_destroy (&attr);
360 	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err);
361 	}
362       *(void**) ptr = stack_addr;
363     }
364   else
365     *(void**) ptr = stack;  // cloned thread
366 }
367 
/* Initialize the unwinder: allocate and zero the uid table, read the
   stack-depth limits from the environment, resolve optional VM and
   trap-handler symbols, allocate the arch-specific caches, and — when
   'record' is set — open the frame-info output file.  Terminates the
   experiment on allocation failure.  */
void
__collector_ext_unwind_init (int record)
{
  int sz = UIDTableSize * sizeof (*UIDTable);
  UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1);
  if (UIDTable == NULL)
    {
      __collector_terminate_expt ();
      return;
    }
  CALL_UTIL (memset)((void*) UIDTable, 0, sz);

  /* Clamp user-requested Java depth to [5, MAX_STACKDEPTH].  */
  char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_java_nframes = n;
	}
    }

  /* Same clamping for the native depth.  */
  str = CALL_UTIL (getenv)("GPROFNG_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr = str;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_native_nframes = n;
	}
    }

  TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d  GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n",
	    max_native_nframes, max_java_nframes);
  omp_no_walk = 1;

  /* Optional Java VM hook for reading instruction bytes.  */
  if (__collector_VM_ReadByteInstruction == NULL)
    __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction");

#if ARCH(SPARC)
#if WSIZE(64)
  /* Locate the v9 misalign trap handler; fall back to a fixed length
     past its start when the end symbol is absent.  */
  misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler");
  misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end");
  if (misalign_hdlx == 0)
    misalign_hdlx = misalign_hdl + 292;
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#else
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#endif /* WSIZE() */

#elif ARCH(Intel)
  /* Caches of already-classified pcs for the x86 walker.  */
  sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP);
  AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK);
  AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  /* OpenMP/MPI reconciliation tables, only needed when a runtime
     stack-trace callback is installed.  */
  if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL))
    {
      sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
      if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
	{
	  TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n");
	  __collector_terminate_expt ();
	  return;
	}
    }
#endif /* ARCH() */

  if (record)
    {
      dhndl = __collector_create_handle (SP_FRINFO_FILE);
      __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE);
    }

  unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL);
  if (unwind_key == COLLECTOR_TSD_INVALID_KEY)
    {
      TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n");
      __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created</%s>\n",
			     SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT);
      return;
    }
  TprintfT (0, "unwind_init() completed normally\n");
  return;
}
475 
/* Flush and release the frame-info output handle at experiment close.  */
void
__collector_ext_unwind_close ()
{
  __collector_delete_handle (dhndl);
  dhndl = NULL;
}
482 
483 void*
__collector_ext_return_address(unsigned level)484 __collector_ext_return_address (unsigned level)
485 {
486   if (NULL == UIDTable)  //unwind not initialized yet
487     return NULL;
488   unsigned size = (level + 4) * sizeof (long); // need to strip __collector_get_return_address and its caller
489   ucontext_t context;
490   FILL_CONTEXT ((&context));
491   char* buf = (char*) alloca (size);
492   if (buf == NULL)
493     {
494       TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size);
495       return NULL;
496     }
497   int sz = stack_unwind (buf, size, NULL, NULL, &context, 0);
498   if (sz < (level + 3) * sizeof (long))
499     {
500       TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz);
501       return NULL;
502     }
503   long *lbuf = (long*) buf;
504   TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]);
505   return (void *) (lbuf[level + 2]);
506 }
/*
 *  Collector interface method getFrameInfo
 */
/* Build a Frame_packet for the current event — optionally a Java stack,
   a native stack unwound from a context, and/or a caller-supplied stack
   image — and fold it into a 64-bit uid via compute_uid.  The low 16
   bits of 'mode' select how 'arg' is interpreted; FRINFO_NO_WALK may be
   OR'ed in to suppress the instruction-level walk.  */
FrameInfo
__collector_get_frame_info (hrtime_t ts, int mode, void *arg)
{
  ucontext_t *context = NULL;
  void *bptr = NULL;
  CM_Array *array = NULL;

  int unwind_mode = 0;
  int do_walk = 1;

  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  int bmode = mode & 0xffff;
  int pseudo_context = 0;
  if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK)
    {
      /* Unwind our own stack; 'arg' bounds the walk.  */
      bptr = arg;
      context = (ucontext_t*) alloca (sizeof (ucontext_t));
      FILL_CONTEXT (context);
      unwind_mode |= bmode;
    }
  else if (bmode == FRINFO_FROM_UC)
    {
      context = (ucontext_t*) arg;
      if (context == NULL)
	return (FrameInfo) 0;
      /* A zero sp marks a synthetic context with no real stack.  */
      if (GET_SP (context) == 0)
	pseudo_context = 1;
    }
  else if (bmode == FRINFO_FROM_ARRAY)
    {
      array = (CM_Array*) arg;
      if (array == NULL || array->length <= 0)
	return (FrameInfo) 0;
    }
  else
    return (FrameInfo) 0;

  /* Size the packet for the native stack, plus the Java stack when a
     JVM with AsyncGetCallTrace is present and the context is real.  */
  int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes);
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    max_frame_size += JAVA_FRAME_BYTES (max_java_nframes);

  Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size);
  frpckt->type = FRAME_PCKT;
  frpckt->hsize = sizeof (Frame_packet);

  /* 'd' walks the payload area; 'size' tracks the bytes remaining.  */
  char *d = (char*) (frpckt + 1);
  int size = max_frame_size;

#define MIN(a,b) ((a)<(b)?(a):(b))
  /* get Java info */
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    {
      /* use only 2/3 of the buffer and leave the rest for the native stack */
      int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes));
      if (tmpsz > 0)
	{
	  int sz = __collector_ext_jstack_unwind (d, tmpsz, context);
	  d += sz;
	  size -= sz;
	}
    }

  /* get native stack */
  if (context)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
#if ARCH(Intel)
      if (omp_no_walk == 0)
	do_walk = 1;
#endif
      if (do_walk == 0)
	unwind_mode |= FRINFO_NO_WALK;

      int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes));
      if (tmpsz > 0)
	{
	  sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode);
	  d += sz;
	  size -= sz;
	}
      /* Fill in the header now that the payload length is known.  */
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* create a stack image from user data */
  if (array && array->length > 0)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
      sz = array->length;
      if (sz > size)
	sz = size;  // YXXX should we mark this with truncation frame?
      __collector_memcpy (d, array->bytes, sz);
      d += sz;
      size -= sz;
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* Compute the total size */
  frpckt->tsize = d - (char*) frpckt;
  FrameInfo uid = compute_uid (frpckt);
  return uid;
}
620 
/* Hash every sub-info of the frame packet into a 64-bit uid and a table
   index.  If the uid was seen before, the packet need not be written
   again; otherwise tails of stacks whose hashes are already in UIDTable
   are compressed away and the packet is written out.  */
FrameInfo
compute_uid (Frame_packet *frp)
{
  /* Per-kind index hashes, filled for STACK_INFO/JAVA_INFO below.  */
  uint64_t idxs[LAST_INFO];
  uint64_t uid = ROOT_UID;
  uint64_t idx = ROOT_IDX;

  /* First pass: hash each info's payload from the innermost frame out,
     accumulating both a per-info hash (uidt/idxt) and a packet-wide
     hash (uid/idx).  */
  Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize);
  char *end = (char*) frp + frp->tsize;
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;

      /* Start with a different value to avoid matching with uid */
      uint64_t uidt = 1;
      uint64_t idxt = 1;
      long *ptr = (long*) ((char*) cinfo + cinfo->hsize);
      long *bnd = (long*) ((char*) cinfo + sizeof (Common_info));
      TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize);
      while (ptr > bnd)
	{
	  long val = *(--ptr);
	  tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val);
	  uidt = (uidt + val) * ROOT_UID;
	  idxt = (idxt + val) * ROOT_IDX;
	  uid = (uid + val) * ROOT_UID;
	  idx = (idx + val) * ROOT_IDX;
	}
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  cinfo->uid = uidt;
	  idxs[cinfo->kind] = idxt;
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  tprintf (DBG_LT2, "\n");

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;
  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;

  /* Already recorded in any of the three slots: no need to rewrite.  */
  if (found1 || found2 || found3)
    return (FrameInfo) uid;
  frp->uid = uid;

  /* Compress info's: walk each stack from the outermost frame in,
     unwinding the hash one element at a time (using the inverse
     constants); if a suffix hash is already known, replace the suffix
     with its 8-byte uid link.  */
  cinfo = (Common_info*) ((char*) frp + frp->hsize);
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  long *ptr = (long*) ((char*) cinfo + sizeof (Common_info));
	  long *bnd = (long*) ((char*) cinfo + cinfo->hsize);
	  uint64_t uidt = cinfo->uid;
	  uint64_t idxt = idxs[cinfo->kind];
	  int found = 0;
	  int first = 1;
	  while (ptr < bnd - 1)
	    {
	      int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	      if (UIDTable[idx1] == uidt)
		{
		  found = 1;
		  break;
		}
	      else if (first)
		{
		  /* Record the full-stack hash so later packets can
		     link to it.  */
		  first = 0;
		  UIDTable[idx1] = uidt;
		}
	      long val = *ptr++;
	      uidt = uidt * ROOT_UID_INV - val;
	      idxt = idxt * ROOT_IDX_INV - val;
	    }
	  if (found)
	    {
	      char *d = (char*) ptr;
	      char *s = (char*) bnd;
	      if (!first)
		{
		  /* Emit the 8-byte link uid, little-endian.  */
		  int i;
		  for (i = 0; i<sizeof (uidt); i++)
		    {
		      *d++ = (char) uidt;
		      uidt = uidt >> 8;
		    }
		}
	      /* Close the gap left by the removed suffix.  */
	      int delta = s - d;
	      while (s < end)
		*d++ = *s++;
	      cinfo->kind |= COMPRESSED_INFO;
	      cinfo->hsize -= delta;
	      frp->tsize -= delta;
	      end -= delta;
	    }
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  __collector_write_packet (dhndl, (CM_Packet*) frp);
  return (FrameInfo) uid;
}
747 
/* Hash a caller-supplied array of longs (chained to parent uid 'suid')
   into a uid; write a Uid_packet for it unless the uid is already in
   UIDTable.  Returns the uid, or -1 when the array is misaligned.  */
FrameInfo
__collector_getUID (CM_Array *arg, FrameInfo suid)
{
  /* Both the length and the data pointer must be long-aligned.  */
  if (arg->length % sizeof (long) != 0 ||
      (long) arg->bytes % sizeof (long) != 0)
    return (FrameInfo) - 1;
  if (arg->length == 0)
    return suid;

  /* Seed with the parent uid (or 1) and hash from the end backwards,
     mirroring compute_uid.  */
  uint64_t uid = suid ? suid : 1;
  uint64_t idx = suid ? suid : 1;
  long *ptr = (long*) ((char*) arg->bytes + arg->length);
  long *bnd = (long*) (arg->bytes);
  while (ptr > bnd)
    {
      long val = *(--ptr);
      uid = (uid + val) * ROOT_UID;
      idx = (idx + val) * ROOT_IDX;
    }

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;

  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;
  if (found1 || found2 || found3)
    return (FrameInfo) uid;

  int sz = sizeof (Uid_packet) + arg->length;
  if (suid)
    sz += sizeof (suid);
  Uid_packet *uidp = alloca (sz);
  uidp->tsize = sz;
  uidp->type = UID_PCKT;
  uidp->flags = 0;
  uidp->uid = uid;

  /* Compress: copy values forward, unwinding the hash as we go; stop
     early if a suffix hash is already known and link to it instead.  */
  ptr = (long*) (arg->bytes);
  bnd = (long*) ((char*) arg->bytes + arg->length);
  long *dst = (long*) (uidp + 1);
  uint64_t uidt = uid;
  uint64_t idxt = idx;
  uint64_t luid = suid; /* link uid */

  while (ptr < bnd)
    {

      long val = *ptr++;
      *dst++ = val;

      /* NOTE(review): bnd - ptr counts remaining longs while
	 sizeof (uidt) is a byte count — confirm the intended
	 look-ahead margin here.  */
      if ((bnd - ptr) > sizeof (uidt))
	{
	  uidt = uidt * ROOT_UID_INV - val;
	  idxt = idxt * ROOT_IDX_INV - val;
	  int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	  if (UIDTable[idx1] == uidt)
	    {
	      luid = uidt;
	      break;
	    }
	}
    }
  if (luid)
    {
      /* Append the 8-byte link uid, little-endian.  */
      char *d = (char*) dst;
      for (int i = 0; i<sizeof (luid); i++)
	{
	  *d++ = (char) luid;
	  luid = luid >> 8;
	}
      uidp->flags |= COMPRESSED_INFO;
      uidp->tsize = d - (char*) uidp;
    }
  __collector_write_packet (dhndl, (CM_Packet*) uidp);

  return (FrameInfo) uid;
}
845 
846 int
__collector_getStackTrace(void * buf,int size,void * bptr,void * eptr,void * arg)847 __collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg)
848 {
849   if (arg == (void*) __collector_omp_stack_trace)
850     seen_omp = 1;
851   int do_walk = 1;
852   if (arg == NULL || arg == (void*) __collector_omp_stack_trace)
853     {
854       do_walk = (arg == (void*) __collector_omp_stack_trace && omp_no_walk) ? 0 : 1;
855       ucontext_t *context = (ucontext_t*) alloca (sizeof (ucontext_t));
856       FILL_CONTEXT (context);
857       arg = context;
858     }
859   int unwind_mode = 0;
860   if (do_walk == 0)
861     unwind_mode |= FRINFO_NO_WALK;
862   return stack_unwind (buf, size, bptr, eptr, arg, unwind_mode);
863 }
864 
865 #if ARCH(SPARC)
866 /*
867  * These are important data structures taken from the header files reg.h and
868  * ucontext.h. They are used for the stack trace algorithm explained below.
869  *
870  *	typedef struct ucontext {
871  * 		u_long		uc_flags;
872  * 		struct ucontext	*uc_link;
873  * 		usigset_t   	uc_sigmask;
874  * 		stack_t 	uc_stack;
875  * 		mcontext_t 	uc_mcontext;
876  * 		long		uc_filler[23];
877  * 	} ucontext_t;
878  *
879  *	#define	SPARC_MAXREGWINDOW	31
880  *
881  *	struct	rwindow {
882  *		greg_t	rw_local[8];
883  *		greg_t	rw_in[8];
884  *	};
885  *
886  *	#define	rw_fp	rw_in[6]
887  *	#define	rw_rtn	rw_in[7]
888  *
889  *	struct gwindows {
890  *		int		wbcnt;
891  *		int		*spbuf[SPARC_MAXREGWINDOW];
892  *		struct rwindow	wbuf[SPARC_MAXREGWINDOW];
893  *	};
894  *
895  *	typedef struct gwindows	gwindows_t;
896  *
897  *	typedef struct {
898  *		gregset_t	gregs;
899  *		gwindows_t	*gwins;
900  *		fpregset_t	fpregs;
901  *		long		filler[21];
902  *	} mcontext_t;
903  *
904  * The stack would look like this when SIGPROF occurrs.
905  *
906  *	------------------------- <- high memory
907  *	|			|
908  *	|			|
909  *	-------------------------
910  *	|			|
911  *	------------------------- <- fp' <-|
912  *	|			|	   |
913  *		:	:	 	   |
914  *	|			|	   |
915  *	-------------------------	   |
916  *	|	fp		|----------|
917  *	|			|
918  *	------------------------- <- sp'
919  *	|			|		             |	|
920  *	| 	gwins		| <- saved stack pointers &  |  |
921  *	|			|    register windows	     |  |- mcontext
922  *	-------------------------			     |  |
923  *	|	gregs		| <- saved registers	     |  |
924  *	-------------------------			     |
925  *	|			|			     |- ucontext
926  *	------------------------- <- ucp (ucontext pointer)  |
927  *	|			|				|
928  *	|			|				|- siginfo
929  *	------------------------- <- sip (siginfo pointer)	|
930  *	|			|
931  *	------------------------- <- sp
932  *
933  * Then the signal handler is called with:
934  *	handler( signo, sip, uip );
935  * When gwins is null, all the stack frames are saved in the user stack.
936  * In that case we can find sp' from gregs and walk the stack for a backtrace.
937  * However, if gwins is not null we will have a more complicated case.
938  * Wbcnt(in gwins) tells you how many saved register windows are valid.
939  * This is important because the kernel does not allocate the entire array.
940  * And the top most frame is saved in the lowest index element. The next
941  * paragraph explains the possible causes.
942  *
 * There are two routines in the kernel to flush out user register windows.
 *	flush_user_windows and flush_user_windows_to_stack
 * The first routine will not cause a page fault. Therefore if the user
 * stack is not in memory, the register windows will be saved to the pcb.
 * This can happen when the kernel is trying to deliver a signal and
 * the user stack got swapped out. The kernel will then build a new context
 * for the signal handler and the saved register windows will
 * be copied to the ucontext as shown above. On the other hand,
 * flush_user_windows_to_stack can cause a page fault, and if it fails
 * then there is something wrong (stack overflow, misalignment).
 * The first saved register window does not necessarily correspond to the
 * first stack frame. So the current stack pointer must be compared with
 * the stack pointers in spbuf to find a match.
956  *
 * We will also follow the uc_link field in ucontext to trace nested
 * signal stack frames.
959  *
960  */
961 
962 /* Dealing with trap handlers.
963  * When a user defined trap handler is invoked the return address
964  * (or actually the address of an instruction that raised the trap)
965  * is passed to the trap handler in %l6, whereas saved %o7 contains
966  * garbage. First, we need to find out if a particular pc belongs
967  * to the trap handler, and if so, take the %l6 value from the stack rather
968  * than %o7 from either the stack or the register.
969  * There are three possible situations represented
970  * by the following stacks:
971  *
972  *   MARKER		MARKER			MARKER
973  *   trap handler pc	__func pc before 'save'	__func pc after 'save'
974  *   %l6		%o7 from reg		%o7 (garbage)
975  *   ...		%l6			trap handler pc
976  *			...			%l6
977  *						...
978  * where __func is a function called from the trap handler.
979  *
980  * Currently this is implemented to only deal with __misalign_trap_handler
981  * set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
982  * macro shows it. A general solution is postponed.
983  */
984 
985 /* Special handling of unwind through the parallel loop barrier code:
986  *
987  *  The library defines two symbols, __mt_EndOfTask_Barrier_ and
988  *	__mt_EndOfTask_Barrier_Dummy_ representing the first word of
 *	the barrier synchronization code, and the first word following
990  *	it.  Whenever the leaf PC is between these two symbols,
991  *	the unwind code is special-cased as follows:
992  *	The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
993  *	function, so its return address is in a register, not saved on
994  *	the stack.
995  *
996  *    MARKER
997  *    __mt_EndOfTask_Barrier_ PC -- the leaf PC
998  *    loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
999  *	    this address is taken from the %O0 register
1000  *    {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
1001  *     ...
1002  *
1003  *  With this trick, the analyzer will show the time in the barrier
1004  *	attributed to the loop at the end of which the barrier synchronization
1005  *	is taking place.  That loop body routine, will be shown as called
1006  *	from the function from which it was extracted, which will be shown
1007  *	as called from the real caller, either the slave or master library routine.
1008  */
1009 
1010 /*
1011  * These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
1012  *
1013  * Note that 0x82 is ASI_PNF.  See
1014  *   http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
1015  *   ASI  address space identifier; PNF  primary no fault
1016  */
1017 
/* load an int from an address */

/* if the address is illegal, return a 0 */
/*
 * Load a 32-bit word through ASI 0x82 (ASI_PNF, "primary no fault"):
 * a load from an unmapped address faults silently and yields 0 instead
 * of delivering a signal to the process.
 * NOTE: a valid address that happens to contain 0 is indistinguishable
 * from a faulted load, so callers must treat a 0 result as "possibly
 * invalid" (see the timing discussion below).
 */
static int
SPARC_no_fault_load_int (void *addr)
{
  int val;
  __asm__ __volatile__(
		       "lda [%1] 0x82, %0\n\t"   /* lda [addr] ASI_PNF, val */
		       : "=r" (val)
		       : "r" (addr)
		       );

  return val;
}
1033 
1034 /* check if an address is invalid
1035  *
1036  * A no-fault load of an illegal address still faults, but it does so silently to the calling process.
1037  * It returns a 0, but so could a load of a legal address.
1038  * So, we time the load.  A "fast" load must be a successful load.
1039  * A "slow" load is probably a fault.
1040  * Since it could also be a cache/TLB miss or other abnormality,
1041  * it's safest to retry a slow load.
1042  * The cost of trying a valid address should be some nanosecs.
1043  * The cost of trying an invalid address up to 10 times could be some microsecs.
1044  */
#if 0   /* currently unused; kept for reference -- see the timing rationale above */
static
int invalid_SPARC_addr(void *addr)
{
    long t1, t2;
    int i;

    /* Retry up to 10 times: a single slow load may be a cache/TLB miss
     * rather than a fault, but consistent slowness indicates a fault.  */
    for (i=0; i<10; i++) {
      __asm__ __volatile__(
	"rd %%tick, %0\n\t"
	"lduba [%2] 0x82, %%g0\n\t"   /* no-fault byte load, result discarded */
	"rd %%tick, %1\n\t"
	: "=r" (t1), "=r" (t2)
	: "r" (addr) );
      if ( (t2 - t1) < 100 )
	return 0;   /* a fast load must have been a successful load */
    }
    return 1;   /* consistently slow: most likely an invalid address */
}
#endif
1065 
1066 /*
1067  * The standard SPARC procedure-calling convention is that the
1068  * calling PC (for determining the return address when the procedure
1069  * is finished) is placed in register %o7.  A called procedure
1070  * typically executes a "save" instruction that shifts the register
1071  * window, and %o7 becomes %i7.
1072  *
1073  * Optimized leaf procedures do not shift the register window.
1074  * They assume the return address will remain %o7.  So when
1075  * we process a leaf PC, we walk instructions to see if there
1076  * is a call, restore, or other instruction that would indicate
1077  * we can IGNORE %o7 because this is NOT a leaf procedure.
1078  *
1079  * If a limited instruction walk uncovers no such hint, we save
1080  * not only the PC but the %o7 value as well... just to be safe.
1081  * Later, in DBE post-processing of the call stacks, we decide
1082  * whether any recorded %o7 value should be used as a caller
1083  * frame or should be discarded.
1084  */
1085 
1086 #define IS_ILLTRAP(x) (((x) & 0xc1c00000) == 0)
1087 #define IS_SAVE(x)    (((x) & 0xc1f80000) == 0x81e00000)
1088 #define IS_MOVO7R(x)  (((x) & 0xc1f8201f) == 0x8160000f)
1089 #define IS_MOVRO7(x)  (((x) & 0xfff82000) == 0x9f600000)
1090 #define IS_ORRG0O7(x) (((x) & 0xff78201f) == 0x9e100000)
1091 #define IS_ORG0RO7(x) (((x) & 0xff7fe000) == 0x9e100000)
1092 #define IS_ORG0O7R(x) (((x) & 0xc17fe01f) == 0x8010000f)
1093 #define IS_ORO7G0R(x) (((x) & 0xc17fe01f) == 0x8013c000)
1094 #define IS_RESTORE(x) (((x) & 0xc1f80000) == 0x81e80000)
1095 #define IS_RET(x)     ((x) == 0x81c7e008)
1096 #define IS_RETL(x)    ((x) == 0x81c3e008)
1097 #define IS_RETURN(x)  (((x) & 0xc1f80000) == 0x81c80000)
1098 #define IS_BRANCH(x)  ((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
1099 #define IS_CALL(x)    (((x) & 0xc0000000) == 0x40000000)
1100 #define IS_LDO7(x)    (((x) & 0xfff80000) == 0xde000000)
1101 
1102 static long pagesize = 0;
1103 
/*
 * Record the leaf frame(s) for the interrupted pc into lbuf.
 *
 * On SPARC a leaf routine keeps its return address in %o7 rather than on
 * the stack (see the calling-convention discussion above).  This routine
 * walks forward up to 20 instructions from pc looking for evidence that
 * the interrupted function is, or is not, a leaf:
 *  - a 'save' means the register window has shifted, so %o7 still holds
 *    the caller's address: record pc (and %o7) and return;
 *  - instructions that write %o7 (call, mov/or into %o7) or consume it
 *    (restore, ret, retl, return) mean %o7 cannot be trusted as-is.
 * If the walk is inconclusive, both pc and %o7 are recorded behind an
 * SP_LEAF_CHECK_MARKER so the DBE can decide later whether %o7 is a
 * plausible caller frame.
 *
 * lbuf/lsize  output array of addresses and its capacity (in longs)
 * ind         next free index in lbuf
 * context     ucontext from which pc and registers are read
 * returns     updated index into lbuf
 */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);

  /* omazur: TBR START -- not used */
  /* Parallel-loop barrier special case: record the leaf pc and the loop
   * body address from %o0 (see the __mt_EndOfTask_Barrier_ note above). */
  if (IN_BARRIER (pc))
    {
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  /* In the trap handler %o7 is garbage (see the note above); just record pc. */
  if (IN_TRAP_HANDLER (pc))
    {
      if (ind < lsize)
	lbuf[ind++] = pc;
      return ind;
    }
#endif
  /* Forward instruction walk: at most 20 instructions from the leaf pc. */
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
	break;
      else if (IS_SAVE (instr))
	{
	  /* Window not yet shifted: %o7 is still the valid return address. */
	  if (ind < lsize)
	    lbuf[ind++] = pc;
	  if (o7 && ind < lsize)
	    lbuf[ind++] = o7;
	  return ind;
	}
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
	break;
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
	{
	  /* %o7 is (re)loaded from register rs2; take its current value
	   * from the context if rs2 is a global/out register. */
	  int rs2 = (instr & 0x1f) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_ORRG0O7 (instr))
	{
	  int rs2 = ((instr & 0x7c000) >> 14) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_RESTORE (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETURN (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RET (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETL (instr))
	{
	  /* process delay slot */
	  instr = *instrp++;
	  if (IS_RESTORE (instr))
	    o7 = 0;
	  break;
	}
      else if (IS_BRANCH (instr))
	{
	  /* A branch ends the forward walk; scan backwards from pc for
	   * instructions that would prove this is not a leaf routine. */
	  unsigned *backbegin = ((unsigned *) pc - 1);
	  unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
	  while (backbegin > backend)
	    {
	      // 21920143 stack unwind: SPARC process_leaf backtracks too far
	      /*
	       * We've already dereferenced backbegin+1.
	       * So if backbegin is on the same page, we're fine.
	       * If we've gone to a different page, possibly things are not fine.
	       * We don't really know how to test that.
	       * Let's just assume the worst:  that dereferencing backbegin would segv.
	       * We won't know if we're in a leaf function or not.
	       */
	      if (pagesize == 0)
		pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
	      if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
		break;
	      unsigned backinstr = *backbegin--;
	      if (IS_LDO7 (backinstr))
		{
		  o7 = 0;
		  break;
		}
	      else if (IS_ILLTRAP (backinstr))
		break;
	      else if (IS_RETURN (backinstr))
		break;
	      else if (IS_RET (backinstr))
		break;
	      else if (IS_RETL (backinstr))
		break;
	      else if (IS_CALL (backinstr))
		break;
	      else if (IS_SAVE (backinstr))
		{
		  o7 = 0;
		  break;
		}
	    }
	  break;
	}
      else if (IS_CALL (instr))
	o7 = 0;
    }

#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("<event kind=\"%s\" id=\"%d\">time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x</event>\n",
      //       SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif

  if (o7)
    {
      /* Inconclusive walk: record both pc and %o7 behind a marker and let
       * DBE post-processing decide whether %o7 is a genuine caller. */
      if (ind < lsize)
	lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}
1257 
1258 #if WSIZE(64)
// detect signal handler
/*
 * Check whether the saved return address tpc points at the kernel's
 * __rt_sigreturn_stub; if so, the frame *pfp is actually a signal frame,
 * and the interrupted pc/fp are recovered from the rt_signal_frame layout
 * and recorded into lbuf.  *pfp is advanced to the interrupted frame.
 * Returns the (possibly updated) index into lbuf; an unchanged index
 * tells the caller this was not a signal frame.
 */
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
		   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc) // the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind;  // the address is not aligned

  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
	break;
      tpc += 4;
    }

  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;

  // We are looking for __rt_sigreturn_stub with the instruction
  //     0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       *     struct sparc_stackf     ss;
       *     siginfo_t               info;
       *     struct pt_regs          regs;
       *     ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       *     unsigned long u_regs[16];
       *     unsigned long tstate;
       *     unsigned long tpc;
       *     unsigned long tnpc;
       *     ....};
       * the pc and fp registers have offsets of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offset is 136.
       * NOTE(review): these offsets are tied to the kernel's rt_signal_frame
       * layout -- verify against the running kernel's arch/sparc headers.
       */
      greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136));
      greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120));
      (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112));
      if (pc && pc1)
	{
	  /* Synthesize the extra top frame when unwinding from FRINFO_FROM_STACK
	   * and the recovered frame is below bptr.  */
	  if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2)
	    {
	      lbuf[0] = pc1;
	      if (ind == 0)
		ind++;
	    }
	  if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr)
	    {
	      if (ind < lsize)
		lbuf[ind++] = (unsigned long) tpc;
	      if (ind < lsize)
		lbuf[ind++] = pc;
	      if (ind < lsize)
		lbuf[ind++] = pc1;
	    }
	}
      DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp));
    }
  return ind;
}
1332 #endif
1333 
1334 /*
1335  * int stack_unwind( char *buf, int size, ucontext_t *context )
1336  *	This routine looks into the mcontext and
1337  *	trace stack frames to record return addresses.
1338  */
1339 int
stack_unwind(char * buf,int size,void * bptr,void * eptr,ucontext_t * context,int mode)1340 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
1341 {
1342   /*
1343    * trace the stack frames from user stack.
1344    * We are assuming that the frame pointer and return address
1345    * are null when we are at the top level.
1346    */
1347   long *lbuf = (long*) buf;
1348   int lsize = size / sizeof (long);
1349   struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */
1350   greg_t pc; /* program counter */
1351   int extra_frame = 0;
1352   if ((mode & 0xffff) == FRINFO_FROM_STACK)
1353     extra_frame = 1;
1354 
1355   int ind = 0;
1356   if (bptr == NULL)
1357     ind = process_leaf (lbuf, ind, lsize, context);
1358 
1359   int extra_frame = 0;
1360   if ((mode & 0xffff) == FRINFO_FROM_STACK)
1361     extra_frame = 1;
1362   int ind = 0;
1363   if (bptr == NULL)
1364     ind = process_leaf (lbuf, ind, lsize, context);
1365 
1366   while (fp)
1367     {
1368       if (ind >= lsize)
1369 	break;
1370       fp = (struct frame *) ((char *) fp + STACK_BIAS);
1371       if (eptr && fp >= (struct frame *) eptr)
1372 	{
1373 	  ind = ind >= 2 ? ind - 2 : 0;
1374 	  break;
1375 	}
1376 #if WSIZE(64) // detect signal handler
1377       unsigned char * tpc = ((unsigned char*) (fp->fr_savpc));
1378       struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS);
1379       int old_ind = ind;
1380       ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame);
1381       if (ind != old_ind)
1382 	{
1383 	  pc = (greg_t) tpc;
1384 	  fp = tfp;
1385 	}
1386       else
1387 #endif
1388 	{
1389 #if WSIZE(64)
1390 	  if (IN_TRAP_HANDLER (lbuf[ind - 1]))
1391 	    pc = fp->fr_local[6];
1392 	  else
1393 	    pc = fp->fr_savpc;
1394 #else
1395 	  pc = fp->fr_savpc;
1396 #endif
1397 	  fp = fp->fr_savfp;
1398 	  if (pc)
1399 	    {
1400 	      if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2)
1401 		{
1402 		  lbuf[0] = pc;
1403 		  if (ind == 0)
1404 		    ind++;
1405 		}
1406 	      if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr)
1407 		lbuf[ind++] = pc;
1408 	    }
1409 	}
1410 
1411       /* 4616238: _door_return may have a frame that has non-zero
1412        * saved stack pointer and zero pc
1413        */
1414       if (pc == (greg_t) NULL)
1415 	break;
1416     }
1417 
1418   if (ind >= lsize)
1419     { /* truncated stack handling */
1420       ind = lsize - 1;
1421       lbuf[ind++] = SP_TRUNC_STACK_MARKER;
1422     }
1423   return ind * sizeof (long);
1424 }
1425 
1426 #elif ARCH(Intel)
1427 
1428 /* get __NR_<syscall_name> constants */
1429 #include <syscall.h>
1430 
1431 /*
1432  * From uts/intel/ia32/os/sendsig.c:
1433  *
1434  * An amd64 signal frame looks like this on the stack:
1435  *
1436  * old %rsp:
1437  *		<128 bytes of untouched stack space>
1438  *		<a siginfo_t [optional]>
1439  *		<a ucontext_t>
1440  *		<siginfo_t *>
1441  *		<signal number>
1442  * new %rsp:	<return address (deliberately invalid)>
1443  *
1444  * The signal number and siginfo_t pointer are only pushed onto the stack in
1445  * order to allow stack backtraces.  The actual signal handling code expects the
1446  * arguments in registers.
1447  *
1448  * An i386 SVR4/ABI signal frame looks like this on the stack:
1449  *
1450  * old %esp:
1451  *		<a siginfo32_t [optional]>
1452  *		<a ucontext32_t>
1453  *		<pointer to that ucontext32_t>
1454  *		<pointer to that siginfo32_t>
1455  *		<signo>
1456  * new %esp:	<return address (deliberately invalid)>
1457  */
1458 
1459 #if WSIZE(32)
1460 #define OPC_REG(x)      ((x)&0x7)
1461 #define MRM_REGD(x)     (((x)>>3)&0x7)
1462 #define MRM_REGS(x)     ((x)&0x7)
1463 #define RED_ZONE        0
1464 #elif WSIZE(64)
1465 #define OPC_REG(x)      (B|((x)&0x7))
1466 #define MRM_REGD(x)     (R|(((x)>>3)&0x7))
1467 #define MRM_REGS(x)     (B|((x)&0x7))
1468 #define RED_ZONE        16
1469 #endif
1470 #define MRM_EXT(x)      (((x)>>3)&0x7)
1471 #define MRM_MOD(x)      ((x)&0xc0)
1472 
1473 #define RAX             0
1474 #define RDX             2
1475 #define RSP             4
1476 #define RBP             5
1477 
/* One hypothesis of the speculative x86 instruction walk.  Up to MAXCTX
 * of these are live at once; new ones are forked at branches and dead
 * ones are discarded (see DELETE_CURCTX).  */
struct AdvWalkContext
{
  unsigned char *pc;        /* current instruction address of this walk */
  unsigned long *sp;        /* simulated stack pointer */
  unsigned long *sp_safe;   /* lowest stack address this walk may read */
  unsigned long *fp;        /* simulated frame pointer */
  unsigned long *fp_sav;    /* saved fp value -- presumably staged on push; verify against walker code */
  unsigned long *fp_loc;    /* stack slot where fp was saved -- TODO confirm */
  unsigned long rax;        /* tracked value of %rax */
  unsigned long rdx;        /* tracked value of %rdx */
  unsigned long ra_sav;     /* tracked return-address value (used when sp == ra_loc) */
  unsigned long *ra_loc;    /* stack slot holding the tracked return address */
  unsigned long regs[16];   /* tracked general-purpose register values; 0 = unknown */
  int tidx;         /* targets table index */
  uint32_t cval;    /* cache value */
};
1494 
1495 static unsigned long
getRegVal(struct AdvWalkContext * cur,int r,int * undefRez)1496 getRegVal (struct AdvWalkContext *cur, int r, int *undefRez)
1497 {
1498   if (cur->regs[r] == 0)
1499     {
1500       if (r == RBP)
1501 	{
1502 	  tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx  cur->pc=0x%lx\n",
1503 		   (unsigned long) cur->fp, (unsigned long) cur->pc);
1504 	  return (unsigned long) cur->fp;
1505 	}
1506       *undefRez = 1;
1507     }
1508   tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx  cur->pc=0x%lx\n",
1509 	   r, (unsigned long) cur->regs[r], (unsigned long) cur->pc);
1510   return cur->regs[r];
1511 }
1512 
1513 static unsigned char *
check_modrm(unsigned char * pc)1514 check_modrm (unsigned char *pc)
1515 {
1516   unsigned char modrm = *pc++;
1517   unsigned char mod = MRM_MOD (modrm);
1518   if (mod == 0xc0)
1519     return pc;
1520   unsigned char regs = modrm & 0x07;
1521   if (regs == RSP)
1522     {
1523       if (mod == 0x40)
1524 	return pc + 2;  // SIB + disp8
1525       if (mod == 0x80)
1526 	return pc + 5;  // SIB + disp32
1527       return pc + 1;    // SIB
1528     }
1529   if (mod == 0x0)
1530     {
1531       if (regs == RBP)
1532 	pc += 4; // disp32
1533     }
1534   else if (mod == 0x40)
1535     pc += 1; /* byte */
1536   else if (mod == 0x80)
1537     pc += 4; /* word */
1538   return pc;
1539 }
1540 
/* Read a signed immediate of width w (1, 2, or anything else = 4 bytes)
 * from the instruction stream at pc, sign-extended to int.  */
static int
read_int (unsigned char *pc, int w)
{
  switch (w)
    {
    case 1:
      return *((char *) pc);
    case 2:
      return *((short *) pc);
    default:
      return *((int *) pc);
    }
}
1550 
1551 /* Return codes */
1552 enum
1553 {
1554   RA_FAILURE = 0,
1555   RA_SUCCESS,
1556   RA_END_OF_STACK,
1557   RA_SIGRETURN,
1558   RA_RT_SIGRETURN
1559 };
1560 
1561 /* Cache value encodings */
1562 static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */
1563 static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */
1564 
1565 
1566 #define MAXCTX         16
1567 #define MAXTRGTS       64
1568 #define MAXJMPREG       2
1569 #define MAXJMPREGCTX    3
1570 
1571 #define DELETE_CURCTX()  __collector_memcpy (cur, buf + (--nctx), sizeof (*cur))
1572 
/**
 * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK.
 * These are direct-mapped caches of previously analyzed pcs:
 * a FROMFP hit means the return address lives right above the frame
 * pointer, so the unwind step can be done here without re-analysis;
 * an EOSTCK hit means this pc was previously found to be at the end
 * of the stack.
 * @param wctx  walk context; on a FROMFP hit it is advanced one frame
 * @return RA_SUCCESS / RA_END_OF_STACK on a hit, RA_FAILURE otherwise
 */
static int
cache_get (struct WalkContext *wctx)
{
  unsigned long addr;
  if (AddrTable_RA_FROMFP != NULL)
    {
      uint64_t idx = wctx->pc % ValTableSize;
      addr = AddrTable_RA_FROMFP[ idx ];
      if (addr == wctx->pc)
	{ // Found in AddrTable_RA_FROMFP
	  unsigned long *sp = NULL;
	  unsigned long fp = wctx->fp;
	  /* validate fp before use */
	  if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
	    return RA_FAILURE;
	  sp = (unsigned long *) fp;
	  fp = *sp++;                 /* saved caller frame pointer */
	  unsigned long ra = *sp++;   /* saved return address */
	  unsigned long tbgn = wctx->tbgn;
	  unsigned long tend = wctx->tend;
	  /* the return address must fall in some executable segment */
	  if (ra < tbgn || ra >= tend)
	    if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	      return RA_FAILURE;
	  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
	  if (npc == 0)
	    return RA_FAILURE;
	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc);
	  /* commit: step the walk context up one frame */
	  wctx->pc = npc;
	  wctx->sp = (unsigned long) sp;
	  wctx->fp = fp;
	  wctx->tbgn = tbgn;
	  wctx->tend = tend;
	  return RA_SUCCESS;
	}
    }
  if (NULL == AddrTable_RA_EOSTCK)
    return RA_FAILURE;
  uint64_t idx = wctx->pc % ValTableSize;
  addr = AddrTable_RA_EOSTCK[ idx ];
  if (addr != wctx->pc)
    return RA_FAILURE;
  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__);
  return RA_END_OF_STACK;
}
1622 /**
1623  * Save pc in RA_FROMFP or RA_EOSTCK cache depending on val
1624  * @param wctx
1625  */
1626 static void
cache_put(struct WalkContext * wctx,const uint32_t val)1627 cache_put (struct WalkContext *wctx, const uint32_t val)
1628 {
1629   if (RA_FROMFP == val)
1630     {
1631       // save pc in RA_FROMFP cache
1632       if (NULL != AddrTable_RA_FROMFP)
1633 	{
1634 	  uint64_t idx = wctx->pc % ValTableSize;
1635 	  AddrTable_RA_FROMFP[ idx ] = wctx->pc;
1636 	  if (NULL != AddrTable_RA_EOSTCK)
1637 	    if (AddrTable_RA_EOSTCK[ idx ] == wctx->pc)
1638 	      // invalidate pc in RA_EOSTCK cache
1639 	      AddrTable_RA_EOSTCK[ idx ] = 0;
1640 	}
1641       return;
1642     }
1643   if (RA_EOSTCK == val)
1644     {
1645       // save pc in RA_EOSTCK cache
1646       if (NULL != AddrTable_RA_EOSTCK)
1647 	{
1648 	  uint64_t idx = wctx->pc % ValTableSize;
1649 	  AddrTable_RA_EOSTCK[ idx ] = wctx->pc;
1650 	  if (NULL != AddrTable_RA_FROMFP)
1651 	    {
1652 	      if (AddrTable_RA_FROMFP[ idx ] == wctx->pc)
1653 		// invalidate pc in RA_FROMFP cache
1654 		AddrTable_RA_FROMFP[ idx ] = 0;
1655 	    }
1656 	}
1657       return;
1658     }
1659 }
1660 
/*
 * Complete one unwind step for walk hypothesis cur: pop the return
 * address from the simulated stack, validate it against the known text
 * segments, and commit the new pc/sp/fp into wctx.
 * When cache_on is non-zero, successful end-of-stack / from-fp results
 * are recorded via cache_put() for reuse by cache_get().
 * Returns RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE.
 */
static int
process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on)
{
  /* the simulated sp must still lie inside the thread's stack */
  if ((unsigned long) cur->sp >= wctx->sbase ||
      (unsigned long) cur->sp < wctx->sp)
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n",
		cur->sp, wctx->sp, wctx->sbase);
      return RA_FAILURE;
    }

  unsigned long ra;
  if (cur->sp == cur->ra_loc)
    {
      /* sp sits exactly on the tracked return-address slot */
      ra = cur->ra_sav;
      cur->sp++;
    }
  else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
    ra = *cur->sp++;   /* pop the return address off the simulated stack */
  else
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe);
      return RA_FAILURE;
    }
  if (ra == 0)
    {
      /* a zero return address marks the end of the stack */
      if (cache_on)
	cache_put (wctx, RA_EOSTCK);
      wctx->pc = ra;
      wctx->sp = (unsigned long) cur->sp;
      wctx->fp = (unsigned long) cur->fp;
      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__);
      return RA_END_OF_STACK;
    }

  /* the return address must fall into some known text segment */
  unsigned long tbgn = wctx->tbgn;
  unsigned long tend = wctx->tend;
  if (ra < tbgn || ra >= tend)
    {
      if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n",
		    ra, wctx->tbgn, wctx->tend);
	  return RA_FAILURE;
	}
    }

  if (cur->cval == RA_FROMFP)
    {
      /* only trust (and cache) the frame-pointer evidence when the RA we
       * just popped came from directly above the frame pointer */
      if (wctx->fp == (unsigned long) (cur->sp - 2))
	{
	  if (cache_on)
	    cache_put (wctx, RA_FROMFP);
	}
      else
	cur->cval = 0;
    }

  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
  if (npc == 0)
    {
      if (cur->cval == RA_FROMFP)
	{
	  /* We have another evidence that we can trust this RA */
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc);
	  wctx->pc = ra;
	}
      else
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n");
	  return RA_FAILURE;
	}
    }
  else
    wctx->pc = npc;
  /* commit the advanced frame into the caller-visible context */
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = (unsigned long) cur->fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;
  return RA_SUCCESS;
}
1742 
/* Convenience wrapper: process_return_real() with result caching enabled. */
static int
process_return (struct WalkContext *wctx, struct AdvWalkContext *cur)
{
  return process_return_real (wctx, cur, 1);
}
1748 
/*
 * Record an unwind result in the OpenMP caches: for the pc in
 * wctx_pc_save, remember the outcome val, the before/after walk
 * contexts, and the return address found on the stack, so later
 * unwinds through the same pc can be answered without a full walk.
 * Tables are hash-indexed by pc; allocation is lazy (first call).
 * @param cur_sp_safe    lowest stack address that may be read
 * @param wctx_pc_save   walk context before the unwind step
 * @param wctx           walk context after the unwind step
 * @param val            result code being cached
 */
static void
omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save,
	       struct WalkContext *wctx, uint32_t val)
{
  /* lazily allocate the four parallel tables on first use */
  if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL))
    {
      size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
    }
  if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
    return;

#define USE_18434988_OMP_CACHE_WORKAROUND
#ifndef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx = wctx_pc_save->pc * ROOT_IDX;
  OmpVals[ idx % OmpValTableSize ] = val;
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
#endif
  /* locate the return address for this step: from the frame pointer when
   * it looks valid, otherwise from just below the post-step sp */
  unsigned long *sp = NULL;
  unsigned long fp = wctx_pc_save->fp;
  int from_fp = 0;
  if (val == RA_END_OF_STACK)
    {
      sp = (unsigned long *) (wctx->sp);
      sp--;
      TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp);
    }
  else
    {
      if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp))
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, sp=%p\n", sp);
	}
      else
	{
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp);
	  sp = (unsigned long *) fp;
	  from_fp = 1;
	}
    }

  if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase))
    return;

  unsigned long ra = *sp++;
  if (from_fp)
    {
      /* the fp-derived RA is bogus if outside the text segment; fall back
       * to the slot below the post-step sp */
      unsigned long tbgn = wctx_pc_save->tbgn;
      unsigned long tend = wctx_pc_save->tend;
      if (ra < tbgn || ra >= tend)
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  ra = *sp++;
	}
    }
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
  /* 18434988: write RA slot last -- a zero RA acts as a "locked/invalid"
   * marker while the other three entries are being updated */
  uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX;
  uint64_t idx2 = (idx1 + val) * ROOT_IDX;
  uint64_t idx3 = (idx2 + val) * ROOT_IDX;
  uint64_t idx4 = (idx3 + val) * ROOT_IDX;
  OmpRAs [ idx4 % OmpValTableSize ] = 0; // lock
  OmpVals[ idx1 % OmpValTableSize ] = val;
  __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
  OmpRAs [ idx4 % OmpValTableSize ] = ra;
#else
  idx = (idx + val) * ROOT_IDX;
  OmpRAs[ idx % OmpValTableSize ] = ra;
#endif
  TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc);
}
1832 
/*
 *  See bug 17166877 - malloc_internal unwind failure.
 *  Sometimes several call instructions directly follow a ret:
 *      leave
 *      ret
 *      call xxx
 *      call xxxx
 *      call xxxxx
 *  If such calls are also jump targets, we should not fork a new
 *  jump context for them, since they may end up in another function.
 *
 *  Returns non-zero when npc points at a call (0xe8) that belongs to a
 *  run of at least 3 consecutive 5-byte calls preceded by leave;ret.
 */
static int
is_after_ret (unsigned char * npc)
{
  enum { CALL_OP = 0xe8, RET_OP = 0xc3, LEAVE_OP = 0xc9,
	 CALL_LEN = 5, MAX_STEPS = 10, MIN_CALLS = 3 };
  if (*npc != CALL_OP)
    return 0;

  unsigned char *first = npc;   /* earliest call in the run */
  unsigned char *last = npc;    /* latest call in the run */
  int calls = 1;
  int step;

  /* scan backwards for preceding 5-byte calls */
  for (step = 0; *(first - CALL_LEN) == CALL_OP && step < MAX_STEPS; step++)
    {
      first -= CALL_LEN;
      calls++;
    }
  /* the run must be immediately preceded by leave; ret */
  if (*(first - 1) != RET_OP || *(first - 2) != LEAVE_OP)
    return 0;
  /* scan forwards for following 5-byte calls */
  for (step = 0; *(last + CALL_LEN) == CALL_OP && step < MAX_STEPS; step++)
    {
      last += CALL_LEN;
      calls++;
    }
  return calls >= MIN_CALLS ? 1 : 0;
}
1874 
1875 static int
find_i386_ret_addr(struct WalkContext * wctx,int do_walk)1876 find_i386_ret_addr (struct WalkContext *wctx, int do_walk)
1877 {
1878   if (wctx->sp == 0)
1879     // Some artificial contexts may have %sp set to 0. See SETFUNCTIONCONTEXT()
1880     return RA_FAILURE;
1881 
1882   /* Check cached values */
1883   int retc = cache_get (wctx);
1884   if (retc != RA_FAILURE)
1885     return retc;
1886 
1887   /* An attempt to perform code analysis for call stack tracing */
1888   unsigned char opcode;
1889   unsigned char extop;
1890   unsigned char extop2;
1891   unsigned char modrm;
1892   int imm8; /* immediate operand, byte */
1893   int immv; /* immediate operand, word(2) or doubleword(4) */
1894   int reg; /* register code */
1895 
1896   /* Buffer for branch targets (analysis stoppers) */
1897   unsigned char *targets[MAXTRGTS];
1898   int ntrg = 0; /* number of entries in the table */
1899   targets[ntrg++] = (unsigned char*) wctx->pc;
1900   targets[ntrg++] = (unsigned char*) - 1;
1901 
1902   struct AdvWalkContext buf[MAXCTX];
1903   struct AdvWalkContext *cur = buf;
1904   CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur));
1905 
1906   cur->pc = (unsigned char*) wctx->pc;
1907   cur->sp = (unsigned long*) wctx->sp;
1908   cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */
1909   cur->fp = (unsigned long*) wctx->fp;
1910   cur->tidx = 1;
1911   DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc);
1912 
1913   int nctx = 1; /* number of contexts being processed */
1914   int cnt = 8192; /* number of instructions to analyse */
1915 
1916   /*
1917    * The basic idea of our x86 stack unwind is that we don't know
1918    * if we can trust the frame-pointer register.  So we walk
1919    * instructions to find a return instruction, at which point
1920    * we know the return address is on the top of the stack, etc.
1921    *
1922    * A severe challenge to walking x86 instructions is when we
1923    * encounter "jmp *(reg)" instructions, where we are expected
1924    * to jump to the (unknown-to-us) contents of a register.
1925    *
1926    * The "jmp_reg" code here attempts to keep track of the
1927    * context for such a jump, deferring any handling of such
1928    * a difficult case.  We continue with other contexts, hoping
1929    * that some other walk will take us to a return instruction.
1930    *
1931    * If no other walk helps, we return to "jmp_reg" contexts.
1932    * While we don't know the jump target, it is possible that the
1933    * bytes immediately following the jmp_reg instruction represent
1934    * one possible target, as might be the case when a "switch"
1935    * statement is compiled.
1936    *
1937    * Unfortunately, the bytes following a "jmp_reg" instruction might
1938    * instead be a jump target from somewhere else -- execution might
1939    * never "fall through" from the preceding "jmp_reg".  Those bytes
1940    * might not even be instructions at all.  There are many uses of
1941    * jmp_reg instructions beyond just compiling switch statements.
1942    *
1943    * So walking the bytes after a "jmp_reg" instruction can lead
1944    * to bugs and undefined behavior, including SEGV and core dump.
1945    *
1946    * We currently do not really understand the "jmp_reg" code below.
1947    */
1948   int jmp_reg_switch_mode = 0;
1949   int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case
1950   int total_num_jmp_reg = 0; // number of total jmp *reg met
1951   struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case
1952   struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases
1953   struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases
1954 
1955   int cur_jmp_reg_switch = 0; // current switch table
1956   int num_jmp_reg_switch = 0; // number of switch table
1957   int jmp_reg_switch_case = 0; // case number in current switch table
1958   unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case
1959   unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleteing context of jump target
1960   unsigned char * jmp_reg_switch_base = NULL; // start pc for checking offsets
1961   int max_jmp_reg_switch_case = 2;
1962 #if WSIZE(32)
1963   int max_switch_pc_offset = 512;
1964 #else // WSIZE(64)
1965   int max_switch_pc_offset = 1024;
1966 #endif
1967   int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG
1968   int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions
1969 
1970 
1971   int save_ctx = 0; // flag to save walk context in the cache to speed up unwind
1972   struct WalkContext wctx_pc_save;
1973   if (do_walk == 0)
1974     // do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first
1975     __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
1976 
1977 startWalk:
1978   if (do_walk == 0)
1979     { // try to resolve RA from stack frame pointer
1980       if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
1981 	{
1982 	  do_walk = 1;
1983 	  goto startWalk;
1984 	}
1985       // before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext))
1986       uint64_t idx = wctx->pc * ROOT_IDX;
1987       uint32_t val = OmpVals[idx % OmpValTableSize];
1988       idx = (idx + val) * ROOT_IDX;
1989 #ifdef USE_18434988_OMP_CACHE_WORKAROUND
1990       // Check ra: if it is 0 - then cache is invalid
1991       uint64_t idx4;
1992       idx4 = (idx + val) * ROOT_IDX;
1993       idx4 = (idx4 + val) * ROOT_IDX;
1994       if (0 == OmpRAs[ idx4 % OmpValTableSize ])  // Invalid cache
1995 	goto checkFP;
1996 #endif
1997       struct WalkContext saved_ctx;
1998       __collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
1999       if (wctx->pc == saved_ctx.pc
2000 	  && wctx->sp == saved_ctx.sp
2001 	  && wctx->fp == saved_ctx.fp
2002 	  && wctx->tbgn == saved_ctx.tbgn
2003 	  && wctx->tend == saved_ctx.tend)
2004 	{ // key match, RA may be valid
2005 	  idx = (idx + val) * ROOT_IDX;
2006 	  unsigned long *sp = NULL;
2007 	  unsigned long fp = wctx->fp;
2008 	  int from_fp = 0;
2009 	  if (val == RA_END_OF_STACK)
2010 	    {
2011 	      DprintfT (SP_DUMP_UNWIND, "find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc);
2012 	      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2013 	      return val;
2014 	    }
2015 	  else
2016 	    {
2017 	      if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
2018 		{
2019 		  TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc);
2020 		  sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2021 		  sp--;
2022 		  if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2023 		    {
2024 		      goto checkFP;
2025 		    }
2026 		  unsigned long ra = *sp;
2027 		  uint64_t idx2 = (idx + val) * ROOT_IDX;
2028 		  if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2029 		    {
2030 		      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2031 		      TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val);
2032 		      return val;
2033 		    }
2034 		  TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val);
2035 		  goto checkFP;
2036 		}
2037 	      sp = (unsigned long *) fp;
2038 	      from_fp = 1;
2039 	    }
2040 
2041 	  uint64_t idx2 = (idx + val) * ROOT_IDX;
2042 	  unsigned long ra = *sp++;
2043 	  if (from_fp)
2044 	    {
2045 	      unsigned long tbgn = wctx->tbgn;
2046 	      unsigned long tend = wctx->tend;
2047 	      if (ra < tbgn || ra >= tend)
2048 		{
2049 		  sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2050 		  sp--;
2051 		  //if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) {
2052 		  // The check above was replaced with the check below,
2053 		  // because we do not know why "- 16" and "- sizeof(*sp)" was used.
2054 		  if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2055 		    goto checkFP;
2056 		  else
2057 		    ra = *sp;
2058 		}
2059 	    }
2060 	  if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2061 	    {
2062 	      TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc);
2063 	      __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2064 	      return val;
2065 	    }
2066 	}
2067       goto checkFP;
2068     }
2069   else
2070     {
2071       CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2072       CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2073     }
2074   while (cnt--)
2075     {
2076       if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1))
2077 	{ // no context available, try jmp switch mode
2078 	  int i = 0;
2079 	  if (num_jmp_reg == expected_num_jmp_reg)
2080 	    jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode
2081 	  DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n",
2082 		    num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode);
2083 	  // the ideal asm of switch is
2084 	  //   jmp reg
2085 	  //   ...//case 1
2086 	  //   ret
2087 	  //   ...//case 2
2088 	  //   ret
2089 	  //   ...//etc
2090 	  if (jmp_reg_switch_mode == 0)
2091 	    {
2092 	      num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg
2093 	      jmp_reg_switch_mode = 1; // begin switch mode
2094 	      for (i = 0; i < num_jmp_reg_switch; i++)
2095 		{
2096 		  if (jmp_reg_switch_ctx[i] == NULL)
2097 		    jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i]));
2098 		  if (jmp_reg_switch_ctx[i] != NULL)
2099 		    { // backup jmp_reg_ctx
2100 		      __collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i]));
2101 		      cur_jmp_reg_switch = 0; // reset the current switch table
2102 		      jmp_reg_switch_case = 0; // reset the case number in current switch table
2103 		    }
2104 		}
2105 	      if (jmp_reg_switch_backup_ctx == NULL)
2106 		{ // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA
2107 		  jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx));
2108 		  if (jmp_reg_switch_backup_ctx != NULL)
2109 		    __collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur));
2110 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n");
2111 		}
2112 	    }
2113 	  if (jmp_reg_switch_mode == 1)
2114 	    { // in the process of trying switch cases
2115 	      if (cur_jmp_reg_switch == num_jmp_reg_switch)
2116 		{
2117 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n");
2118 		  if (jmp_reg_switch_backup_ctx != NULL)
2119 		    __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2120 		  int rc = process_return_real (wctx, cur, 0);
2121 		  if (rc == RA_SUCCESS)
2122 		    {
2123 		      if (save_ctx)
2124 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2125 		      return rc;
2126 		    }
2127 		  break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP
2128 		}
2129 	      unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc;
2130 	      if (jmp_reg_switch_case == 0)
2131 		// first switch case
2132 		npc = check_modrm (npc); // pc next to "jmp reg" instruction
2133 	      else if (jmp_reg_switch_pc != NULL)
2134 		npc = jmp_reg_switch_pc; // // pc next to "ret" instruction of previous case
2135 	      else
2136 		{
2137 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, jmp_reg_switch_pc=%p\n",
2138 			    jmp_reg_switch_case, jmp_reg_switch_pc);
2139 		  break; //goto checkFP
2140 		}
2141 	      jmp_reg_switch_base = npc;
2142 	      struct AdvWalkContext *new = buf + nctx;
2143 	      nctx += 1;
2144 	      __collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new));
2145 	      new->pc = npc;
2146 	      cur = new; /* advance the new context first */
2147 	      jmp_reg_switch_pc = NULL;
2148 	      jmp_reg_switch_case++;
2149 	      if (jmp_reg_switch_case == max_jmp_reg_switch_case)
2150 		{ // done many cases, change to another switch table
2151 		  cur_jmp_reg_switch++;
2152 		  jmp_reg_switch_case = 0;
2153 		}
2154 	    }
2155 	  num_jmp_reg = 0;
2156 	}
2157       if (jmp_reg_switch_mode == 1)
2158 	{ // when processing switch cases, check pc each time
2159 	  unsigned long tbgn = wctx->tbgn;
2160 	  unsigned long tend = wctx->tend;
2161 	  if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend)
2162 	    {
2163 	      DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc));
2164 	      break;
2165 	    }
2166 	  if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset)
2167 	    {
2168 	      DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n");
2169 	      if (jmp_reg_switch_backup_ctx != NULL)
2170 		__collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2171 	      int rc = process_return_real (wctx, cur, 0);
2172 	      if (rc == RA_SUCCESS)
2173 		{
2174 		  if (save_ctx)
2175 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2176 		  return rc;
2177 		}
2178 	      break; // limit the walk offset after jmp reg instruction, got checkFP
2179 	    }
2180 	}
2181 
2182       if (nctx == 0)
2183 	break;
2184 //      dump_targets (__LINE__, ntrg, targets);
2185       while (cur->pc > targets[cur->tidx])
2186 	cur->tidx += 1;
2187       if (cur->pc == targets[cur->tidx])
2188 	{
2189 	  /* Stop analysis. Delete context. */
2190 	  if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old)
2191 	    {
2192 	      if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL)
2193 		{
2194 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n",
2195 			    __LINE__, cur->pc, jmp_reg_switch_pc, nctx);
2196 		  jmp_reg_switch_pc = cur->pc; // save cp before delete context, may be used as a start of switch case
2197 		  jmp_reg_switch_pc_old = jmp_reg_switch_pc;
2198 		}
2199 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__);
2200 	      DELETE_CURCTX ();
2201 	      if (cur >= buf + nctx)
2202 		cur = buf;
2203 	      continue;
2204 	    }
2205 	  if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old)
2206 	    jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old
2207 	}
2208 
2209       /* let's walk the next x86 instruction */
2210       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n",
2211 	       __LINE__, (long) (cur - buf), (unsigned long) cur->pc,
2212 	       (int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2],
2213 	       (int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5],
2214 	       (int) cur->pc[6], (unsigned long) cur->sp);
2215       int v = 4; /* Operand size */
2216       int a = 4; /* Address size */
2217       /* int W = 0;	   REX.W bit */
2218 #if WSIZE(64)
2219       int R = 0; /* REX.R bit */
2220 #endif
2221       int X = 0; /* REX.X bit */
2222       int B = 0; /* REX.B bit */
2223       /* Check prefixes */
2224       int done = 0;
2225       while (!done)
2226 	{
2227 	  opcode = *cur->pc++;
2228 	  switch (opcode)
2229 	    {
2230 	    case 0x66: /* opd size override */
2231 	      v = 2;
2232 	      break;
2233 	    case 0x67: /*addr size override */
2234 	      a = 2;
2235 	      break;
2236 #if WSIZE(64)
2237 	    case 0x40: /* REX */
2238 	    case 0x41:
2239 	    case 0x42:
2240 	    case 0x43:
2241 	    case 0x44:
2242 	    case 0x45:
2243 	    case 0x46:
2244 	    case 0x47:
2245 	    case 0x48:
2246 	    case 0x49:
2247 	    case 0x4a:
2248 	    case 0x4b:
2249 	    case 0x4c:
2250 	    case 0x4d:
2251 	    case 0x4e:
2252 	    case 0x4f:
2253 	      B = (opcode & 0x1) ? 8 : 0;
2254 	      X = (opcode & 0x2) ? 8 : 0;
2255 	      R = (opcode & 0x4) ? 8 : 0;
2256 	      if (opcode & 0x8)  /* 64 bit operand size */
2257 		v = 8;
2258 	      opcode = *cur->pc++;
2259 	      done = 1;
2260 	      break;
2261 #endif
2262 	    default:
2263 	      done = 1;
2264 	      break;
2265 	    }
2266 	}
2267       int z = (v == 8) ? 4 : v;
2268       switch (opcode)
2269 	{
2270 	case 0x0: /* add Eb,Gb */
2271 	case 0x01: /* add Ev,Gv */
2272 	case 0x02: /* add Gb,Eb */
2273 	case 0x03: /* add Gv,Ev */
2274 	  cur->pc = check_modrm (cur->pc);
2275 	  break;
2276 	case 0x04: /* add %al,Ib */
2277 	  cur->pc += 1;
2278 	  break;
2279 	case 0x05: /* add %eax,Iz */
2280 	  cur->pc += z;
2281 	  break;
2282 	case 0x06: /* push es */
2283 	  cur->sp -= 1;
2284 	  break;
2285 	case 0x07: /* pop es */
2286 	  cur->sp += 1;
2287 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2288 	    cur->sp_safe = cur->sp - RED_ZONE;
2289 	  break;
2290 	case 0x08: /* or Eb,Gb */
2291 	case 0x09: /* or Ev,Gv */
2292 	case 0x0a: /* or Gb,Eb */
2293 	case 0x0b: /* or Gv,Ev */
2294 	  cur->pc = check_modrm (cur->pc);
2295 	  break;
2296 	case 0x0c: /* or %al,Ib */
2297 	  cur->pc += 1;
2298 	  break;
2299 	case 0x0d: /* or %eax,Iz */
2300 	  cur->pc += z;
2301 	  break;
2302 	case 0x0e: /* push cs */
2303 	  cur->sp -= 1;
2304 	  break;
2305 	case 0x0f: /* two-byte opcodes */
2306 	  extop = *cur->pc++;
2307 	  switch (extop)
2308 	    { /* RTM or HLE */
2309 	    case 0x01:
2310 	      extop2 = *cur->pc;
2311 	      switch (extop2)
2312 		{
2313 		case 0xd5: /* xend */
2314 		case 0xd6: /* xtest */
2315 		  cur->pc++;
2316 		  break;
2317 		default:
2318 		  break;
2319 		}
2320 	      break;
2321 	    case 0x03:
2322 	      cur->pc = check_modrm (cur->pc);
2323 	      break;
2324 	    case 0x0b:
2325 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. opcode=0x%02x\n",
2326 		       __LINE__, (int) opcode);
2327 	      DELETE_CURCTX ();
2328 	      break;
2329 	    case 0x05: /* syscall */
2330 	    case 0x34: /* sysenter */
2331 	      if (cur->rax == __NR_exit)
2332 		{
2333 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2334 			   __LINE__, (int) opcode);
2335 		  DELETE_CURCTX ();
2336 		  break;
2337 		}
2338 	      else if (cur->rax == __NR_rt_sigreturn)
2339 		{
2340 		  if (jmp_reg_switch_mode == 1)
2341 		    {
2342 		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n",
2343 			       __LINE__, (int) opcode);
2344 		      goto checkFP;
2345 		    }
2346 		  wctx->sp = (unsigned long) cur->sp;
2347 		  if (save_ctx)
2348 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
2349 		  return RA_RT_SIGRETURN;
2350 		}
2351 #if WSIZE(32)
2352 	      else if (cur->rax == __NR_sigreturn)
2353 		{
2354 		  if (jmp_reg_switch_mode == 1)
2355 		    {
2356 		      DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n");
2357 		      goto checkFP;
2358 		    }
2359 		  wctx->sp = (unsigned long) cur->sp;
2360 		  if (save_ctx)
2361 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
2362 		  return RA_SIGRETURN;
2363 		}
2364 #endif
2365 	      /* Check for Linus' trick in the vsyscall page */
2366 	      while (*cur->pc == 0x90)  /* nop */
2367 		cur->pc++;
2368 	      if (*cur->pc == 0xeb)  /* jmp imm8 */
2369 		cur->pc += 2;
2370 	      break;
2371 	    case 0x0d: /* nop Ev */
2372 	      cur->pc = check_modrm (cur->pc);
2373 	      break;
2374 	    case 0x10: /* xmm Vq,Wq */
2375 	    case 0x11:
2376 	    case 0x12:
2377 	    case 0x13:
2378 	    case 0x14:
2379 	    case 0x15:
2380 	    case 0x16:
2381 	    case 0x17:
2382 	      cur->pc = check_modrm (cur->pc);
2383 	      break;
2384 	    case 0x18: /* prefetch */
2385 	      cur->pc = check_modrm (cur->pc);
2386 	      break;
2387 	    case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. ) is parsing as repz nop edx */
2388 	      cur->pc += 2;
2389 	      break;
2390 	    case 0x1f: /* nop Ev */
2391 	      cur->pc = check_modrm (cur->pc);
2392 	      break;
2393 	    case 0x28: /* xmm Vq,Wq */
2394 	    case 0x29:
2395 	    case 0x2a:
2396 	    case 0x2b:
2397 	    case 0x2c:
2398 	    case 0x2d:
2399 	    case 0x2e:
2400 	    case 0x2f:
2401 	      cur->pc = check_modrm (cur->pc);
2402 	      break;
2403 	    case 0x30: /* wrmsr */
2404 	    case 0x31: /* rdtsc */
2405 	    case 0x32: /* rdmsr */
2406 	    case 0x33: /* rdpmc */
2407 	      break;
2408 	      /* case 0x34: sysenter (see above) */
2409 	    case 0x38: case 0x3a:
2410 	      extop2 = *cur->pc++;
2411 	      cur->pc = check_modrm (cur->pc);
2412 	      // 21275311 Unwind failure in native stack for java application running on jdk8
2413 	      // Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte.
2414 	      if (extop == 0x3a)
2415 		cur->pc++;
2416 	      break;
2417 	    case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */
2418 	    case 0x44: case 0x45: case 0x46: case 0x47:
2419 	    case 0x48: case 0x49: case 0x4a: case 0x4b:
2420 	    case 0x4c: case 0x4d: case 0x4e: case 0x4f:
2421 	      cur->pc = check_modrm (cur->pc);
2422 	      break;
2423 	    case 0x50: case 0x51: case 0x52: case 0x53:
2424 	    case 0x54: case 0x55: case 0x56: case 0x57:
2425 	    case 0x58: case 0x59: case 0x5a: case 0x5b:
2426 	    case 0x5c: case 0x5d: case 0x5e: case 0x5f:
2427 	    case 0x60: case 0x61: case 0x62: case 0x63:
2428 	    case 0x64: case 0x65: case 0x66: case 0x67:
2429 	    case 0x68: case 0x69: case 0x6a: case 0x6b:
2430 	    case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2431 	      cur->pc = check_modrm (cur->pc);
2432 	      break;
2433 	    case 0x70: case 0x71: case 0x72: case 0x73:
2434 	      cur->pc = check_modrm (cur->pc) + 1;
2435 	      break;
2436 	    case 0x74: case 0x75: case 0x76:
2437 	      cur->pc = check_modrm (cur->pc);
2438 	      break;
2439 	    case 0x77:
2440 	      break;
2441 	    case 0x7c: case 0x7d: case 0x7e: case 0x7f:
2442 	      cur->pc = check_modrm (cur->pc);
2443 	      break;
2444 	    case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */
2445 	    case 0x84: case 0x85: case 0x86: case 0x87:
2446 	    case 0x88: case 0x89: case 0x8a: case 0x8b:
2447 	    case 0x8c: case 0x8d: case 0x8e: case 0x8f:
2448 	      immv = read_int (cur->pc, z);
2449 	      cur->pc += z;
2450 	      if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2451 		{
2452 		  int tidx = 0;
2453 		  unsigned char *npc = cur->pc + immv;
2454 		  if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
2455 		    {
2456 		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2457 			       __LINE__, (int) opcode);
2458 		      DELETE_CURCTX ();
2459 		      break;
2460 		    }
2461 		  if (is_after_ret (npc))
2462 		    break;
2463 		  while (npc > targets[tidx])
2464 		    tidx += 1;
2465 		  if (npc != targets[tidx])
2466 		    {
2467 		      if (ntrg < MAXTRGTS)
2468 			{
2469 			  for (int i = 0; i < nctx; i++)
2470 			    if (buf[i].tidx >= tidx)
2471 			      buf[i].tidx++;
2472 
2473 			  /* insert a new target */
2474 			  for (int i = ntrg; i > tidx; i--)
2475 			    targets[i] = targets[i - 1];
2476 			  ntrg += 1;
2477 			  targets[tidx++] = npc;
2478 			}
2479 		      else
2480 			DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n",
2481 				  __LINE__, ntrg);
2482 		      struct AdvWalkContext *new = buf + nctx;
2483 		      nctx += 1;
2484 		      __collector_memcpy (new, cur, sizeof (*new));
2485 		      new->pc = npc;
2486 		      new->tidx = tidx;
2487 		      cur = new; /* advance the new context first */
2488 		      continue;
2489 		    }
2490 		}
2491 	      else
2492 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n",
2493 			  __LINE__, ntrg);
2494 	      break;
2495 	    case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */
2496 	    case 0x94: case 0x95: case 0x96: case 0x97:
2497 	    case 0x98: case 0x99: case 0x9a: case 0x9b:
2498 	    case 0x9c: case 0x9d: case 0x9e: case 0x9f:
2499 	      cur->pc = check_modrm (cur->pc);
2500 	      break;
2501 	    case 0xa0: /* push fs */
2502 	      cur->sp -= 1;
2503 	      break;
2504 	    case 0xa1: /* pop fs */
2505 	      cur->sp += 1;
2506 	      if (cur->sp - RED_ZONE > cur->sp_safe)
2507 		cur->sp_safe = cur->sp - RED_ZONE;
2508 	      break;
2509 	    case 0xa2: /* cpuid */
2510 	      break;
2511 	    case 0xa3: /* bt Ev,Gv */
2512 	      cur->pc = check_modrm (cur->pc);
2513 	      break;
2514 	    case 0xa4: /* shld Ev,Gv,Ib */
2515 	      cur->pc = check_modrm (cur->pc);
2516 	      cur->pc += 1;
2517 	      break;
2518 	    case 0xa5: /* shld Ev,Gv,%cl */
2519 	      cur->pc = check_modrm (cur->pc);
2520 	      break;
2521 	    case 0xa8: /* push gs */
2522 	      cur->sp -= 1;
2523 	      break;
2524 	    case 0xa9: /* pop gs */
2525 	      cur->sp += 1;
2526 	      if (cur->sp - RED_ZONE > cur->sp_safe)
2527 		cur->sp_safe = cur->sp - RED_ZONE;
2528 	      break;
2529 	    case 0xaa: /* rsm */
2530 	      break;
2531 	    case 0xab: /* bts Ev,Gv */
2532 	      cur->pc = check_modrm (cur->pc);
2533 	      break;
2534 	    case 0xac: /* shrd Ev,Gv,Ib */
2535 	      cur->pc = check_modrm (cur->pc);
2536 	      cur->pc += 1;
2537 	      break;
2538 	    case 0xad: /* shrd Ev,Gv,%cl */
2539 	      cur->pc = check_modrm (cur->pc);
2540 	      break;
2541 	    case 0xae: /* group15 */
2542 	      cur->pc = check_modrm (cur->pc);
2543 	      break;
2544 	    case 0xaf: /* imul Gv,Ev */
2545 	      cur->pc = check_modrm (cur->pc);
2546 	      break;
2547 	    case 0xb1: /* cmpxchg Ev,Gv */
2548 	      cur->pc = check_modrm (cur->pc);
2549 	      break;
2550 	    case 0xb3:
2551 	    case 0xb6: /* movzx Gv,Eb */
2552 	    case 0xb7: /* movzx Gv,Ew */
2553 	      cur->pc = check_modrm (cur->pc);
2554 	      break;
2555 	    case 0xba: /* group8 Ev,Ib */
2556 	      cur->pc = check_modrm (cur->pc);
2557 	      cur->pc += 1;
2558 	      break;
2559 	    case 0xbb: /* btc Ev,Gv */
2560 	    case 0xbc: /* bsf Gv,Ev */
2561 	    case 0xbd: /* bsr Gv,Ev */
2562 	      cur->pc = check_modrm (cur->pc);
2563 	      break;
2564 	    case 0xbe: /* movsx Gv,Eb */
2565 	    case 0xbf: /* movsx Gv,Ew */
2566 	      cur->pc = check_modrm (cur->pc);
2567 	      break;
2568 	    case 0xc0: /* xadd Eb,Gb */
2569 	    case 0xc1: /* xadd Ev,Gv */
2570 	      cur->pc = check_modrm (cur->pc);
2571 	      break;
2572 	    case 0xc2: /* cmpps V,W,Ib */
2573 	      cur->pc = check_modrm (cur->pc);
2574 	      cur->pc += 1;
2575 	      break;
2576 	    case 0xc3: /* movnti M,G */
2577 	      cur->pc = check_modrm (cur->pc);
2578 	      break;
2579 	    case 0xc6: /* shufps V,W,Ib */
2580 	      cur->pc = check_modrm (cur->pc);
2581 	      cur->pc += 1;
2582 	      break;
2583 	    case 0xc7: /* RDRAND */
2584 	      cur->pc = check_modrm (cur->pc);
2585 	      break;
2586 	    case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */
2587 	    case 0xcc: case 0xcd: case 0xce: case 0xcf:
2588 	      break;
2589 	    case 0xd0: case 0xd1: case 0xd2: case 0xd3:
2590 	    case 0xd4: case 0xd5: case 0xd6: case 0xd7:
2591 	    case 0xd8: case 0xd9: case 0xda: case 0xdb:
2592 	    case 0xdc: case 0xdd: case 0xde: case 0xdf:
2593 	    case 0xe0: case 0xe1: case 0xe2: case 0xe3:
2594 	    case 0xe4: case 0xe5: case 0xe6: case 0xe7:
2595 	    case 0xe8: case 0xe9: case 0xea: case 0xeb:
2596 	    case 0xec: case 0xed: case 0xee: case 0xef:
2597 	    case 0xf0: case 0xf1: case 0xf2: case 0xf3:
2598 	    case 0xf4: case 0xf5: case 0xf6: case 0xf7:
2599 	    case 0xf8: case 0xf9: case 0xfa: case 0xfb:
2600 	    case 0xfc: case 0xfd: case 0xfe: case 0xff:
2601 	      cur->pc = check_modrm (cur->pc);
2602 	      break;
2603 	    default:
2604 	      if (jmp_reg_switch_mode == 1 && extop == 0x0b)
2605 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n",
2606 			  __LINE__, (int) extop, jmp_reg_switch_mode);
2607 	      else
2608 		{
2609 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n",
2610 			    __LINE__, (int) extop, jmp_reg_switch_mode);
2611 		  DELETE_CURCTX ();
2612 		}
2613 	      break;
2614 	    }
2615 	  break;
2616 	case 0x10: /* adc Eb,Gb */
2617 	case 0x11: /* adc Ev,Gv */
2618 	case 0x12: /* adc Gb,Eb */
2619 	case 0x13: /* adc Gv,Ev */
2620 	  cur->pc = check_modrm (cur->pc);
2621 	  break;
2622 	case 0x14: /* adc %al,Ib */
2623 	  cur->pc += 1;
2624 	  break;
2625 	case 0x15: /* adc %eax,Iz */
2626 	  cur->pc += z;
2627 	  break;
2628 	case 0x16: /* push ss */
2629 	  cur->sp -= 1;
2630 	  break;
2631 	case 0x17: /* pop ss */
2632 	  cur->sp += 1;
2633 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2634 	    cur->sp_safe = cur->sp - RED_ZONE;
2635 	  break;
2636 	case 0x18: /* sbb Eb,Gb */
2637 	case 0x19: /* sbb Ev,Gv */
2638 	case 0x1a: /* sbb Gb,Eb */
2639 	case 0x1b: /* sbb Gv,Ev */
2640 	  cur->pc = check_modrm (cur->pc);
2641 	  break;
2642 	case 0x1c: /* sbb %al,Ib */
2643 	  cur->pc += 1;
2644 	  break;
2645 	case 0x1d: /* sbb %eax,Iz */
2646 	  cur->pc += z;
2647 	  break;
2648 	case 0x1e: /* push ds */
2649 	  cur->sp -= 1;
2650 	  break;
2651 	case 0x1f: /* pop ds */
2652 	  cur->sp += 1;
2653 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2654 	    cur->sp_safe = cur->sp - RED_ZONE;
2655 	  break;
2656 	case 0x20: /* and Eb,Gb */
2657 	case 0x21: /* and Ev,Gv */
2658 	case 0x22: /* and Gb,Eb */
2659 	case 0x23: /* and Gv,Ev */
2660 	  cur->pc = check_modrm (cur->pc);
2661 	  break;
2662 	case 0x24: /* and %al,Ib */
2663 	  cur->pc += 1;
2664 	  break;
2665 	case 0x25: /* and %eax,Iz */
2666 	  cur->pc += z;
2667 	  break;
2668 	case 0x26: /* seg=es prefix */
2669 	  break;
2670 	case 0x27: /* daa */
2671 	  break;
2672 	case 0x28: /* sub Eb,Gb */
2673 	case 0x29: /* sub Ev,Gv */
2674 	case 0x2a: /* sub Gb,Eb */
2675 	case 0x2b: /* sub Gv,Ev */
2676 	  cur->pc = check_modrm (cur->pc);
2677 	  break;
2678 	case 0x2c: /* sub %al,Ib */
2679 	  cur->pc += 1;
2680 	  break;
2681 	case 0x2d: /* sub %eax,Iz */
2682 	  cur->pc += z;
2683 	  break;
2684 	case 0x2e: /* seg=cs prefix */
2685 	  break;
2686 	case 0x2f: /* das */
2687 	  break;
2688 	case 0x30: /* xor Eb,Gb */
2689 	case 0x31: /* xor Ev,Gv */
2690 	case 0x32: /* xor Gb,Eb */
2691 	case 0x33: /* xor Gv,Ev */
2692 	  cur->pc = check_modrm (cur->pc);
2693 	  break;
2694 	case 0x34: /* xor %al,Ib */
2695 	  cur->pc += 1;
2696 	  break;
2697 	case 0x35: /* xor %eax,Iz */
2698 	  cur->pc += z;
2699 	  break;
2700 	case 0x36: /* seg=ss prefix */
2701 	  break;
2702 	case 0x37: /* aaa */
2703 	  break;
2704 	case 0x38: /* cmp Eb,Gb */
2705 	case 0x39: /* cmp Ev,Gv */
2706 	case 0x3a: /* cmp Gb,Eb */
2707 	case 0x3b: /* cmp Gv,Ev */
2708 	  cur->pc = check_modrm (cur->pc);
2709 	  break;
2710 	case 0x3c: /* cmp %al,Ib */
2711 	  cur->pc += 1;
2712 	  break;
2713 	case 0x3d: /* cmp %eax,Iz */
2714 	  cur->pc += z;
2715 	  break;
2716 	case 0x3e: /* seg=ds prefix */
2717 	  break;
2718 	case 0x3f: /* aas */
2719 	  break;
2720 #if WSIZE(32)
2721 	case 0x40: /* inc %eax */
2722 	case 0x41: /* inc %ecx */
2723 	case 0x42: /* inc %edx */
2724 	case 0x43: /* inc %ebx */
2725 	  break;
2726 	case 0x44: /* inc %esp */
2727 	  /* Can't be a valid stack pointer - delete context */
2728 	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__);
2729 	  DELETE_CURCTX ();
2730 	  break;
2731 	case 0x45: /* inc %ebp */
2732 	case 0x46: /* inc %esi */
2733 	case 0x47: /* inc %edi */
2734 	case 0x48: /* dec %eax */
2735 	case 0x49: /* dec %ecx */
2736 	case 0x4a: /* dec %edx */
2737 	case 0x4b: /* dec %ebx */
2738 	  break;
2739 	case 0x4c: /* dec %esp */
2740 	  /* Can't be a valid stack pointer - delete context */
2741 	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__);
2742 	  DELETE_CURCTX ();
2743 	  break;
2744 	case 0x4d: /* dec %ebp */
2745 	case 0x4e: /* dec %esi */
2746 	case 0x4f: /* dec %edi */
2747 	  break;
2748 #endif
2749 	case 0x50: /* push %eax */
2750 	case 0x51: /* push %ecx */
2751 	case 0x52: /* push %edx */
2752 	case 0x53: /* push %ebx */
2753 	case 0x54: /* push %esp */
2754 	case 0x55: /* push %ebp */
2755 	case 0x56: /* push %esi */
2756 	case 0x57: /* push %edi */
2757 	  cur->sp -= 1;
2758 	  reg = OPC_REG (opcode);
2759 	  if (reg == RBP)
2760 	    {
2761 #if 0
2762 	      /* Don't do this check yet. Affects tail calls. */
2763 	      /* avoid other function's prologue */
2764 	      if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) ||
2765 		  (cur->pc[0] == 0x8b && cur->pc[1] == 0xec))
2766 		{
2767 		  /* mov %esp,%ebp */
2768 		  DELETE_CURCTX ();
2769 		  break;
2770 		}
2771 #endif
2772 	      if (cur->fp_loc == NULL)
2773 		{
2774 		  cur->fp_loc = cur->sp;
2775 		  cur->fp_sav = cur->fp;
2776 		}
2777 	    }
2778 	  break;
2779 	case 0x58: /* pop %eax */
2780 	case 0x59: /* pop %ecx */
2781 	case 0x5a: /* pop %edx */
2782 	case 0x5b: /* pop %ebx */
2783 	case 0x5c: /* pop %esp */
2784 	case 0x5d: /* pop %ebp */
2785 	case 0x5e: /* pop %esi */
2786 	case 0x5f: /* pop %edi */
2787 	  reg = OPC_REG (opcode);
2788 	  cur->regs[reg] = 0;
2789 	  if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase))
2790 	    cur->regs[reg] = *cur->sp;
2791 	  DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n",
2792 		   __LINE__, reg, (unsigned long) cur->regs[reg]);
2793 	  if (reg == RDX)
2794 	    {
2795 	      if (cur->sp >= cur->sp_safe &&
2796 		  (unsigned long) cur->sp < wctx->sbase)
2797 		cur->rdx = *cur->sp;
2798 	    }
2799 	  else if (reg == RBP)
2800 	    {
2801 	      if (cur->fp_loc == cur->sp)
2802 		{
2803 		  cur->fp = cur->fp_sav;
2804 		  cur->fp_loc = NULL;
2805 		}
2806 	      else if (cur->sp >= cur->sp_safe &&
2807 		       (unsigned long) cur->sp < wctx->sbase)
2808 		cur->fp = (unsigned long*) (*cur->sp);
2809 	    }
2810 	  else if (reg == RSP)
2811 	    {
2812 	      /* f.e. JVM I2CAdapter */
2813 	      if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
2814 		{
2815 		  unsigned long *nsp = (unsigned long*) (*cur->sp);
2816 		  if (nsp >= cur->sp && nsp <= cur->fp)
2817 		    {
2818 		      cur->sp = nsp;
2819 		    }
2820 		  else
2821 		    {
2822 		      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address, opcode=0x%02x\n",
2823 			       __LINE__, opcode);
2824 		      goto checkFP;
2825 		    }
2826 		}
2827 	      else
2828 		{
2829 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode=0x%02x\n",
2830 			    __LINE__, opcode);
2831 		  goto checkFP;
2832 		}
2833 	      break;
2834 	    }
2835 	  cur->sp += 1;
2836 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2837 	    {
2838 	      cur->sp_safe = cur->sp - RED_ZONE;
2839 	    }
2840 	  break;
2841 	case 0x60: /* pusha(d) */
2842 	  cur->sp -= 8;
2843 	  break;
2844 	case 0x61: /* popa(d) */
2845 	  cur->sp += 8;
2846 	  if (cur->sp - RED_ZONE > cur->sp_safe)
2847 	    cur->sp_safe = cur->sp - RED_ZONE;
2848 	  break;
2849 	case 0x62: /* group AVX, 4-bytes EVEX prefix */
2850 	  {
2851 	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
2852 	    int len = parse_x86_AVX_instruction (pc);
2853 	    if (len < 4)
2854 	      {
2855 		DELETE_CURCTX ();
2856 	      }
2857 	    else
2858 	      {
2859 		pc += len;
2860 		cur->pc = pc;
2861 	      }
2862 	  }
2863 	  break;
2864 	case 0x63: /* arpl Ew,Gw (32) movsxd Gv,Ev (64)*/
2865 	  cur->pc = check_modrm (cur->pc);
2866 	  break;
2867 	case 0x64: /* seg=fs prefix */
2868 	case 0x65: /* seg=gs prefix */
2869 	  break;
2870 	case 0x66: /* opd size override */
2871 	case 0x67: /* addr size override */
2872 	  break;
2873 	case 0x68: /* push Iz */
2874 	  cur->sp = (unsigned long*) ((long) cur->sp - z);
2875 	  cur->pc += z;
2876 	  break;
2877 	case 0x69: /* imul Gv,Ev,Iz */
2878 	  cur->pc = check_modrm (cur->pc);
2879 	  cur->pc += z;
2880 	  break;
2881 	case 0x6a: /* push Ib */
2882 	  cur->sp = (unsigned long*) ((long) cur->sp - v);
2883 	  cur->pc += 1;
2884 	  break;
2885 	case 0x6b: /* imul Gv,Ev,Ib */
2886 	  cur->pc = check_modrm (cur->pc);
2887 	  cur->pc += 1;
2888 	  break;
2889 	case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2890 	  cur->pc = check_modrm (cur->pc);
2891 	  break;
2892 	case 0x70: /* jo Jb */
2893 	case 0x71: /* jno Jb */
2894 	case 0x72: /* jb Jb */
2895 	case 0x73: /* jnb Jb */
2896 	case 0x74: /* jz Jb */
2897 	case 0x75: /* jnz Jb */
2898 	case 0x76: /* jna Jb */
2899 	case 0x77: /* ja Jb */
2900 	case 0x78: /* js Jb */
2901 	case 0x79: /* jns Jb */
2902 	case 0x7a: /* jp Jb */
2903 	case 0x7b: /* jnp Jb */
2904 	case 0x7c: /* jl Jb */
2905 	case 0x7d: /* jge Jb */
2906 	case 0x7e: /* jle Jb */
2907 	case 0x7f: /* jg Jb */
2908 	  imm8 = *(char*) cur->pc++;
2909 	  if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2910 	    {
2911 	      int tidx = 0;
2912 	      unsigned char *npc = cur->pc + imm8;
2913 	      if (is_after_ret (npc))
2914 		break;
2915 	      while (npc > targets[tidx])
2916 		tidx += 1;
2917 	      if (npc != targets[tidx])
2918 		{
2919 		  if (ntrg < MAXTRGTS)
2920 		    {
2921 		      for (int i = 0; i < nctx; i++)
2922 			if (buf[i].tidx >= tidx)
2923 			  buf[i].tidx++;
2924 
2925 		      /* insert a new target */
2926 		      for (int i = ntrg; i > tidx; i--)
2927 			targets[i] = targets[i - 1];
2928 		      ntrg += 1;
2929 		      targets[tidx++] = npc;
2930 		    }
2931 		  else
2932 		    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg(%d)=max\n", __LINE__, ntrg);
2933 		  struct AdvWalkContext *new = buf + nctx;
2934 		  nctx += 1;
2935 		  __collector_memcpy (new, cur, sizeof (*new));
2936 		  new->pc = npc;
2937 		  new->tidx = tidx;
2938 		  cur = new; /* advance the new context first */
2939 		  continue;
2940 		}
2941 	    }
2942 	  else
2943 	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx(%d)=max\n", __LINE__, nctx);
2944 	  break;
2945 	case 0x80: /* group1 Eb,Ib */
2946 	  cur->pc = check_modrm (cur->pc);
2947 	  cur->pc += 1;
2948 	  break;
2949 	case 0x81: /* group1 Ev,Iz */
2950 	  modrm = *cur->pc;
2951 	  if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2952 	    {
2953 	      int immz = read_int (cur->pc + 1, z);
2954 	      extop = MRM_EXT (modrm);
2955 	      if (extop == 0) /* add  imm32,%esp */
2956 		cur->sp = (unsigned long*) ((long) cur->sp + immz);
2957 	      else if (extop == 4) /* and imm32,%esp */
2958 		cur->sp = (unsigned long*) ((long) cur->sp & immz);
2959 	      else if (extop == 5) /* sub imm32,%esp */
2960 		cur->sp = (unsigned long*) ((long) cur->sp - immz);
2961 	      if (cur->sp - RED_ZONE > cur->sp_safe)
2962 		cur->sp_safe = cur->sp - RED_ZONE;
2963 	    }
2964 	  cur->pc = check_modrm (cur->pc);
2965 	  cur->pc += z;
2966 	  break;
2967 	case 0x82: /* group1 Eb,Ib */
2968 	  cur->pc = check_modrm (cur->pc);
2969 	  cur->pc += 1;
2970 	  break;
2971 	case 0x83: /* group1 Ev,Ib */
2972 	  modrm = *cur->pc;
2973 	  if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2974 	    {
2975 	      imm8 = (char) cur->pc[1]; /* sign extension */
2976 	      extop = MRM_EXT (modrm);
2977 	      if (extop == 0) /* add  imm8,%esp */
2978 		cur->sp = (unsigned long*) ((long) cur->sp + imm8);
2979 	      else if (extop == 4) /* and imm8,%esp */
2980 		  cur->sp = (unsigned long*) ((long) cur->sp & imm8);
2981 	      else if (extop == 5) /* sub imm8,%esp */
2982 		cur->sp = (unsigned long*) ((long) cur->sp - imm8);
2983 	      if (cur->sp - RED_ZONE > cur->sp_safe)
2984 		cur->sp_safe = cur->sp - RED_ZONE;
2985 	    }
2986 	  cur->pc = check_modrm (cur->pc);
2987 	  cur->pc += 1;
2988 	  break;
2989 	case 0x84: /* test Eb,Gb */
2990 	case 0x85: /* test Ev,Gv */
2991 	case 0x86: /* xchg Eb,Gb */
2992 	case 0x87: /* xchg Ev,Gv */
2993 	  cur->pc = check_modrm (cur->pc);
2994 	  break;
2995 	case 0x88: /* mov Eb,Gb */
2996 	  cur->pc = check_modrm (cur->pc);
2997 	  break;
2998 	case 0x89: /* mov Ev,Gv */
2999 	  modrm = *cur->pc;
3000 	  if (MRM_MOD (modrm) == 0xc0)
3001 	    {
3002 	      if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3003 		/* movl %esp,%ebp */
3004 		cur->fp = cur->sp;
3005 	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3006 		{ /* mov %ebp,%esp */
3007 		  cur->sp = cur->fp;
3008 		  if (cur->sp - RED_ZONE > cur->sp_safe)
3009 		    cur->sp_safe = cur->sp - RED_ZONE;
3010 		  if (wctx->fp == (unsigned long) cur->sp)
3011 		    cur->cval = RA_FROMFP;
3012 		}
3013 	    }
3014 	  else if (MRM_MOD (modrm) == 0x80)
3015 	    {
3016 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3017 		{
3018 		  if (cur->pc[1] == 0x24)
3019 		    { /* mov %ebp,disp32(%esp) - JVM */
3020 		      immv = read_int (cur->pc + 2, 4);
3021 		      cur->fp_loc = (unsigned long*) ((char*) cur->sp + immv);
3022 		      cur->fp_sav = cur->fp;
3023 		    }
3024 		}
3025 	    }
3026 	  else if (MRM_MOD (modrm) == 0x40)
3027 	    {
3028 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3029 		{
3030 		  if (cur->pc[1] == 0x24 && cur->pc[2] == 0x0)
3031 		    { /* movl %edx,0(%esp) */
3032 		      cur->ra_loc = cur->sp;
3033 		      cur->ra_sav = cur->rdx;
3034 		    }
3035 		}
3036 	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3037 		{
3038 		  if (cur->pc[1] == 0x24)
3039 		    { /* mov %ebp,disp8(%esp) - JVM */
3040 		      imm8 = ((char*) (cur->pc))[2];
3041 		      cur->fp_loc = (unsigned long*) ((char*) cur->sp + imm8);
3042 		      cur->fp_sav = cur->fp;
3043 		    }
3044 		}
3045 	    }
3046 	  else if (MRM_MOD (modrm) == 0x0)
3047 	    {
3048 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3049 		{
3050 		  if (cur->pc[1] == 0x24)
3051 		    { /* mov %ebp,(%esp) */
3052 		      cur->fp_loc = cur->sp;
3053 		      cur->fp_sav = cur->fp;
3054 		    }
3055 		}
3056 	      else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3057 		{
3058 		  if (cur->pc[1] == 0x24)
3059 		    { /* movl %edx,(%esp) */
3060 		      cur->ra_loc = cur->sp;
3061 		      cur->ra_sav = cur->rdx;
3062 		    }
3063 		}
3064 	    }
3065 	  cur->pc = check_modrm (cur->pc);
3066 	  break;
3067 	case 0x8a: /* mov Gb,Eb */
3068 	  cur->pc = check_modrm (cur->pc);
3069 	  break;
3070 	case 0x8b: /* mov Gv,Ev */
3071 	  modrm = *cur->pc;
3072 	  if (MRM_MOD (modrm) == 0xc0)
3073 	    {
3074 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3075 		/* mov %esp,%ebp */
3076 		cur->fp = cur->sp;
3077 	      else if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3078 		{ /* mov %ebp,%esp */
3079 		  cur->sp = cur->fp;
3080 		  if (cur->sp - RED_ZONE > cur->sp_safe)
3081 		    cur->sp_safe = cur->sp - RED_ZONE;
3082 		  if (wctx->fp == (unsigned long) cur->sp)
3083 		    cur->cval = RA_FROMFP;
3084 		}
3085 	    }
3086 	  else if (MRM_MOD (modrm) == 0x80)
3087 	    {
3088 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3089 		{
3090 		  if (cur->pc[1] == 0x24)
3091 		    { /* mov disp32(%esp),%ebp */
3092 		      immv = read_int (cur->pc + 2, 4);
3093 		      unsigned long *ptr = (unsigned long*) ((char*) cur->sp + immv);
3094 		      if (cur->fp_loc == ptr)
3095 			{
3096 			  cur->fp = cur->fp_sav;
3097 			  cur->fp_loc = NULL;
3098 			}
3099 		      else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3100 			cur->fp = (unsigned long*) (*ptr);
3101 		    }
3102 		}
3103 	    }
3104 	  else if (MRM_MOD (modrm) == 0x40)
3105 	    {
3106 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3107 		{
3108 		  if (cur->pc[1] == 0x24)
3109 		    { /* mov disp8(%esp),%ebp - JVM */
3110 		      imm8 = ((char*) (cur->pc))[2];
3111 		      unsigned long *ptr = (unsigned long*) ((char*) cur->sp + imm8);
3112 		      if (cur->fp_loc == ptr)
3113 			{
3114 			  cur->fp = cur->fp_sav;
3115 			  cur->fp_loc = NULL;
3116 			}
3117 		      else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3118 			cur->fp = (unsigned long*) (*ptr);
3119 		    }
3120 		}
3121 	    }
3122 	  else if (MRM_MOD (modrm) == 0x0)
3123 	    {
3124 	      if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3125 		{
3126 		  if (cur->pc[1] == 0x24)
3127 		    { /* mov (%esp),%ebp */
3128 		      if (cur->fp_loc == cur->sp)
3129 			{
3130 			  cur->fp = cur->fp_sav;
3131 			  cur->fp_loc = NULL;
3132 			}
3133 		      else if (cur->sp >= cur->sp_safe &&
3134 			       (unsigned long) cur->sp < wctx->sbase)
3135 			cur->fp = (unsigned long*) *cur->sp;
3136 		    }
3137 		}
3138 	    }
3139 	  cur->pc = check_modrm (cur->pc);
3140 	  break;
3141 	case 0x8c: /* mov Mw,Sw */
3142 	  cur->pc = check_modrm (cur->pc);
3143 	  break;
3144 	case 0x8d: /* lea Gv,M */
3145 	  modrm = *cur->pc;
3146 	  if (MRM_REGD (modrm) == RSP)
3147 	    {
3148 	      unsigned char *pc = cur->pc;
3149 	      // Mez: need to use always regs[RSP/RBP] instead cur->sp(or fp):
3150 	      cur->regs[RSP] = (unsigned long) cur->sp;
3151 	      cur->regs[RBP] = (unsigned long) cur->fp;
3152 	      cur->pc++;
3153 	      int mod = (modrm >> 6) & 3;
3154 	      int r_m = modrm & 7;
3155 	      long val = 0;
3156 	      int undefRez = 0;
3157 	      if (mod == 0x3)
3158 		val = getRegVal (cur, MRM_REGS (modrm), &undefRez);
3159 	      else if (r_m == 4)
3160 		{ // SP or R12. Decode SIB-byte.
3161 		  int sib = *cur->pc++;
3162 		  int scale = 1 << (sib >> 6);
3163 		  int index = X | ((sib >> 3) & 7);
3164 		  int base = B | (sib & 7);
3165 		  if (mod == 0)
3166 		    {
3167 		      if ((base & 7) == 5)
3168 			{ // BP or R13
3169 			  if (index != 4) // SP
3170 			    val += getRegVal (cur, index, &undefRez) * scale;
3171 			  val += read_int (cur->pc, 4);
3172 			  cur->pc += 4;
3173 			}
3174 		      else
3175 			{
3176 			  val += getRegVal (cur, base, &undefRez);
3177 			  if (index != 4) // SP
3178 			    val += getRegVal (cur, index, &undefRez) * scale;
3179 			}
3180 		    }
3181 		  else
3182 		    {
3183 		      val += getRegVal (cur, base, &undefRez);
3184 		      if (index != 4) // SP
3185 			val += getRegVal (cur, index, &undefRez) * scale;
3186 		      if (mod == 1)
3187 			{
3188 			  val += read_int (cur->pc, 1);
3189 			  cur->pc++;
3190 			}
3191 		      else
3192 			{ // mod == 2
3193 			  val += read_int (cur->pc, 4);
3194 			  cur->pc += 4;
3195 			}
3196 		    }
3197 		}
3198 	      else if (mod == 0)
3199 		{
3200 		  if (r_m == 5)
3201 		    { // BP or R13
3202 		      val += read_int (cur->pc, 4);
3203 		      cur->pc += 4;
3204 		    }
3205 		  else
3206 		    val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3207 		}
3208 	      else
3209 		{ // mod == 1 || mod == 2
3210 		  val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3211 		  if (mod == 1)
3212 		    {
3213 		      val += read_int (cur->pc, 1);
3214 		      cur->pc++;
3215 		    }
3216 		  else
3217 		    { // mod == 2
3218 		      val += read_int (cur->pc, 4);
3219 		      cur->pc += 4;
3220 		    }
3221 		}
3222 	      if (undefRez)
3223 		{
3224 		  DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx val=0x%lx\n",
3225 			   __LINE__, (unsigned long) cur->pc, (unsigned long) val);
3226 		  goto checkFP;
3227 		}
3228 	      cur->regs[MRM_REGD (modrm)] = val;
3229 	      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cur->pc=0x%lx val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3230 		       __LINE__, (unsigned long) cur->pc, (unsigned long) val,
3231 		       (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3232 	      if (cur->pc != check_modrm (pc))
3233 		DprintfT (SP_DUMP_UNWIND, "stack_unwind%d ERROR: cur->pc=0x%lx != check_modrm(0x%lx)=0x%lx\n",
3234 			 __LINE__, (unsigned long) cur->pc, (unsigned long) pc,
3235 			 (unsigned long) check_modrm (pc));
3236 	      if (MRM_REGD (modrm) == RSP)
3237 		{
3238 		  if (!isInside ((unsigned long) val, wctx->sp, wctx->sbase))
3239 		    {
3240 		      DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx opcode=0x%02x val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3241 			       __LINE__, (unsigned long) cur->pc, opcode, (unsigned long) val,
3242 			       (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3243 		      goto checkFP;
3244 		    }
3245 		  cur->sp = (unsigned long *) val;
3246 		  if (cur->sp - RED_ZONE > cur->sp_safe)
3247 		    cur->sp_safe = cur->sp - RED_ZONE;
3248 		}
3249 	    }
3250 	  else
3251 	    cur->pc = check_modrm (cur->pc);
3252 	  break;
3253 	case 0x8e: /* mov Sw,Ew */
3254 	  cur->pc = check_modrm (cur->pc);
3255 	  break;
3256 	case 0x8f: /* pop Ev */
3257 	  cur->pc = check_modrm (cur->pc);
3258 	  cur->sp += 1;
3259 	  if (cur->sp - RED_ZONE > cur->sp_safe)
3260 	    cur->sp_safe = cur->sp - RED_ZONE;
3261 	  break;
3262 	case 0x90: /* nop */
3263 	  break;
3264 	case 0x91: /* xchg %eax,%ecx */
3265 	case 0x92: /* xchg %eax,%edx */
3266 	case 0x93: /* xchg %eax,%ebx */
3267 	case 0x94: /* xchg %eax,%esp XXXX */
3268 	case 0x95: /* xchg %eax,%ebp XXXX */
3269 	case 0x96: /* xchg %eax,%esi */
3270 	case 0x97: /* xchg %eax,%edi */
3271 	  break;
3272 	case 0x98: /* cbw/cwde */
3273 	case 0x99: /* cwd/cwq */
3274 	  break;
3275 	case 0x9a: /* callf Ap */
3276 	  if (jmp_reg_switch_mode == 1)
3277 	    {
3278 	      struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3279 	      __collector_memcpy (tmpctx, cur, sizeof (*cur));
3280 	      int rc = process_return (wctx, tmpctx);
3281 	      if (rc != RA_FAILURE)
3282 		{
3283 		  if (save_ctx)
3284 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3285 		  return rc;
3286 		}
3287 	    }
3288 	  cur->pc += 2 + a;
3289 	  break;
3290 	case 0x9b: /* fwait */
3291 	case 0x9c: /* pushf Fv */
3292 	case 0x9d: /* popf Fv */
3293 	case 0x9e: /* sahf */
3294 	case 0x9f: /* lahf */
3295 	  break;
3296 	case 0xa0: /* mov al,Ob */
3297 	case 0xa1: /* mov eax,Ov */
3298 	case 0xa2: /* mov Ob,al */
3299 	case 0xa3: /* mov Ov,eax */
3300 	  cur->pc += a;
3301 	  break;
3302 	case 0xa4: /* movsb Yb,Xb */
3303 	case 0xa5: /* movsd Yv,Xv */
3304 	case 0xa6: /* cmpsb Yb,Xb */
3305 	case 0xa7: /* cmpsd Xv,Yv */
3306 	  break;
3307 	case 0xa8: /* test al,Ib */
3308 	  cur->pc += 1;
3309 	  break;
3310 	case 0xa9: /* test eax,Iz */
3311 	  cur->pc += z;
3312 	  break;
3313 	case 0xaa: /* stosb Yb,%al */
3314 	case 0xab: /* stosd Yv,%eax */
3315 	case 0xac: /* lodsb %al,Xb */
3316 	case 0xad: /* lodsd %eax,Xv */
3317 	case 0xae: /* scasb %al,Yb */
3318 	case 0xaf: /* scasd %eax,Yv */
3319 	  break;
3320 	case 0xb0: /* mov %al,Ib */
3321 	case 0xb1: /* mov %cl,Ib */
3322 	case 0xb2: /* mov %dl,Ib */
3323 	case 0xb3: /* mov %bl,Ib */
3324 	case 0xb4: /* mov %ah,Ib */
3325 	case 0xb5: /* mov %ch,Ib */
3326 	case 0xb6: /* mov %dh,Ib */
3327 	case 0xb7: /* mov %bh,Ib */
3328 	  cur->pc += 1;
3329 	  break;
3330 	case 0xb8: /* mov Iv,%eax */
3331 	case 0xb9: /* mov Iv,%ecx */
3332 	case 0xba: /* mov Iv,%edx */
3333 	case 0xbb: /* mov Iv,%ebx */
3334 	case 0xbc: /* mov Iv,%esp */
3335 	case 0xbd: /* mov Iv,%rbp */
3336 	case 0xbe: /* mov Iv,%esi */
3337 	case 0xbf: /* mov Iv,%edi */
3338 	  reg = OPC_REG (opcode);
3339 	  if (reg == RAX)
3340 	    cur->rax = read_int (cur->pc, v);
3341 	  cur->pc += v;
3342 	  break;
3343 	case 0xc0: /* group2 Eb,Ib */
3344 	case 0xc1: /* group2 Ev,Ib */
3345 	  cur->pc = check_modrm (cur->pc) + 1;
3346 	  break;
3347 	case 0xc2: /* ret Iw */
3348 	  /* In the dynamic linker we may see that
3349 	   * the actual return address is at sp+immv,
3350 	   * while sp points to the resolved address.
3351 	   */
3352 	  {
3353 	    immv = read_int (cur->pc, 2);
3354 	    int rc = process_return (wctx, cur);
3355 	    if (rc != RA_FAILURE)
3356 	      {
3357 		if (jmp_reg_switch_mode == 1)
3358 		  {
3359 		    DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__);
3360 		    goto checkFP;
3361 		  }
3362 		wctx->sp += immv;
3363 		if (save_ctx)
3364 		  omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3365 		return rc;
3366 	      }
3367 	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc2.\n", __LINE__);
3368 	    DELETE_CURCTX ();
3369 	  }
3370 	  break;
3371 	case 0xc3: /* ret */
3372 	  {
3373 	    int rc = process_return (wctx, cur);
3374 	    if (rc != RA_FAILURE)
3375 	      {
3376 		if (save_ctx)
3377 		  omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3378 		return rc;
3379 	      }
3380 	    if (jmp_reg_switch_mode == 1)
3381 	      jmp_reg_switch_pc = cur->pc;
3382 	    DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc3.\n", __LINE__);
3383 	    DELETE_CURCTX ();
3384 	  }
3385 	  break;
3386 	case 0xc4: /* group AVX, 3-bytes VEX prefix */
3387 	  {
3388 	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3389 	    int len = parse_x86_AVX_instruction (pc);
3390 	    if (len < 3)
3391 	      DELETE_CURCTX ();
3392 	    else
3393 	      {
3394 		pc += len;
3395 		cur->pc = pc;
3396 	      }
3397 	  }
3398 	  break;
3399 	case 0xc5: /* group AVX, 2-bytes VEX prefix */
3400 	  {
3401 	    unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3402 	    int len = parse_x86_AVX_instruction (pc);
3403 	    if (len < 2)
3404 	      DELETE_CURCTX ();
3405 	    else
3406 	      {
3407 		pc += len;
3408 		cur->pc = pc;
3409 	      }
3410 	  }
3411 	  break;
3412 	case 0xc6:
3413 	  modrm = *cur->pc;
3414 	  if (modrm == 0xf8) /* xabort */
3415 	    cur->pc += 2;
3416 	  else /* mov Eb,Ib */
3417 	    cur->pc = check_modrm (cur->pc) + 1;
3418 	  break;
3419 	case 0xc7:
3420 	  modrm = *cur->pc;
3421 	  if (modrm == 0xf8) /* xbegin */
3422 	    cur->pc += v + 1;
3423 	  else
3424 	    { /* mov Ev,Iz */
3425 	      extop = MRM_EXT (modrm);
3426 	      if (extop != 0)
3427 		{
3428 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xc7\n", __LINE__);
3429 		  goto checkFP;
3430 		}
3431 	      if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RAX)
3432 		cur->rax = read_int (cur->pc + 1, z);
3433 	      cur->pc = check_modrm (cur->pc) + z;
3434 	    }
3435 	  break;
3436 	case 0xc8: /* enter Iw,Ib */
3437 	  cur->pc += 3;
3438 	  break;
3439 	case 0xc9: /* leave */
3440 	  /* mov %ebp,%esp */
3441 	  cur->sp = cur->fp;
3442 	  /* pop %ebp */
3443 	  if (cur->fp_loc == cur->sp)
3444 	    {
3445 	      cur->fp = cur->fp_sav;
3446 	      cur->fp_loc = NULL;
3447 	    }
3448 	  else if (cur->sp >= cur->sp_safe &&
3449 		   (unsigned long) cur->sp < wctx->sbase)
3450 	    {
3451 	      cur->fp = (unsigned long*) (*cur->sp);
3452 	      if (wctx->fp == (unsigned long) cur->sp)
3453 		cur->cval = RA_FROMFP;
3454 	    }
3455 	  cur->sp += 1;
3456 	  if (cur->sp - RED_ZONE > cur->sp_safe)
3457 	    cur->sp_safe = cur->sp - RED_ZONE;
3458 	  break;
3459 	case 0xca: /* retf Iw */
3460 	  cur->pc += 2; /* XXXX process return */
3461 	  break;
3462 	case 0xcb: /* retf */
3463 	  break; /* XXXX process return */
3464 	case 0xcc: /* int 3 */
3465 	  break;
3466 	case 0xcd: /* int Ib */
3467 	  if (*cur->pc == 0x80)
3468 	    {
3469 	      if (cur->rax == __NR_exit)
3470 		{
3471 		  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xcd.\n", __LINE__);
3472 		  DELETE_CURCTX ();
3473 		  break;
3474 		}
3475 	      else if (cur->rax == __NR_rt_sigreturn)
3476 		{
3477 		  if (jmp_reg_switch_mode == 1)
3478 		    {
3479 		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0xcd\n",
3480 				__LINE__);
3481 		      goto checkFP;
3482 		    }
3483 		  wctx->sp = (unsigned long) cur->sp;
3484 		  if (save_ctx)
3485 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
3486 		  return RA_RT_SIGRETURN;
3487 		}
3488 #if WSIZE(32)
3489 	      else if (cur->rax == __NR_sigreturn)
3490 		{
3491 		  if (jmp_reg_switch_mode == 1)
3492 		    {
3493 		      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode = 0xc2\n",
3494 				__LINE__);
3495 		      goto checkFP;
3496 		    }
3497 		  wctx->sp = (unsigned long) cur->sp;
3498 		  if (save_ctx)
3499 		    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
3500 		  return RA_SIGRETURN;
3501 		}
3502 #endif
3503 	    }
3504 	  cur->pc += 1;
3505 	  break;
3506 	case 0xce: /* into */
3507 	case 0xcf: /* iret */
3508 	  break;
3509 	case 0xd0: /* shift group2 Eb,1 */
3510 	case 0xd1: /* shift group2 Ev,1 */
3511 	case 0xd2: /* shift group2 Eb,%cl */
3512 	case 0xd3: /* shift group2 Ev,%cl */
3513 	  cur->pc = check_modrm (cur->pc);
3514 	  break;
3515 	case 0xd4: /* aam Ib */
3516 	  cur->pc += 1;
3517 	  break;
3518 	case 0xd5: /* aad Ib */
3519 	  cur->pc += 1;
3520 	  break;
3521 	case 0xd6: /* falc? */
3522 	  break;
3523 	case 0xd7:
3524 	  cur->pc = check_modrm (cur->pc);
3525 	  cur->pc++;
3526 	  break;
3527 	case 0xd8: /* esc instructions */
3528 	case 0xd9:
3529 	case 0xda:
3530 	case 0xdb:
3531 	case 0xdc:
3532 	case 0xdd:
3533 	case 0xde:
3534 	case 0xdf:
3535 	  cur->pc = check_modrm (cur->pc);
3536 	  break;
3537 	case 0xe0: /* loopne Jb */
3538 	case 0xe1: /* loope Jb */
3539 	case 0xe2: /* loop Jb */
3540 	case 0xe3: /* jcxz Jb */
3541 	  imm8 = *(char*) cur->pc++;
3542 	  if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
3543 	    {
3544 	      int tidx = 0;
3545 	      unsigned char *npc = cur->pc + imm8;
3546 	      if (is_after_ret (npc))
3547 		break;
3548 	      while (npc > targets[tidx])
3549 		tidx += 1;
3550 	      if (npc != targets[tidx])
3551 		{
3552 		  if (ntrg < MAXTRGTS)
3553 		    {
3554 		      for (int i = 0; i < nctx; i++)
3555 			if (buf[i].tidx >= tidx)
3556 			  buf[i].tidx++;
3557 		      /* insert a new target */
3558 		      for (int i = ntrg; i > tidx; i--)
3559 			targets[i] = targets[i - 1];
3560 		      ntrg += 1;
3561 		      targets[tidx++] = npc;
3562 		    }
3563 		  else
3564 		    DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3565 		  struct AdvWalkContext *new = buf + nctx;
3566 		  nctx += 1;
3567 		  __collector_memcpy (new, cur, sizeof (*new));
3568 		  new->pc = npc;
3569 		  new->tidx = tidx;
3570 		  cur = new; /* advance the new context first */
3571 		  continue;
3572 		}
3573 	    }
3574 	  else
3575 	    DprintfT (SP_DUMP_UNWIND, "unwind.c: nctx = max\n");
3576 	  break;
3577 	case 0xe4: case 0xe5:
3578 	  cur->pc = check_modrm (cur->pc);
3579 	  cur->pc++;
3580 	  break;
3581 	case 0xe6: case 0xe7:
3582 	  cur->pc++;
3583 	  cur->pc = check_modrm (cur->pc);
3584 	  break;
3585 	case 0xec: case 0xed: case 0xee: case 0xef:
3586 	  cur->pc = check_modrm (cur->pc);
3587 	  break;
3588 	case 0xe8: /* call Jz (f64) */
3589 	  {
3590 	    if (jmp_reg_switch_mode == 1)
3591 	      {
3592 		struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3593 		__collector_memcpy (tmpctx, cur, sizeof (*cur));
3594 		int rc = process_return (wctx, tmpctx);
3595 		if (rc != RA_FAILURE)
3596 		  {
3597 		    if (save_ctx)
3598 		      omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3599 		    return rc;
3600 		  }
3601 	      }
3602 	    int immz = read_int (cur->pc, z);
3603 	    if (immz == 0)
3604 	      /* special case in PIC code */
3605 	      cur->sp -= 1;
3606 	    cur->pc += z;
3607 	  }
3608 	  break;
3609 	case 0xe9: /* jump Jz */
3610 	  {
3611 	    int immz = read_int (cur->pc, z);
3612 	    unsigned char *npc = cur->pc + z + immz;
3613 	    if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
3614 	      {
3615 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3616 		DELETE_CURCTX ();
3617 		break;
3618 	      }
3619 	    int tidx = 0;
3620 	    while (npc > targets[tidx])
3621 	      tidx += 1;
3622 	    if (npc != targets[tidx])
3623 	      {
3624 		if (ntrg < MAXTRGTS)
3625 		  {
3626 		    for (int i = 0; i < nctx; i++)
3627 		      if (buf[i].tidx >= tidx)
3628 			buf[i].tidx++;
3629 		    /* insert a new target */
3630 		    for (int i = ntrg; i > tidx; i--)
3631 		      targets[i] = targets[i - 1];
3632 		    ntrg += 1;
3633 		    targets[tidx++] = npc;
3634 		  }
3635 		else
3636 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3637 		cur->pc = npc;
3638 		cur->tidx = tidx;
3639 		continue; /* advance this context first */
3640 	      }
3641 	    else
3642 	      {
3643 		/* Delete context */
3644 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3645 		DELETE_CURCTX ();
3646 	      }
3647 	  }
3648 	  break;
3649 	case 0xeb: /* jump imm8 */
3650 	  {
3651 	    imm8 = *(char*) cur->pc++;
3652 	    int tidx = 0;
3653 	    unsigned char *npc = cur->pc + imm8;
3654 	    while (npc > targets[tidx])
3655 	      tidx += 1;
3656 	    if (npc != targets[tidx])
3657 	      {
3658 		if (ntrg < MAXTRGTS)
3659 		  {
3660 		    for (int i = 0; i < nctx; i++)
3661 		      if (buf[i].tidx >= tidx)
3662 			buf[i].tidx++;
3663 		    /* insert a new target */
3664 		    for (int i = ntrg; i > tidx; i--)
3665 		      targets[i] = targets[i - 1];
3666 		    ntrg += 1;
3667 		    targets[tidx++] = npc;
3668 		  }
3669 		else
3670 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3671 		cur->pc = npc;
3672 		cur->tidx = tidx;
3673 		continue; /* advance this context first */
3674 	      }
3675 	    else
3676 	      {
3677 		/* Delete context */
3678 		DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xeb.\n", __LINE__);
3679 		DELETE_CURCTX ();
3680 	      }
3681 	  }
3682 	  break;
3683 	case 0xf0: /* lock prefix */
3684 	case 0xf2: /* repne prefix */
3685 	case 0xf3: /* repz prefix */
3686 	  break;
3687 	case 0xf4: /* hlt */
3688 	  extop2 = *(cur->pc - 3);
3689 	  if (extop2 == 0x90)
3690 	    {
3691 	      // 17851712 occasional SEGV in find_i386_ret_addr in unwind.c during attach
3692 	      if (save_ctx)
3693 		omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3694 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3695 	      return RA_END_OF_STACK;
3696 	    }
3697 	  /* We see 'hlt' in _start. Stop analysis, revert to FP */
3698 	  /* A workaround for the Linux main stack */
3699 	  if (nctx > 1)
3700 	    {
3701 	      DELETE_CURCTX ();
3702 	      break;
3703 	    }
3704 	  if (cur->fp == 0)
3705 	    {
3706 	      if (jmp_reg_switch_mode == 1)
3707 		{
3708 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xf4\n");
3709 		  goto checkFP;
3710 		}
3711 	      cache_put (wctx, RA_EOSTCK);
3712 	      wctx->pc = 0;
3713 	      wctx->sp = 0;
3714 	      wctx->fp = 0;
3715 	      if (save_ctx)
3716 		omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3717 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3718 	      return RA_END_OF_STACK;
3719 	    }
3720 	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xf4\n", __LINE__);
3721 	  goto checkFP;
3722 	case 0xf5: /* cmc */
3723 	  break;
3724 	case 0xf6: /* group3 Eb */
3725 	  modrm = *cur->pc;
3726 	  extop = MRM_EXT (modrm);
3727 	  cur->pc = check_modrm (cur->pc);
3728 	  if (extop == 0x0) /* test Ib */
3729 	    cur->pc += 1;
3730 	  break;
3731 	case 0xf7: /* group3 Ev */
3732 	  modrm = *cur->pc;
3733 	  extop = MRM_EXT (modrm);
3734 	  cur->pc = check_modrm (cur->pc);
3735 	  if (extop == 0x0)  /* test Iz */
3736 	    cur->pc += z;
3737 	  break;
3738 	case 0xf8: /* clc */
3739 	case 0xf9: /* stc */
3740 	case 0xfa: /* cli */
3741 	case 0xfb: /* sti */
3742 	case 0xfc: /* cld */
3743 	case 0xfd: /* std */
3744 	  break;
3745 	case 0xfe: /* group4 */
3746 	  modrm = *cur->pc;
3747 	  extop = MRM_EXT (modrm);
3748 	  switch (extop)
3749 	    {
3750 	    case 0x0: /* inc Eb */
3751 	    case 0x1: /* dec Eb */
3752 	      cur->pc = check_modrm (cur->pc);
3753 	      break;
3754 	    case 0x7:
3755 	      cur->pc = check_modrm (cur->pc);
3756 	      break;
3757 	    default:
3758 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xfe %x\n",
3759 			__LINE__, extop);
3760 	      DELETE_CURCTX ();
3761 	      break;
3762 	    }
3763 	  break;
3764 	case 0xff: /* group5 */
3765 	  modrm = *cur->pc;
3766 	  extop = MRM_EXT (modrm);
3767 	  switch (extop)
3768 	    {
3769 	    case 0x0: /* inc Ev */
3770 	    case 0x1: /* dec Ev */
3771 	      cur->pc = check_modrm (cur->pc);
3772 	      break;
3773 	    case 0x2: /* calln Ev */
3774 	      if (jmp_reg_switch_mode == 1)
3775 		{
3776 		  struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3777 		  __collector_memcpy (tmpctx, cur, sizeof (*cur));
3778 		  int rc = process_return (wctx, tmpctx);
3779 		  if (rc != RA_FAILURE)
3780 		    {
3781 		      if (save_ctx)
3782 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3783 		      return rc;
3784 		    }
3785 		}
3786 	      cur->pc = check_modrm (cur->pc);
3787 	      break;
3788 	    case 0x3: /* callf Ep */
3789 	      if (jmp_reg_switch_mode == 1)
3790 		{
3791 		  struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3792 		  __collector_memcpy (tmpctx, cur, sizeof (*cur));
3793 		  int rc = process_return (wctx, tmpctx);
3794 		  if (rc != RA_FAILURE)
3795 		    {
3796 		      if (save_ctx)
3797 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3798 		      return rc;
3799 		    }
3800 		}
3801 	      cur->pc = check_modrm (cur->pc); /* XXXX */
3802 	      break;
3803 	    case 0x4: /* jumpn Ev */
3804 	      /* This instruction appears in PLT or
3805 	       * in tail call optimization.
3806 	       * In both cases treat it as return.
3807 	       * Save jump *(reg) - switch, etc, for later use when no ctx left
3808 	       */
3809 	      if (modrm == 0x25 || /* jumpn *disp32 */
3810 		  MRM_MOD (modrm) == 0x40 || /* jumpn byte(reg) */
3811 		  MRM_MOD (modrm) == 0x80) /* jumpn word(reg) */
3812 		{
3813 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: PLT or tail call: %p\n", cur->pc - 1);
3814 		  int rc = process_return (wctx, cur);
3815 		  if (rc != RA_FAILURE)
3816 		    {
3817 		      if (jmp_reg_switch_mode == 1 && total_num_jmp_reg < max_num_jmp_reg_seen)
3818 			{
3819 			  DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xff\n");
3820 			  goto checkFP;
3821 			}
3822 		      if (save_ctx)
3823 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3824 		      return rc;
3825 		    }
3826 		}
3827 	      else if (modrm != 0x24 /*ignore SIB*/) /* jumpn *(reg) or jumpn reg */
3828 		{
3829 		  // 22846120 stack unwind does not find caller of __memcpy_ssse3_back with B64 intel-Linux
3830 		  /*
3831 		   * For now, let's deal rather narrowly with this scenario.  If:
3832 		   * - we are in the middle of an "ff e2" instruction, and
3833 		   * - the next instruction is undefined ( 0f 0b == ud2 )
3834 		   * then test return.  (Might eventually have to broaden the scope
3835 		   * of this fix to other registers/etc.)
3836 		   */
3837 		  if (cur->pc[0] == 0xe2 && cur->pc[1] == 0x0f && cur->pc[2] == 0x0b)
3838 		    {
3839 		      int rc = process_return_real (wctx, cur, 0);
3840 		      if (rc == RA_SUCCESS)
3841 			{
3842 			  if (save_ctx)
3843 			    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3844 			  return rc;
3845 			}
3846 		    }
3847 
3848 		  // 22691241 shjsynprog, jsynprog core dump from find_i386_ret_addr
3849 		  /*
3850 		   * Here is another oddity.  Java 9 seems to emit dynamically generated
3851 		   * code where a code block ends with a "jmp *reg" and then padding to a
3852 		   * multiple-of-16 boundary and then a bunch of 0s.  In this case, let's
3853 		   * not continue to walk bytes since we would be walking off the end of
3854 		   * the instructions into ... something.  Treating them as instructions
3855 		   * can lead to unexpected results, including SEGV.
3856 		   */
3857 		  /*
3858 		   * While the general problem deserves a better solution, let's look
3859 		   * here only for one particular case:
3860 		   *    0xff 0xe7               jmp *reg
3861 		   *                            nop to bring us to a multiple-of-16 boundary
3862 		   *    0x0000000000000a00      something that does not look like an instruction
3863 		   *
3864 		   * A different nop might be used depending on how much padding is needed
3865 		   * to reach that multiple-of-16 boundary.  We've seen two:
3866 		   *    0x90                    one byte
3867 		   *    0x0f 0x1f 0x40 0x00     four bytes
3868 		   */
3869 		  // confirm the instruction is 0xff 0xe7
3870 		  if (cur->pc[0] == 0xe7)
3871 		    {
3872 		      // check for correct-length nop and find next 16-byte boundary
3873 		      int found_nop = 0;
3874 		      unsigned long long *boundary = 0;
3875 		      switch ((((unsigned long) (cur->pc)) & 0xf))
3876 			{
3877 			case 0xb: // look for 4-byte nop
3878 			  if (*((unsigned *) (cur->pc + 1)) == 0x00401f0f)
3879 			    found_nop = 1;
3880 			  boundary = (unsigned long long *) (cur->pc + 5);
3881 			  break;
3882 			case 0xe: // look for 1-byte nop
3883 			  if (cur->pc[1] == 0x90)
3884 			    found_nop = 1;
3885 			  boundary = (unsigned long long *) (cur->pc + 2);
3886 			  break;
3887 			default:
3888 			  break;
3889 			}
3890 
3891 		      // if nop is found, check what's at the boundary
3892 		      if (found_nop && *boundary == 0x000000000a00)
3893 			{
3894 			  DELETE_CURCTX ();
3895 			  break;
3896 			}
3897 		    }
3898 
3899 		  DprintfT (SP_DUMP_UNWIND, "unwind.c: probably PLT or tail call or switch table: %p\n",
3900 			    cur->pc - 1);
3901 		  if (num_jmp_reg < expected_num_jmp_reg)
3902 		    {
3903 		      if (jmp_reg_ctx[num_jmp_reg] == NULL)
3904 			jmp_reg_ctx[num_jmp_reg] = (struct AdvWalkContext *) alloca (sizeof (*cur));
3905 		      if (jmp_reg_ctx[num_jmp_reg] != NULL)
3906 			__collector_memcpy (jmp_reg_ctx[num_jmp_reg], cur, sizeof (*cur));
3907 		    }
3908 		  if (num_jmp_reg < expected_num_jmp_reg ||
3909 		      (num_jmp_reg >= expected_num_jmp_reg &&
3910 		       jmp_reg_ctx[expected_num_jmp_reg - 1] != NULL &&
3911 		       cur->pc != jmp_reg_ctx[expected_num_jmp_reg - 1]->pc))
3912 		    {
3913 		      num_jmp_reg++;
3914 		      total_num_jmp_reg++;
3915 		    }
3916 		  if (jmp_reg_switch_mode == 1 && total_num_jmp_reg >= max_num_jmp_reg_seen)
3917 		    {
3918 		      int rc = process_return_real (wctx, cur, 0);
3919 		      if (rc == RA_SUCCESS)
3920 			{
3921 			  if (save_ctx)
3922 			    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3923 			  return rc;
3924 			}
3925 		    }
3926 		}
3927 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xff.\n", __LINE__);
3928 	      DELETE_CURCTX ();
3929 	      break;
3930 	    case 0x5: /* jmpf Ep */
3931 	      cur->pc = check_modrm (cur->pc); /* XXXX */
3932 	      break;
3933 	    case 0x6: /* push Ev */
3934 	      cur->pc = check_modrm (cur->pc);
3935 	      cur->sp -= 1;
3936 	      break;
3937 	    case 0x7:
3938 	      cur->pc = check_modrm (cur->pc); /* XXXX */
3939 	      if (jmp_reg_switch_mode == 1)
3940 		{
3941 		  int rc = process_return_real (wctx, cur, 0);
3942 		  if (rc == RA_SUCCESS)
3943 		    {
3944 		      if (save_ctx)
3945 			omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3946 		      return rc;
3947 		    }
3948 		}
3949 	      break;
3950 	    default:
3951 	      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xff %x\n",
3952 			__LINE__, (int) extop);
3953 	      DELETE_CURCTX ();
3954 	      break;
3955 	    }
3956 	  break;
3957 	default:
3958 	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x%x\n",
3959 		    __LINE__, (int) opcode);
3960 	  DELETE_CURCTX ();
3961 	  break;
3962 	}
3963 
3964       /* switch to next context */
3965       if (++cur >= buf + nctx)
3966 	cur = buf;
3967       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d switch context: cur=0x%lx(%ld)  nctx=%d  cnt=%d\n",
3968 	       __LINE__, (unsigned long) cur, (long) (cur - buf), (int) nctx, (int) cnt);
3969     }
3970 
3971 checkFP:
3972   Tprintf (DBG_LT3, "find_i386_ret_addr:%d checkFP: wctx=0x%lx fp=0x%lx ln=0x%lx pc=0x%lx sbase=0x%lx sp=0x%lx tbgn=0x%lx tend=0x%lx\n",
3973 	   __LINE__, (unsigned long) wctx, (unsigned long) wctx->fp,
3974 	   (unsigned long) wctx->ln, (unsigned long) wctx->pc, (unsigned long) wctx->sbase,
3975 	   (unsigned long) wctx->sp, (unsigned long) wctx->tbgn, (unsigned long) wctx->tend);
3976 
3977   if (jmp_reg_switch_mode == 1)
3978     { // not deal with switch cases not ending with ret
3979       if (jmp_reg_switch_backup_ctx != NULL)
3980 	__collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
3981       DprintfT (SP_DUMP_UNWIND, "stack_unwind jmp reg mode on: pc = 0x%lx cnt = %d, nctx = %d\n", wctx->pc, cnt, nctx);
3982     }
3983 
3984   unsigned long *cur_fp = cur->fp;
3985   unsigned long *cur_sp = cur->sp;
3986   if (do_walk == 0)
3987     __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
3988 
3989   /* Resort to the frame pointer */
3990   if (cur->fp_loc)
3991     cur->fp = cur->fp_sav;
3992   cur->sp = cur->fp;
3993   if ((unsigned long) cur->sp >= wctx->sbase ||
3994       (unsigned long) cur->sp < wctx->sp)
3995     {
3996       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d do_walk=%d cur->sp=0x%p out of range. wctx->sbase=0x%lx wctx->sp=0x%lx wctx->pc=0x%lx\n",
3997 		__LINE__, (int) do_walk, cur->sp, (unsigned long) wctx->sbase,
3998 		(unsigned long) wctx->sp, (unsigned long) wctx->pc);
3999       if (do_walk == 0)
4000 	{
4001 	  cur->sp = cur_sp;
4002 	  cur->fp = cur_fp;
4003 	  do_walk = 1;
4004 	  save_ctx = 1;
4005 	  goto startWalk;
4006 	}
4007       if (save_ctx)
4008 	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4009       return RA_FAILURE;
4010     }
4011 
4012   unsigned long fp = *cur->sp++;
4013   if (fp <= (unsigned long) cur->sp || fp >= wctx->sbase)
4014     {
4015       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d fp=0x%016llx out of range. cur->sp=%p wctx->sbase=0x%lx wctx->pc=0x%lx\n",
4016 	       __LINE__, (unsigned long long) fp, cur->sp,
4017 	       (unsigned long) wctx->sbase, (unsigned long) wctx->pc);
4018       if (do_walk == 0)
4019 	{
4020 	  cur->sp = cur_sp;
4021 	  cur->fp = cur_fp;
4022 	  do_walk = 1;
4023 	  save_ctx = 1;
4024 	  goto startWalk;
4025 	}
4026       if (save_ctx)
4027 	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4028       return RA_FAILURE;
4029     }
4030 
4031   unsigned long ra = *cur->sp++;
4032   if (ra == 0)
4033     {
4034       cache_put (wctx, RA_EOSTCK);
4035       DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK wctx->pc = 0x%lx\n", __LINE__, wctx->pc);
4036       if (save_ctx)
4037 	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
4038       return RA_END_OF_STACK;
4039     }
4040 
4041   unsigned long tbgn = wctx->tbgn;
4042   unsigned long tend = wctx->tend;
4043   if (ra < tbgn || ra >= tend)
4044     {
4045       // We do not know yet if update_map_segments is really needed
4046       if (!__collector_check_segment (ra, &tbgn, &tend, 0))
4047 	{
4048 	  DprintfT (SP_DUMP_UNWIND, "unwind.c: __collector_check_segment fail. wctx->pc = 0x%lx\n", wctx->pc);
4049 	  if (do_walk == 0)
4050 	    {
4051 	      cur->sp = cur_sp;
4052 	      cur->fp = cur_fp;
4053 	      do_walk = 1;
4054 	      save_ctx = 1;
4055 	      goto startWalk;
4056 	    }
4057 	  if (save_ctx)
4058 	    omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4059 	  return RA_FAILURE;
4060 	}
4061     }
4062 
4063   unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
4064   if (npc == 0)
4065     {
4066       DprintfT (SP_DUMP_UNWIND, "unwind.c: adjust_ret_addr fail. wctx->pc = 0x%lx\n", wctx->pc);
4067       if (do_walk == 0)
4068 	{
4069 	  cur->sp = cur_sp;
4070 	  cur->fp = cur_fp;
4071 	  do_walk = 1;
4072 	  save_ctx = 1;
4073 	  goto startWalk;
4074 	}
4075       if (save_ctx)
4076 	omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4077       return RA_FAILURE;
4078     }
4079   wctx->pc = npc;
4080   wctx->sp = (unsigned long) cur->sp;
4081   wctx->fp = fp;
4082   wctx->tbgn = tbgn;
4083   wctx->tend = tend;
4084 
4085   if (save_ctx)
4086     {
4087       omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SUCCESS);
4088       DprintfT (SP_DUMP_UNWIND, "unwind.c: cache walk context. wctx_pc_save->pc = 0x%lx\n", wctx_pc_save.pc);
4089     }
4090   return RA_SUCCESS;
4091 }
4092 
4093 /*
4094  * We have the return address, but we would like to report to the user
4095  * the calling PC, which is the instruction immediately preceding the
4096  * return address.  Unfortunately, x86 instructions can have variable
4097  * length.  So we back up 8 bytes and try to figure out where the
4098  * calling PC starts.  (FWIW, call instructions are often 5-bytes long.)
4099  */
unsigned long
adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend)
{
  /* ra:     return address found on the stack.
     segoff: offset of 'ra' within its text segment (callers pass
	     ra - tbgn), i.e. how many bytes may safely be read
	     backwards without leaving the segment.
     tend:   end address of the text segment containing 'ra'.
     Returns the calling PC, or 0 if it could not be determined.  */
  unsigned long npc = 0;
  /* Try candidate instruction lengths from 8 down to 2 bytes: if the
     i bytes immediately preceding 'ra' decode as a single call
     instruction ending exactly at 'ra', then 'ra - i' is the calling
     PC.  */
  int i = segoff < 8 ? segoff : 8;
  for (; i > 1; i--)
    {
      unsigned char *ptr = (unsigned char*) ra - i;
      int z = 4;	/* immediate-operand size in bytes */
      int a = 4;	/* address size in bytes */
      int done = 0;
      int bVal;
      /* Step over instruction prefixes, adjusting 'z' and 'a' for the
	 operand-size (0x66) and address-size (0x67) overrides.  */
      while (!done)
	{
	  bVal = getByteInstruction (ptr);
	  if (bVal < 0)	/* byte not readable */
	    return 0;
	  switch (bVal)
	    {
	    case 0x26:	/* %es segment override */
	    case 0x36:	/* %ss segment override */
#if WSIZE(64)
	      ptr += 1;
	      break;
#endif
	      /* In 32-bit mode fall through and handle these like the
		 0x64/0x65 segment overrides below.  */
	    case 0x64:	/* %fs segment override */
	    case 0x65:	/* %gs segment override */
	      bVal = getByteInstruction (ptr + 1);
	      if (bVal < 0)
		return 0;
	      if (bVal == 0xe8)
		// a workaround for bug 16193041, assuming "call Jz" has no segment override prefix
	       done = 1;
	      else
		ptr += 1;
	      break;
	    case 0x66:	/* operand-size override */
	      z = 2;
	      ptr += 1;
	      break;
	    case 0x67:	/* address-size override */
	      a = 2;
	      ptr += 1;
	      break;
	    default:
	      done = 1;
	      break;
	    }
	}
#if WSIZE(64)
      /* Step over a REX prefix (0x40-0x4f) if present.  */
      bVal = getByteInstruction (ptr);
      if (bVal < 0)
	return 0;
      if (bVal >= 0x40 && bVal <= 0x4f)
	{ /* XXXX not all REX codes applicable */
	  if (bVal & 0x8)	/* REX.W: call immediate is still 4 bytes */
	    z = 4;
	  ptr += 1;
	}
#endif
      int opcode = getByteInstruction (ptr);
      if (opcode < 0)
	return 0;
      ptr++;
      /* Accept only the three call encodings; any other opcode means
	 this candidate length does not start a call instruction.  */
      switch (opcode)
	{
	case 0xe8: /* call Jz (f64) */
	  ptr += z;
	  break;
	case 0x9a: /* callf Ap */
	  ptr += 2 + a;
	  break;
	case 0xff: /* calln Ev , callf Ep */
	  {
	    int extop = MRM_EXT (*ptr);
	    if (extop == 2 || extop == 3)
	      ptr = check_modrm (ptr);
	  }
	  break;
	default:
	  continue;
	}
      /* The decoded call must end exactly at the return address.  */
      if ((unsigned long) ptr == ra)
	{
	  npc = ra - i;
	  break;
	}
    }
  /* No preceding call was found: check whether 'ra' itself points at a
     known kernel signal-return trampoline, which is entered without a
     call instruction.  */
  if (npc == 0)
    {
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // test __kernel_sigreturn or __kernel_rt_sigreturn
      if ((ra + 7 < tend && getByteInstruction (ptr) == 0x58
	   && getByteInstruction (ptr + 1) == 0xb8
	   && getByteInstruction (ptr + 6) == 0xcd
	   && getByteInstruction (ptr + 7) == 0x80) /* pop %eax; mov $NNNN, %eax; int */
	  || (ra + 7 < tend && getByteInstruction (ptr) == 0x58
	      && getByteInstruction (ptr + 1) == 0xb8
	      && getByteInstruction (ptr + 6) == 0x0f
	      && getByteInstruction (ptr + 7) == 0x05) /* pop %eax; mov $NNNN, %eax; syscall */
	  || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
	      && getByteInstruction (ptr + 5) == 0xcd
	      && getByteInstruction (ptr + 6) == 0x80) /* mov $NNNN, %eax; int */
	  || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
	      && getByteInstruction (ptr + 5) == 0x0f
	      && getByteInstruction (ptr + 6) == 0x05)) /* mov $NNNN, %eax; syscall */
#else //WSIZE(64)
      // test __restore_rt
      if (ra + 8 < tend && getByteInstruction (ptr) == 0x48
	  && getByteInstruction (ptr + 7) == 0x0f
	  && getByteInstruction (ptr + 8) == 0x05) /* mov $NNNNNNNN, %rax; syscall */
#endif
	{
	  npc = ra;
	}
    }
  /* Still nothing: recognize JVM interpreter dispatch sequences, where
     the "caller" is a computed jump (jmp *table(,reg,scale)) rather
     than a call.  The byte patterns below match specific JDK
     releases.  */
  if (npc == 0 && __collector_java_mode
      && __collector_java_asyncgetcalltrace_loaded)
    { // detect jvm interpreter code for java user threads
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // up to J170
      /*
       * ff 24 9d e0 64 02 f5    jmp     *-0xafd9b20(,%ebx,4)
       * 8b 4e 01                movl    1(%esi),%ecx
       * f7 d1                   notl    %ecx
       * 8b 5d ec                movl    -0x14(%ebp),%ebx
       * c1 e1 02                shll    $2,%ecx
       * eb d8                   jmp     .-0x26 [ 0x92a ]
       * 83 ec 08                subl    $8,%esp || 8b 65 f8                movl    -8(%ebp),%esp
       * */
      if (ra - 20 >= (ra - segoff) && ((*ptr == 0x83 && *(ptr + 1) == 0xec) || (*ptr == 0x8b && *(ptr + 1) == 0x65))
	  && *(ptr - 2) == 0xeb
	  && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
	  && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
	  && *(ptr - 10) == 0xf7 && *(ptr - 9) == 0xd1
	  && *(ptr - 13) == 0x8b && *(ptr - 12) == 0x4e
	  && *(ptr - 20) == 0xff && *(ptr - 19) == 0x24 && *(ptr - 18) == 0x9d)
	{
	  npc = ra - 20;
	}
      // J180 J190
      // ff 24 9d ** ** ** **    jmp     *-0x*******(,%ebx,4)
      if (npc == 0
	  && ra - 7 >= (ra - segoff)
	  && *(ptr - 7) == 0xff
	  && *(ptr - 6) == 0x24
	  && *(ptr - 5) == 0x9d)
	{
	  npc = ra - 7;
	}
#else //WSIZE(64)
      // up to J170
      /*
       * 41 ff 24 da             jmp     *(%r10,%rbx,8)
       * 41 8b 4d 01             movl    1(%r13),%ecx
       * f7 d1                   notl    %ecx
       * 48 8b 5d d8             movq    -0x28(%rbp),%rbx
       * c1 e1 02                shll    $2,%ecx
       * eb cc                   jmp     .-0x32 [ 0xd23 ]
       * 48 8b 65 f0             movq    -0x10(%rbp),%rsp
       */
      if (ra - 19 >= (ra - segoff) && *ptr == 0x48 && ((*(ptr + 1) == 0x8b && *(ptr + 2) == 0x65) || (*(ptr + 1) == 0x83 && *(ptr + 2) == 0xec))
	  && *(ptr - 2) == 0xeb
	  && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
	  && *(ptr - 9) == 0x48 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
	  && *(ptr - 11) == 0xf7 && *(ptr - 10) == 0xd1
	  && *(ptr - 15) == 0x41 && *(ptr - 14) == 0x8b && *(ptr - 13) == 0x4d
	  && *(ptr - 19) == 0x41 && *(ptr - 18) == 0xff)
	npc = ra - 19;
      // J180 J190
      // 41 ff 24 da             jmp     *(%r10,%rbx,8)
      if (npc == 0
	  && ra - 4 >= (ra - segoff)
	  && *(ptr - 4) == 0x41
	  && *(ptr - 3) == 0xff
	  && *(ptr - 2) == 0x24
	  && *(ptr - 1) == 0xda)
	npc = ra - 4;
#endif
    }

  return npc;
}
4285 
4286 /*
4287  * Parses AVX instruction and returns its length.
4288  * Returns 0 if parsing failed.
4289  * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
4290  */
4291 static int
parse_x86_AVX_instruction(unsigned char * pc)4292 parse_x86_AVX_instruction (unsigned char *pc)
4293 {
4294   /*
4295    * VEX prefix has a two-byte form (0xc5) and a three byte form (0xc4).
4296    * If an instruction syntax can be encoded using the two-byte form,
4297    * it can also be encoded using the three byte form of VEX.
4298    * The latter increases the length of the instruction by one byte.
4299    * This may be helpful in some situations for code alignment.
4300    *
4301 		     Byte 0           Byte 1              Byte 2         Byte 3
4302      (Bit Position) 7      0     7 6 5   4    0     7   6  3   2   10
4303      3-byte VEX   [ 11000100 ] [ R X B | m-mmmm ] [ W | vvvv | L | pp ]
4304 		    7      0     7   6  3   2   10
4305      2-byte VEX   [ 11000101 ] [ R | vvvv | L | pp ]
4306 		    7      0     7 6 5  4 3 2 1 0     7 6 5 4 3 2 1 0     7  6 5  4  3 2 1 0
4307      4-byte EVEX  [ 01100010 ] [ R X B R1 0 0 m m ] [ W v v v v 1 p p ] [ z L1 L B1 V1 a a a ]
4308 
4309      R: REX.R in 1's complement (inverted) form
4310 	  0: Same as REX.R=1 (64-bit mode only)
4311 	  1: Same as REX.R=0 (must be 1 in 32-bit mode)
4312 
4313      X: REX.X in 1's complement (inverted) form
4314 	  0: Same as REX.X=1 (64-bit mode only)
4315 	  1: Same as REX.X=0 (must be 1 in 32-bit mode)
4316 
4317      B: REX.B in 1's complement (inverted) form
4318 	  0: Same as REX.B=1 (64-bit mode only)
4319 	  1: Same as REX.B=0 (Ignored in 32-bit mode).
4320 
4321      W: opcode specific (use like REX.W, or used for opcode
4322 	  extension, or ignored, depending on the opcode byte)
4323 
4324      m-mmmm:
4325 	  00000: Reserved for future use (will #UD)
4326 	  00001: implied 0F leading opcode byte
4327 	  00010: implied 0F 38 leading opcode bytes
4328 	  00011: implied 0F 3A leading opcode bytes
4329 	  00100-11111: Reserved for future use (will #UD)
4330 
4331      vvvv: a register specifier (in 1's complement form) or 1111 if unused.
4332 
4333      L: Vector Length
4334 	  0: scalar or 128-bit vector
4335 	  1: 256-bit vector
4336 
4337      pp: opcode extension providing equivalent functionality of a SIMD prefix
4338 	  00: None
4339 	  01: 66
4340 	  10: F3
4341 	  11: F2
4342    *
4343    * Example: 0xc5f877L vzeroupper
4344    * VEX prefix: 0xc5 0x77
4345    * Opcode: 0xf8
4346    *
4347    */
4348   int len = 0;
4349   disassemble_info dis_info;
4350   dis_info.arch = bfd_arch_i386;
4351   dis_info.mach = bfd_mach_x86_64;
4352   dis_info.flavour = bfd_target_unknown_flavour;
4353   dis_info.endian = BFD_ENDIAN_UNKNOWN;
4354   dis_info.endian_code = dis_info.endian;
4355   dis_info.octets_per_byte = 1;
4356   dis_info.disassembler_needs_relocs = FALSE;
4357   dis_info.fprintf_func = fprintf_func;
4358   dis_info.fprintf_styled_func = fprintf_styled_func;
4359   dis_info.stream = NULL;
4360   dis_info.disassembler_options = NULL;
4361   dis_info.read_memory_func = read_memory_func;
4362   dis_info.memory_error_func = memory_error_func;
4363   dis_info.print_address_func = print_address_func;
4364   dis_info.symbol_at_address_func = symbol_at_address_func;
4365   dis_info.symbol_is_valid = symbol_is_valid;
4366   dis_info.display_endian = BFD_ENDIAN_UNKNOWN;
4367   dis_info.symtab = NULL;
4368   dis_info.symtab_size = 0;
4369   dis_info.buffer_vma = 0;
4370   dis_info.buffer = pc;
4371   dis_info.buffer_length = 8;
4372 
4373   disassembler_ftype disassemble = print_insn_i386;
4374   if (disassemble == NULL)
4375     {
4376       DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction ERROR: unsupported disassemble\n");
4377       return 0;
4378     }
4379   len = disassemble (0, &dis_info);
4380   DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction: returned %d  pc: %p\n", len, pc);
4381   return len;
4382 }
4383 
4384 /*
4385  * In the Intel world, a stack frame looks like this:
4386  *
4387  * %fp0->|                               |
4388  *       |-------------------------------|
4389  *       |  Args to next subroutine      |
4390  *       |-------------------------------|-\
4391  * %sp0->|  One word struct-ret address  | |
4392  *       |-------------------------------|  > minimum stack frame (8 bytes)
4393  *       |  Previous frame pointer (%fp0)| |
4394  * %fp1->|-------------------------------|-/
4395  *       |  Local variables              |
4396  * %sp1->|-------------------------------|
4397  *
4398  */
4399 
int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  /* Walk the user stack starting from 'context' and store one PC per
     frame into 'buf' (treated as an array of longs, at most
     size/sizeof(long) entries).  'bptr'/'eptr', when non-NULL, bound
     which frames are recorded by stack address.  Returns the number of
     bytes of 'buf' filled.  */
  long *lbuf = (long*) buf;
  int lsize = size / sizeof (long);	/* capacity in long slots */
  int ind = 0;				/* next free slot in lbuf */
  int do_walk = 1;
  int extra_frame = 0;
  if (mode & FRINFO_NO_WALK)
    do_walk = 0;	/* caller requests FP-chain only, no byte walking */
  if ((mode & 0xffff) == FRINFO_FROM_STACK)
    extra_frame = 1;	/* record one extra leading frame in lbuf[0] */

  /*
   * trace the stack frames from user stack.
   * We are assuming that the frame pointer and return address
   * are null when we are at the top level.
   */
  struct WalkContext wctx;
  wctx.pc = GET_PC (context);
  wctx.sp = GET_SP (context);
  wctx.fp = GET_FP (context);
  wctx.ln = (unsigned long) context->uc_link;
  /* Stack base: per-thread value if recorded and plausible, else
     assume at most 1MB of stack above the current SP.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > wctx.sp)
    wctx.sbase = *sbase;
  else
    {
      wctx.sbase = wctx.sp + 0x100000;
      if (wctx.sbase < wctx.sp)  /* overflow */
	wctx.sbase = (unsigned long) - 1;
    }
  // We do not know yet if update_map_segments is really needed
  __collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0);

  for (;;)
    {
      if (ind >= lsize || wctx.pc == 0)
	break;
      /* Frames whose SP is at or below 'bptr' only overwrite lbuf[0]
	 (the extra leading frame) rather than appending.  */
      if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	{
	  lbuf[0] = wctx.pc;
	  if (ind == 0)
	    {
	      ind++;
	      if (ind >= lsize)
		break;
	    }
	}
      /* Record this frame's PC unless it is below the 'bptr' bound.  */
      if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	{
	  lbuf[ind++] = wctx.pc;
	  if (ind >= lsize)
	    break;
	}

      /* Inner loop: advance wctx one caller frame at a time, handling
	 signal-trampoline frames specially.  */
      for (;;)
	{
	  /* Stop once the walk passes the 'eptr' bound; drop the last
	     two recorded frames, which belong beyond the bound.  */
	  if (eptr != NULL && wctx.sp >= (unsigned long) eptr)
	    {
	      ind = ind >= 2 ? ind - 2 : 0;
	      goto exit;
	    }
	  int ret = find_i386_ret_addr (&wctx, do_walk);
	  DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d find_i386_ret_addr returns %d\n", __LINE__, ret);
	  if (ret == RA_FAILURE)
	    {
	      /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
	      goto exit;
	    }

	  if (ret == RA_END_OF_STACK)
	    goto exit;
#if WSIZE(32)
	  if (ret == RA_RT_SIGRETURN)
	    {
	      /* rt_sigreturn frame: the third argument on the signal
		 frame is the ucontext of the interrupted context.  */
	      struct SigFrame
	      {
		unsigned long arg0;
		unsigned long arg1;
		unsigned long arg2;
	      } *sframe = (struct SigFrame*) wctx.sp;
	      ucontext_t *ncontext = (ucontext_t*) sframe->arg2;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= sframe->arg2 || nsp > sframe->arg2 + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break;
	    }
	  else if (ret == RA_SIGRETURN)
	    {
	      /* Non-RT signal frame: a struct sigcontext sits directly
		 at the stack pointer.  */
	      struct sigcontext *sctx = (struct sigcontext*) wctx.sp;
	      wctx.pc = sctx->eip;
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = sctx->esp;
	      wctx.fp = sctx->ebp;
	      break;
	    }
#elif WSIZE(64)
	  if (ret == RA_RT_SIGRETURN)
	    {
	      /* On 64-bit the ucontext of the interrupted context sits
		 directly at the stack pointer.  */
	      ucontext_t *ncontext = (ucontext_t*) wctx.sp;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= wctx.sp || nsp > wctx.sp + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break;
	    }
#endif /* WSIZE() */
	  /* Same bptr/extra_frame recording logic as the outer loop.  */
	  if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	    {
	      lbuf[0] = wctx.pc;
	      if (ind == 0)
		{
		  ind++;
		  if (ind >= lsize)
		    break;
		}
	    }
	  if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	    {
	      lbuf[ind++] = wctx.pc;
	      if (ind >= lsize)
		goto exit;
	    }
	}
    }

exit:
#if defined(DEBUG)
  if ((SP_DUMP_UNWIND & __collector_tracelevel) != 0)
    {
      DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d found %d frames\n\n", __LINE__, ind);
      for (int i = 0; i < ind; i++)
	DprintfT (SP_DUMP_UNWIND, "  %3d:  0x%lx\n", i, (unsigned long) lbuf[i]);
    }
#endif
  dump_stack (__LINE__);
  /* On overflow, overwrite the last slot with the truncation marker.  */
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (unsigned long) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (long);
}
4571 
4572 #elif ARCH(Aarch64)
4573 
static int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  /* Aarch64 frame-pointer walk: starting from the PC/SP in 'context',
     follow the (fp, lr) pairs saved at each frame's stack pointer and
     store one PC per frame into 'buf' (as __u64 entries).  Returns the
     number of bytes of 'buf' filled.  */
  /* NOTE(review): this statement discards its result; it appears to be
     a dummy use of the otherwise-unused bptr/eptr/mode parameters —
     TODO confirm intent.  */
  if (buf && bptr && eptr && context && size + mode > 0)
    getByteInstruction ((unsigned char *) eptr);
  int ind = 0;				/* next free slot in lbuf */
  __u64 *lbuf = (void *) buf;
  int lsize = size / sizeof (__u64);	/* capacity in __u64 slots */
  __u64 pc = context->uc_mcontext.pc;
  __u64 sp = context->uc_mcontext.sp;
  __u64 stack_base;
  unsigned long tbgn = 0;		/* cached text segment begin */
  unsigned long tend = 0;		/* cached text segment end */

  /* Stack base: per-thread value if recorded and plausible, else
     assume at most 1MB of stack above the current SP.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > sp)
    stack_base = *sbase;
  else
    {
      stack_base = sp + 0x100000;
      if (stack_base < sp)  // overflow
	stack_base = (__u64) -1;
    }
  DprintfT (SP_DUMP_UNWIND,
    "unwind.c:%d stack_unwind %2d pc=0x%llx  sp=0x%llx  stack_base=0x%llx\n",
    __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp,
    (unsigned long long) stack_base);

  while (sp && pc)
  {
    DprintfT (SP_DUMP_UNWIND,
	"unwind.c:%d stack_unwind %2d pc=0x%llx  sp=0x%llx\n",
	__LINE__, ind, (unsigned long long) pc, (unsigned long long) sp);
//      Dl_info dlinfo;
//      if (!dladdr ((void *) pc, &dlinfo))
//	break;
//      DprintfT (SP_DUMP_UNWIND, "%2d: %llx <%s+%llu> (%s)\n",
//		ind, (unsigned long long) pc,
//		dlinfo.dli_sname ? dlinfo.dli_sname : "(?)",
//		(unsigned long long) pc - (unsigned long long) dlinfo.dli_saddr,
//		dlinfo.dli_fname);
      lbuf[ind++] = pc;
      /* Stop when the buffer is full, the SP leaves the stack, or the
	 SP loses its required 16-byte alignment.  */
      if (ind >= lsize || sp >= stack_base || (sp & 15) != 0)
	break;
      /* Validate that 'pc' lies in a mapped text segment (the segment
	 bounds are cached in tbgn/tend between iterations).  */
      if (pc < tbgn || pc >= tend)
	if (!__collector_check_segment ((unsigned long) pc, &tbgn, &tend, 0))
	  {
	    DprintfT (SP_DUMP_UNWIND,
		     "unwind.c:%d __collector_check_segment failed. sp=0x%lx\n",
		      __LINE__, (unsigned long) sp);
	    break;
	  }
      /* The frame record at SP holds [0]=saved FP, [1]=saved LR.  */
      pc = ((__u64 *) sp)[1];
      __u64 old_sp = sp;
      sp = ((__u64 *) sp)[0];
      /* A caller frame must be at a higher address; otherwise the
	 chain is corrupt.  */
      if (sp < old_sp)
	break;
    }
  /* On overflow, overwrite the last slot with the truncation marker.  */
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (__u64) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (__u64);
}
4639 #endif /* ARCH() */
4640