xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/allocator.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
/* Copyright (C) 2020-2022 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains wrappers for the system allocation routines.  Most
   places in the OpenMP API do not make any provision for failure, so in
   general we cannot allow memory allocation to fail.  */

#define _GNU_SOURCE
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

#define omp_max_predefined_alloc omp_thread_mem_alloc

struct omp_allocator_data
{
  omp_memspace_handle_t memspace;
  omp_uintptr_t alignment;
  omp_uintptr_t pool_size;
  omp_uintptr_t used_pool_size;
  omp_allocator_handle_t fb_data;
  unsigned int sync_hint : 8;
  unsigned int access : 8;
  unsigned int fallback : 8;
  unsigned int pinned : 1;
  unsigned int partition : 7;
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_t lock;
#endif
};

struct omp_mem_header
{
  void *ptr;
  size_t size;
  omp_allocator_handle_t allocator;
  void *pad;
};
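
/* Layout note (a description of the scheme used below, not new behavior):
   every successful allocation returns a pointer RET such that
   ((struct omp_mem_header *) ret)[-1] is valid and records the start of
   the underlying malloc'ed block (ptr), the total size including header
   and alignment padding (size), and the allocator that must release it.
   omp_free and omp_realloc trust this header rather than their
   ALLOCATOR arguments.  */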

omp_allocator_handle_t
omp_init_allocator (omp_memspace_handle_t memspace, int ntraits,
		    const omp_alloctrait_t traits[])
{
  struct omp_allocator_data data
    = { memspace, 1, ~(uintptr_t) 0, 0, 0, omp_atv_contended, omp_atv_all,
	omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment };
  struct omp_allocator_data *ret;
  int i;

  if (memspace > omp_low_lat_mem_space)
    return omp_null_allocator;
  for (i = 0; i < ntraits; i++)
    switch (traits[i].key)
      {
      case omp_atk_sync_hint:
	switch (traits[i].value)
	  {
	  case omp_atv_default:
	    data.sync_hint = omp_atv_contended;
	    break;
	  case omp_atv_contended:
	  case omp_atv_uncontended:
	  case omp_atv_serialized:
	  case omp_atv_private:
	    data.sync_hint = traits[i].value;
	    break;
	  default:
	    return omp_null_allocator;
	  }
	break;
      case omp_atk_alignment:
	if (traits[i].value == omp_atv_default)
	  {
	    data.alignment = 1;
	    break;
	  }
	if ((traits[i].value & (traits[i].value - 1)) != 0
	    || !traits[i].value)
	  return omp_null_allocator;
	data.alignment = traits[i].value;
	break;
      case omp_atk_access:
	switch (traits[i].value)
	  {
	  case omp_atv_default:
	    data.access = omp_atv_all;
	    break;
	  case omp_atv_all:
	  case omp_atv_cgroup:
	  case omp_atv_pteam:
	  case omp_atv_thread:
	    data.access = traits[i].value;
	    break;
	  default:
	    return omp_null_allocator;
	  }
	break;
      case omp_atk_pool_size:
	if (traits[i].value == omp_atv_default)
	  data.pool_size = ~(uintptr_t) 0;
	else
	  data.pool_size = traits[i].value;
	break;
      case omp_atk_fallback:
	switch (traits[i].value)
	  {
	  case omp_atv_default:
	    data.fallback = omp_atv_default_mem_fb;
	    break;
	  case omp_atv_default_mem_fb:
	  case omp_atv_null_fb:
	  case omp_atv_abort_fb:
	  case omp_atv_allocator_fb:
	    data.fallback = traits[i].value;
	    break;
	  default:
	    return omp_null_allocator;
	  }
	break;
      case omp_atk_fb_data:
	data.fb_data = traits[i].value;
	break;
      case omp_atk_pinned:
	switch (traits[i].value)
	  {
	  case omp_atv_default:
	  case omp_atv_false:
	    data.pinned = omp_atv_false;
	    break;
	  case omp_atv_true:
	    data.pinned = omp_atv_true;
	    break;
	  default:
	    return omp_null_allocator;
	  }
	break;
      case omp_atk_partition:
	switch (traits[i].value)
	  {
	  case omp_atv_default:
	    data.partition = omp_atv_environment;
	    break;
	  case omp_atv_environment:
	  case omp_atv_nearest:
	  case omp_atv_blocked:
	  case omp_atv_interleaved:
	    data.partition = traits[i].value;
	    break;
	  default:
	    return omp_null_allocator;
	  }
	break;
      default:
	return omp_null_allocator;
      }

  if (data.alignment < sizeof (void *))
    data.alignment = sizeof (void *);

  /* No support for these so far (for hbw will use memkind).  */
  if (data.pinned || data.memspace == omp_high_bw_mem_space)
    return omp_null_allocator;

  ret = gomp_malloc (sizeof (struct omp_allocator_data));
  *ret = data;
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_init (&ret->lock);
#endif
  return (omp_allocator_handle_t) ret;
}

void
omp_destroy_allocator (omp_allocator_handle_t allocator)
{
  if (allocator != omp_null_allocator)
    {
#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_destroy (&((struct omp_allocator_data *) allocator)->lock);
#endif
      free ((void *) allocator);
    }
}

ialias (omp_init_allocator)
ialias (omp_destroy_allocator)
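
/* A minimal usage sketch (illustrative only, not part of this file),
   assuming the standard OpenMP 5.x allocator API from <omp.h>:

     omp_alloctrait_t traits[2]
       = { { omp_atk_alignment, 64 },
	   { omp_atk_fallback, omp_atv_null_fb } };
     omp_allocator_handle_t a
       = omp_init_allocator (omp_default_mem_space, 2, traits);
     if (a != omp_null_allocator)
       {
	 void *p = omp_alloc (128, a);	/* 64-byte aligned, or NULL.  */
	 omp_free (p, a);
	 omp_destroy_allocator (a);
       }

   omp_init_allocator itself returns omp_null_allocator for any request it
   rejects: unknown trait keys or values, a non-power-of-two alignment,
   pinned memory, or the high bandwidth memspace.  */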

void *
omp_aligned_alloc (size_t alignment, size_t size,
		   omp_allocator_handle_t allocator)
{
  struct omp_allocator_data *allocator_data;
  size_t new_size, new_alignment;
  void *ptr, *ret;

  if (__builtin_expect (size == 0, 0))
    return NULL;

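  /* On any allocation failure below, the fallback trait may send the
     request back here with ALLOCATOR replaced by omp_default_mem_alloc
     or by the allocator from the fb_data trait.  */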
retry:
  new_alignment = alignment;
  if (allocator == omp_null_allocator)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->ts.def_allocator == omp_null_allocator)
	thr->ts.def_allocator = gomp_def_allocator;
      allocator = (omp_allocator_handle_t) thr->ts.def_allocator;
    }

  if (allocator > omp_max_predefined_alloc)
    {
      allocator_data = (struct omp_allocator_data *) allocator;
      if (new_alignment < allocator_data->alignment)
	new_alignment = allocator_data->alignment;
    }
  else
    {
      allocator_data = NULL;
      if (new_alignment < sizeof (void *))
	new_alignment = sizeof (void *);
    }

  new_size = sizeof (struct omp_mem_header);
  if (new_alignment > sizeof (void *))
    new_size += new_alignment - sizeof (void *);
  if (__builtin_add_overflow (size, new_size, &new_size))
    goto fail;

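  /* For allocators with a finite pool_size trait, charge the request
     against used_pool_size up front (atomically, or under the allocator
     lock), and credit it back if the underlying malloc fails.  */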
  if (__builtin_expect (allocator_data
			&& allocator_data->pool_size < ~(uintptr_t) 0, 0))
    {
      uintptr_t used_pool_size;
      if (new_size > allocator_data->pool_size)
	goto fail;
#ifdef HAVE_SYNC_BUILTINS
      used_pool_size = __atomic_load_n (&allocator_data->used_pool_size,
					MEMMODEL_RELAXED);
      do
	{
	  uintptr_t new_pool_size;
	  if (__builtin_add_overflow (used_pool_size, new_size,
				      &new_pool_size)
	      || new_pool_size > allocator_data->pool_size)
	    goto fail;
	  if (__atomic_compare_exchange_n (&allocator_data->used_pool_size,
					   &used_pool_size, new_pool_size,
					   true, MEMMODEL_RELAXED,
					   MEMMODEL_RELAXED))
	    break;
	}
      while (1);
#else
      gomp_mutex_lock (&allocator_data->lock);
      if (__builtin_add_overflow (allocator_data->used_pool_size, new_size,
				  &used_pool_size)
	  || used_pool_size > allocator_data->pool_size)
	{
	  gomp_mutex_unlock (&allocator_data->lock);
	  goto fail;
	}
      allocator_data->used_pool_size = used_pool_size;
      gomp_mutex_unlock (&allocator_data->lock);
#endif
      ptr = malloc (new_size);
      if (ptr == NULL)
	{
#ifdef HAVE_SYNC_BUILTINS
	  __atomic_add_fetch (&allocator_data->used_pool_size, -new_size,
			      MEMMODEL_RELAXED);
#else
	  gomp_mutex_lock (&allocator_data->lock);
	  allocator_data->used_pool_size -= new_size;
	  gomp_mutex_unlock (&allocator_data->lock);
#endif
	  goto fail;
	}
    }
  else
    {
      ptr = malloc (new_size);
      if (ptr == NULL)
	goto fail;
    }

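  /* Round the user pointer up to NEW_ALIGNMENT; the gap this leaves after
     the raw malloc result is what the header and padding occupy, which is
     why new_size reserved new_alignment - sizeof (void *) extra bytes
     above.  */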
  if (new_alignment > sizeof (void *))
    ret = (void *) (((uintptr_t) ptr
		     + sizeof (struct omp_mem_header)
		     + new_alignment - sizeof (void *))
		    & ~(new_alignment - 1));
  else
    ret = (char *) ptr + sizeof (struct omp_mem_header);
  ((struct omp_mem_header *) ret)[-1].ptr = ptr;
  ((struct omp_mem_header *) ret)[-1].size = new_size;
  ((struct omp_mem_header *) ret)[-1].allocator = allocator;
  return ret;

fail:
  if (allocator_data)
    {
      switch (allocator_data->fallback)
	{
	case omp_atv_default_mem_fb:
	  if ((new_alignment > sizeof (void *) && new_alignment > alignment)
	      || (allocator_data
		  && allocator_data->pool_size < ~(uintptr_t) 0))
	    {
	      allocator = omp_default_mem_alloc;
	      goto retry;
	    }
	  /* Otherwise, we've already performed default mem allocation
	     and if that failed, it won't succeed again (unless it was
	     intermittent).  Return NULL then, as that is the fallback.  */
	  break;
	case omp_atv_null_fb:
	  break;
	default:
	case omp_atv_abort_fb:
	  gomp_fatal ("Out of memory allocating %lu bytes",
		      (unsigned long) size);
	case omp_atv_allocator_fb:
	  allocator = allocator_data->fb_data;
	  goto retry;
	}
    }
  return NULL;
}

ialias (omp_aligned_alloc)

void *
omp_alloc (size_t size, omp_allocator_handle_t allocator)
{
  return ialias_call (omp_aligned_alloc) (1, size, allocator);
}

/* Like omp_aligned_alloc, but apply on top of that:
   "For allocations that arise from this ... the null_fb value of the
   fallback allocator trait behaves as if the abort_fb had been specified."  */

void *
GOMP_alloc (size_t alignment, size_t size, uintptr_t allocator)
{
  void *ret
    = ialias_call (omp_aligned_alloc) (alignment, size,
				       (omp_allocator_handle_t) allocator);
  if (__builtin_expect (ret == NULL, 0) && size)
    gomp_fatal ("Out of memory allocating %lu bytes",
		(unsigned long) size);
  return ret;
}

void
omp_free (void *ptr, omp_allocator_handle_t allocator)
{
  struct omp_mem_header *data;

  if (ptr == NULL)
    return;
  (void) allocator;
  data = &((struct omp_mem_header *) ptr)[-1];
  if (data->allocator > omp_max_predefined_alloc)
    {
      struct omp_allocator_data *allocator_data
	= (struct omp_allocator_data *) (data->allocator);
      if (allocator_data->pool_size < ~(uintptr_t) 0)
	{
#ifdef HAVE_SYNC_BUILTINS
	  __atomic_add_fetch (&allocator_data->used_pool_size, -data->size,
			      MEMMODEL_RELAXED);
#else
	  gomp_mutex_lock (&allocator_data->lock);
	  allocator_data->used_pool_size -= data->size;
	  gomp_mutex_unlock (&allocator_data->lock);
#endif
	}
    }
  free (data->ptr);
}

ialias (omp_free)

void
GOMP_free (void *ptr, uintptr_t allocator)
{
  return ialias_call (omp_free) (ptr, (omp_allocator_handle_t) allocator);
}

void *
omp_aligned_calloc (size_t alignment, size_t nmemb, size_t size,
		    omp_allocator_handle_t allocator)
{
  struct omp_allocator_data *allocator_data;
  size_t new_size, size_temp, new_alignment;
  void *ptr, *ret;

  if (__builtin_expect (size == 0 || nmemb == 0, 0))
    return NULL;

retry:
  new_alignment = alignment;
  if (allocator == omp_null_allocator)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->ts.def_allocator == omp_null_allocator)
	thr->ts.def_allocator = gomp_def_allocator;
      allocator = (omp_allocator_handle_t) thr->ts.def_allocator;
    }

  if (allocator > omp_max_predefined_alloc)
    {
      allocator_data = (struct omp_allocator_data *) allocator;
      if (new_alignment < allocator_data->alignment)
	new_alignment = allocator_data->alignment;
    }
  else
    {
      allocator_data = NULL;
      if (new_alignment < sizeof (void *))
	new_alignment = sizeof (void *);
    }

  new_size = sizeof (struct omp_mem_header);
  if (new_alignment > sizeof (void *))
    new_size += new_alignment - sizeof (void *);
  if (__builtin_mul_overflow (size, nmemb, &size_temp))
    goto fail;
  if (__builtin_add_overflow (size_temp, new_size, &new_size))
    goto fail;

  if (__builtin_expect (allocator_data
			&& allocator_data->pool_size < ~(uintptr_t) 0, 0))
    {
      uintptr_t used_pool_size;
      if (new_size > allocator_data->pool_size)
	goto fail;
#ifdef HAVE_SYNC_BUILTINS
      used_pool_size = __atomic_load_n (&allocator_data->used_pool_size,
					MEMMODEL_RELAXED);
      do
	{
	  uintptr_t new_pool_size;
	  if (__builtin_add_overflow (used_pool_size, new_size,
				      &new_pool_size)
	      || new_pool_size > allocator_data->pool_size)
	    goto fail;
	  if (__atomic_compare_exchange_n (&allocator_data->used_pool_size,
					   &used_pool_size, new_pool_size,
					   true, MEMMODEL_RELAXED,
					   MEMMODEL_RELAXED))
	    break;
	}
      while (1);
#else
      gomp_mutex_lock (&allocator_data->lock);
      if (__builtin_add_overflow (allocator_data->used_pool_size, new_size,
				  &used_pool_size)
	  || used_pool_size > allocator_data->pool_size)
	{
	  gomp_mutex_unlock (&allocator_data->lock);
	  goto fail;
	}
      allocator_data->used_pool_size = used_pool_size;
      gomp_mutex_unlock (&allocator_data->lock);
#endif
      ptr = calloc (1, new_size);
      if (ptr == NULL)
	{
#ifdef HAVE_SYNC_BUILTINS
	  __atomic_add_fetch (&allocator_data->used_pool_size, -new_size,
			      MEMMODEL_RELAXED);
#else
	  gomp_mutex_lock (&allocator_data->lock);
	  allocator_data->used_pool_size -= new_size;
	  gomp_mutex_unlock (&allocator_data->lock);
#endif
	  goto fail;
	}
    }
  else
    {
      ptr = calloc (1, new_size);
      if (ptr == NULL)
	goto fail;
    }

  if (new_alignment > sizeof (void *))
    ret = (void *) (((uintptr_t) ptr
		     + sizeof (struct omp_mem_header)
		     + new_alignment - sizeof (void *))
		    & ~(new_alignment - 1));
  else
    ret = (char *) ptr + sizeof (struct omp_mem_header);
  ((struct omp_mem_header *) ret)[-1].ptr = ptr;
  ((struct omp_mem_header *) ret)[-1].size = new_size;
  ((struct omp_mem_header *) ret)[-1].allocator = allocator;
  return ret;

fail:
  if (allocator_data)
    {
      switch (allocator_data->fallback)
	{
	case omp_atv_default_mem_fb:
	  if ((new_alignment > sizeof (void *) && new_alignment > alignment)
	      || (allocator_data
		  && allocator_data->pool_size < ~(uintptr_t) 0))
	    {
	      allocator = omp_default_mem_alloc;
	      goto retry;
	    }
	  /* Otherwise, we've already performed default mem allocation
	     and if that failed, it won't succeed again (unless it was
	     intermittent).  Return NULL then, as that is the fallback.  */
	  break;
	case omp_atv_null_fb:
	  break;
	default:
	case omp_atv_abort_fb:
	  gomp_fatal ("Out of memory allocating %lu bytes",
		      (unsigned long) (size * nmemb));
	case omp_atv_allocator_fb:
	  allocator = allocator_data->fb_data;
	  goto retry;
	}
    }
  return NULL;
}

ialias (omp_aligned_calloc)

void *
omp_calloc (size_t nmemb, size_t size, omp_allocator_handle_t allocator)
{
  return ialias_call (omp_aligned_calloc) (1, nmemb, size, allocator);
}

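/* Illustrative only: omp_aligned_calloc (64, 10, sizeof (double), a)
   returns zero-filled, 64-byte-aligned storage for 10 doubles, or NULL
   (subject to the allocator's fallback trait); omp_calloc is the same
   with no extra alignment request.  */
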
void *
omp_realloc (void *ptr, size_t size, omp_allocator_handle_t allocator,
	     omp_allocator_handle_t free_allocator)
{
  struct omp_allocator_data *allocator_data, *free_allocator_data;
  size_t new_size, old_size, new_alignment, old_alignment;
  void *new_ptr, *ret;
  struct omp_mem_header *data;

  if (__builtin_expect (ptr == NULL, 0))
    return ialias_call (omp_aligned_alloc) (1, size, allocator);

  if (__builtin_expect (size == 0, 0))
    {
      ialias_call (omp_free) (ptr, free_allocator);
      return NULL;
    }

  data = &((struct omp_mem_header *) ptr)[-1];
  free_allocator = data->allocator;

retry:
  new_alignment = sizeof (void *);
  if (allocator == omp_null_allocator)
    allocator = free_allocator;

  if (allocator > omp_max_predefined_alloc)
    {
      allocator_data = (struct omp_allocator_data *) allocator;
      if (new_alignment < allocator_data->alignment)
	new_alignment = allocator_data->alignment;
    }
  else
    allocator_data = NULL;
  if (free_allocator > omp_max_predefined_alloc)
    free_allocator_data = (struct omp_allocator_data *) free_allocator;
  else
    free_allocator_data = NULL;
  old_alignment = (uintptr_t) ptr - (uintptr_t) (data->ptr);

  new_size = sizeof (struct omp_mem_header);
  if (new_alignment > sizeof (void *))
    new_size += new_alignment - sizeof (void *);
  if (__builtin_add_overflow (size, new_size, &new_size))
    goto fail;
  old_size = data->size;

  if (__builtin_expect (allocator_data
			&& allocator_data->pool_size < ~(uintptr_t) 0, 0))
    {
      uintptr_t used_pool_size;
      size_t prev_size = 0;
      /* Check if we can use realloc.  Don't use it if extra alignment
	 was used previously or newly, because realloc might return a pointer
	 with different alignment and then we'd need to memmove the data
	 again.  */
      if (free_allocator_data
	  && free_allocator_data == allocator_data
	  && new_alignment == sizeof (void *)
	  && old_alignment == sizeof (struct omp_mem_header))
	prev_size = old_size;
      if (new_size > prev_size
	  && new_size - prev_size > allocator_data->pool_size)
	goto fail;
#ifdef HAVE_SYNC_BUILTINS
      used_pool_size = __atomic_load_n (&allocator_data->used_pool_size,
					MEMMODEL_RELAXED);
      do
	{
	  uintptr_t new_pool_size;
	  if (new_size > prev_size)
	    {
	      if (__builtin_add_overflow (used_pool_size, new_size - prev_size,
					  &new_pool_size)
		  || new_pool_size > allocator_data->pool_size)
		goto fail;
	    }
	  else
	    new_pool_size = used_pool_size + new_size - prev_size;
	  if (__atomic_compare_exchange_n (&allocator_data->used_pool_size,
					   &used_pool_size, new_pool_size,
					   true, MEMMODEL_RELAXED,
					   MEMMODEL_RELAXED))
	    break;
	}
      while (1);
#else
      gomp_mutex_lock (&allocator_data->lock);
      if (new_size > prev_size)
	{
	  if (__builtin_add_overflow (allocator_data->used_pool_size,
				      new_size - prev_size,
				      &used_pool_size)
	      || used_pool_size > allocator_data->pool_size)
	    {
	      gomp_mutex_unlock (&allocator_data->lock);
	      goto fail;
	    }
	}
      else
	used_pool_size = (allocator_data->used_pool_size
			  + new_size - prev_size);
      allocator_data->used_pool_size = used_pool_size;
      gomp_mutex_unlock (&allocator_data->lock);
#endif
      if (prev_size)
	new_ptr = realloc (data->ptr, new_size);
      else
	new_ptr = malloc (new_size);
      if (new_ptr == NULL)
	{
#ifdef HAVE_SYNC_BUILTINS
	  __atomic_add_fetch (&allocator_data->used_pool_size,
			      prev_size - new_size,
			      MEMMODEL_RELAXED);
#else
	  gomp_mutex_lock (&allocator_data->lock);
	  allocator_data->used_pool_size -= new_size - prev_size;
	  gomp_mutex_unlock (&allocator_data->lock);
#endif
	  goto fail;
	}
      else if (prev_size)
	{
	  ret = (char *) new_ptr + sizeof (struct omp_mem_header);
	  ((struct omp_mem_header *) ret)[-1].ptr = new_ptr;
	  ((struct omp_mem_header *) ret)[-1].size = new_size;
	  ((struct omp_mem_header *) ret)[-1].allocator = allocator;
	  return ret;
	}
    }
  else if (new_alignment == sizeof (void *)
	   && old_alignment == sizeof (struct omp_mem_header)
	   && (free_allocator_data == NULL
	       || free_allocator_data->pool_size == ~(uintptr_t) 0))
    {
      new_ptr = realloc (data->ptr, new_size);
      if (new_ptr == NULL)
	goto fail;
      ret = (char *) new_ptr + sizeof (struct omp_mem_header);
      ((struct omp_mem_header *) ret)[-1].ptr = new_ptr;
      ((struct omp_mem_header *) ret)[-1].size = new_size;
      ((struct omp_mem_header *) ret)[-1].allocator = allocator;
      return ret;
    }
  else
    {
      new_ptr = malloc (new_size);
      if (new_ptr == NULL)
	goto fail;
    }

  if (new_alignment > sizeof (void *))
    ret = (void *) (((uintptr_t) new_ptr
		     + sizeof (struct omp_mem_header)
		     + new_alignment - sizeof (void *))
		    & ~(new_alignment - 1));
  else
    ret = (char *) new_ptr + sizeof (struct omp_mem_header);
  ((struct omp_mem_header *) ret)[-1].ptr = new_ptr;
  ((struct omp_mem_header *) ret)[-1].size = new_size;
  ((struct omp_mem_header *) ret)[-1].allocator = allocator;
  if (old_size - old_alignment < size)
    size = old_size - old_alignment;
  memcpy (ret, ptr, size);
  if (__builtin_expect (free_allocator_data
			&& free_allocator_data->pool_size < ~(uintptr_t) 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __atomic_add_fetch (&free_allocator_data->used_pool_size, -data->size,
			  MEMMODEL_RELAXED);
#else
      gomp_mutex_lock (&free_allocator_data->lock);
      free_allocator_data->used_pool_size -= data->size;
      gomp_mutex_unlock (&free_allocator_data->lock);
#endif
    }
  free (data->ptr);
  return ret;

fail:
  if (allocator_data)
    {
      switch (allocator_data->fallback)
	{
	case omp_atv_default_mem_fb:
	  if (new_alignment > sizeof (void *)
	      || (allocator_data
		  && allocator_data->pool_size < ~(uintptr_t) 0))
	    {
	      allocator = omp_default_mem_alloc;
	      goto retry;
	    }
	  /* Otherwise, we've already performed default mem allocation
	     and if that failed, it won't succeed again (unless it was
	     intermittent).  Return NULL then, as that is the fallback.  */
	  break;
	case omp_atv_null_fb:
	  break;
	default:
	case omp_atv_abort_fb:
	  gomp_fatal ("Out of memory allocating %lu bytes",
		      (unsigned long) size);
	case omp_atv_allocator_fb:
	  allocator = allocator_data->fb_data;
	  goto retry;
	}
    }
  return NULL;
}
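
/* Illustrative only: with an allocator handle A from omp_init_allocator,
   a buffer can be grown like so:

     p = omp_realloc (p, 2 * n, a, a);

   Passing omp_null_allocator as ALLOCATOR makes the function reuse the
   allocator recorded in P's header.  Plain realloc is only attempted when
   neither the old nor the new allocation needs extra alignment (and, for
   pooled allocators, when the same pool covers both sides); otherwise a
   fresh block is allocated and the payload copied with memcpy.  */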