xref: /netbsd-src/external/gpl3/gdb/dist/gdbsupport/parallel-for.h (revision 5ba1f45f2a09259cc846f20c7c5501604d633c90)
18dffb485Schristos /* Parallel for loops
28dffb485Schristos 
3*5ba1f45fSchristos    Copyright (C) 2019-2024 Free Software Foundation, Inc.
48dffb485Schristos 
58dffb485Schristos    This file is part of GDB.
68dffb485Schristos 
78dffb485Schristos    This program is free software; you can redistribute it and/or modify
88dffb485Schristos    it under the terms of the GNU General Public License as published by
98dffb485Schristos    the Free Software Foundation; either version 3 of the License, or
108dffb485Schristos    (at your option) any later version.
118dffb485Schristos 
128dffb485Schristos    This program is distributed in the hope that it will be useful,
138dffb485Schristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
148dffb485Schristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
158dffb485Schristos    GNU General Public License for more details.
168dffb485Schristos 
178dffb485Schristos    You should have received a copy of the GNU General Public License
188dffb485Schristos    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
198dffb485Schristos 
208dffb485Schristos #ifndef GDBSUPPORT_PARALLEL_FOR_H
218dffb485Schristos #define GDBSUPPORT_PARALLEL_FOR_H
228dffb485Schristos 
238dffb485Schristos #include <algorithm>
244b169a6bSchristos #include <type_traits>
258dffb485Schristos #include "gdbsupport/thread-pool.h"
264b169a6bSchristos #include "gdbsupport/function-view.h"
278dffb485Schristos 
288dffb485Schristos namespace gdb
298dffb485Schristos {
308dffb485Schristos 
318dffb485Schristos /* A very simple "parallel for".  This splits the range of iterators
328dffb485Schristos    into subranges, and then passes each subrange to the callback.  The
338dffb485Schristos    work may or may not be done in separate threads.
348dffb485Schristos 
358dffb485Schristos    This approach was chosen over having the callback work on single
368dffb485Schristos    items because it makes it simple for the caller to do
374b169a6bSchristos    once-per-subrange initialization and destruction.
384b169a6bSchristos 
394b169a6bSchristos    The parameter N says how batching ought to be done -- there will be
404b169a6bSchristos    at least N elements processed per thread.  Setting N to 0 is not
41*5ba1f45fSchristos    allowed.  */
428dffb485Schristos 
438dffb485Schristos template<class RandomIt, class RangeFunction>
44*5ba1f45fSchristos void
454b169a6bSchristos parallel_for_each (unsigned n, RandomIt first, RandomIt last,
46*5ba1f45fSchristos 		   RangeFunction callback)
478dffb485Schristos {
484b169a6bSchristos   /* If enabled, print debug info about how the work is distributed across
494b169a6bSchristos      the threads.  */
504b169a6bSchristos   const bool parallel_for_each_debug = false;
514b169a6bSchristos 
524b169a6bSchristos   size_t n_worker_threads = thread_pool::g_thread_pool->thread_count ();
534b169a6bSchristos   size_t n_threads = n_worker_threads;
548dffb485Schristos   size_t n_elements = last - first;
554b169a6bSchristos   size_t elts_per_thread = 0;
564b169a6bSchristos   size_t elts_left_over = 0;
574b169a6bSchristos 
588dffb485Schristos   if (n_threads > 1)
598dffb485Schristos     {
604b169a6bSchristos       /* Require that there should be at least N elements in a
614b169a6bSchristos 	 thread.  */
624b169a6bSchristos       gdb_assert (n > 0);
634b169a6bSchristos       if (n_elements / n_threads < n)
644b169a6bSchristos 	n_threads = std::max (n_elements / n, (size_t) 1);
654b169a6bSchristos       elts_per_thread = n_elements / n_threads;
664b169a6bSchristos       elts_left_over = n_elements % n_threads;
674b169a6bSchristos       /* n_elements == n_threads * elts_per_thread + elts_left_over. */
684b169a6bSchristos     }
698dffb485Schristos 
704b169a6bSchristos   size_t count = n_threads == 0 ? 0 : n_threads - 1;
71*5ba1f45fSchristos   std::vector<gdb::future<void>> results;
724b169a6bSchristos 
734b169a6bSchristos   if (parallel_for_each_debug)
744b169a6bSchristos     {
754b169a6bSchristos       debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements);
764b169a6bSchristos       debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n);
774b169a6bSchristos       debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread);
784b169a6bSchristos     }
794b169a6bSchristos 
804b169a6bSchristos   for (int i = 0; i < count; ++i)
814b169a6bSchristos     {
824b169a6bSchristos       RandomIt end;
834b169a6bSchristos       end = first + elts_per_thread;
844b169a6bSchristos       if (i < elts_left_over)
854b169a6bSchristos 	/* Distribute the leftovers over the worker threads, to avoid having
864b169a6bSchristos 	   to handle all of them in a single thread.  */
874b169a6bSchristos 	end++;
884b169a6bSchristos 
894b169a6bSchristos       /* This case means we don't have enough elements to really
904b169a6bSchristos 	 distribute them.  Rather than ever submit a task that does
914b169a6bSchristos 	 nothing, we short-circuit here.  */
924b169a6bSchristos       if (first == end)
934b169a6bSchristos 	end = last;
944b169a6bSchristos 
954b169a6bSchristos       if (end == last)
964b169a6bSchristos 	{
974b169a6bSchristos 	  /* We're about to dispatch the last batch of elements, which
984b169a6bSchristos 	     we normally process in the main thread.  So just truncate
994b169a6bSchristos 	     the result list here.  This avoids submitting empty tasks
1004b169a6bSchristos 	     to the thread pool.  */
1014b169a6bSchristos 	  count = i;
1024b169a6bSchristos 	  break;
1034b169a6bSchristos 	}
1044b169a6bSchristos 
1054b169a6bSchristos       if (parallel_for_each_debug)
1064b169a6bSchristos 	{
1074b169a6bSchristos 	  debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"),
1084b169a6bSchristos 			i, (size_t)(end - first));
1094b169a6bSchristos 	  debug_printf (_("\n"));
1104b169a6bSchristos 	}
111*5ba1f45fSchristos       results.push_back (gdb::thread_pool::g_thread_pool->post_task ([=] ()
1124b169a6bSchristos         {
1134b169a6bSchristos 	  return callback (first, end);
114*5ba1f45fSchristos 	}));
1158dffb485Schristos       first = end;
1168dffb485Schristos     }
1174b169a6bSchristos 
1184b169a6bSchristos   for (int i = count; i < n_worker_threads; ++i)
1194b169a6bSchristos     if (parallel_for_each_debug)
1204b169a6bSchristos       {
1214b169a6bSchristos 	debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i);
1224b169a6bSchristos 	debug_printf (_("\n"));
1238dffb485Schristos       }
1248dffb485Schristos 
1258dffb485Schristos   /* Process all the remaining elements in the main thread.  */
1264b169a6bSchristos   if (parallel_for_each_debug)
1274b169a6bSchristos     {
1284b169a6bSchristos       debug_printf (_("Parallel for: elements on main thread\t\t: %zu"),
1294b169a6bSchristos 		    (size_t)(last - first));
1304b169a6bSchristos       debug_printf (_("\n"));
1314b169a6bSchristos     }
132*5ba1f45fSchristos   callback (first, last);
133*5ba1f45fSchristos 
134*5ba1f45fSchristos   for (auto &fut : results)
135*5ba1f45fSchristos     fut.get ();
1364b169a6bSchristos }
1378dffb485Schristos 
1384b169a6bSchristos /* A sequential drop-in replacement of parallel_for_each.  This can be useful
1394b169a6bSchristos    when debugging multi-threading behaviour, and you want to limit
1404b169a6bSchristos    multi-threading in a fine-grained way.  */
1414b169a6bSchristos 
1424b169a6bSchristos template<class RandomIt, class RangeFunction>
143*5ba1f45fSchristos void
1444b169a6bSchristos sequential_for_each (unsigned n, RandomIt first, RandomIt last,
145*5ba1f45fSchristos 		     RangeFunction callback)
1464b169a6bSchristos {
147*5ba1f45fSchristos   callback (first, last);
1488dffb485Schristos }
1498dffb485Schristos 
1508dffb485Schristos }
1518dffb485Schristos 
1528dffb485Schristos #endif /* GDBSUPPORT_PARALLEL_FOR_H */
153