18dffb485Schristos /* Parallel for loops 28dffb485Schristos 3*5ba1f45fSchristos Copyright (C) 2019-2024 Free Software Foundation, Inc. 48dffb485Schristos 58dffb485Schristos This file is part of GDB. 68dffb485Schristos 78dffb485Schristos This program is free software; you can redistribute it and/or modify 88dffb485Schristos it under the terms of the GNU General Public License as published by 98dffb485Schristos the Free Software Foundation; either version 3 of the License, or 108dffb485Schristos (at your option) any later version. 118dffb485Schristos 128dffb485Schristos This program is distributed in the hope that it will be useful, 138dffb485Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of 148dffb485Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 158dffb485Schristos GNU General Public License for more details. 168dffb485Schristos 178dffb485Schristos You should have received a copy of the GNU General Public License 188dffb485Schristos along with this program. If not, see <http://www.gnu.org/licenses/>. */ 198dffb485Schristos 208dffb485Schristos #ifndef GDBSUPPORT_PARALLEL_FOR_H 218dffb485Schristos #define GDBSUPPORT_PARALLEL_FOR_H 228dffb485Schristos 238dffb485Schristos #include <algorithm> 244b169a6bSchristos #include <type_traits> 258dffb485Schristos #include "gdbsupport/thread-pool.h" 264b169a6bSchristos #include "gdbsupport/function-view.h" 278dffb485Schristos 288dffb485Schristos namespace gdb 298dffb485Schristos { 308dffb485Schristos 318dffb485Schristos /* A very simple "parallel for". This splits the range of iterators 328dffb485Schristos into subranges, and then passes each subrange to the callback. The 338dffb485Schristos work may or may not be done in separate threads. 348dffb485Schristos 358dffb485Schristos This approach was chosen over having the callback work on single 368dffb485Schristos items because it makes it simple for the caller to do 374b169a6bSchristos once-per-subrange initialization and destruction. 384b169a6bSchristos 394b169a6bSchristos The parameter N says how batching ought to be done -- there will be 404b169a6bSchristos at least N elements processed per thread. Setting N to 0 is not 41*5ba1f45fSchristos allowed. */ 428dffb485Schristos 438dffb485Schristos template<class RandomIt, class RangeFunction> 44*5ba1f45fSchristos void 454b169a6bSchristos parallel_for_each (unsigned n, RandomIt first, RandomIt last, 46*5ba1f45fSchristos RangeFunction callback) 478dffb485Schristos { 484b169a6bSchristos /* If enabled, print debug info about how the work is distributed across 494b169a6bSchristos the threads. */ 504b169a6bSchristos const bool parallel_for_each_debug = false; 514b169a6bSchristos 524b169a6bSchristos size_t n_worker_threads = thread_pool::g_thread_pool->thread_count (); 534b169a6bSchristos size_t n_threads = n_worker_threads; 548dffb485Schristos size_t n_elements = last - first; 554b169a6bSchristos size_t elts_per_thread = 0; 564b169a6bSchristos size_t elts_left_over = 0; 574b169a6bSchristos 588dffb485Schristos if (n_threads > 1) 598dffb485Schristos { 604b169a6bSchristos /* Require that there should be at least N elements in a 614b169a6bSchristos thread. */ 624b169a6bSchristos gdb_assert (n > 0); 634b169a6bSchristos if (n_elements / n_threads < n) 644b169a6bSchristos n_threads = std::max (n_elements / n, (size_t) 1); 654b169a6bSchristos elts_per_thread = n_elements / n_threads; 664b169a6bSchristos elts_left_over = n_elements % n_threads; 674b169a6bSchristos /* n_elements == n_threads * elts_per_thread + elts_left_over. */ 684b169a6bSchristos } 698dffb485Schristos 704b169a6bSchristos size_t count = n_threads == 0 ? 0 : n_threads - 1; 71*5ba1f45fSchristos std::vector<gdb::future<void>> results; 724b169a6bSchristos 734b169a6bSchristos if (parallel_for_each_debug) 744b169a6bSchristos { 754b169a6bSchristos debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements); 764b169a6bSchristos debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n); 774b169a6bSchristos debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread); 784b169a6bSchristos } 794b169a6bSchristos 804b169a6bSchristos for (int i = 0; i < count; ++i) 814b169a6bSchristos { 824b169a6bSchristos RandomIt end; 834b169a6bSchristos end = first + elts_per_thread; 844b169a6bSchristos if (i < elts_left_over) 854b169a6bSchristos /* Distribute the leftovers over the worker threads, to avoid having 864b169a6bSchristos to handle all of them in a single thread. */ 874b169a6bSchristos end++; 884b169a6bSchristos 894b169a6bSchristos /* This case means we don't have enough elements to really 904b169a6bSchristos distribute them. Rather than ever submit a task that does 914b169a6bSchristos nothing, we short-circuit here. */ 924b169a6bSchristos if (first == end) 934b169a6bSchristos end = last; 944b169a6bSchristos 954b169a6bSchristos if (end == last) 964b169a6bSchristos { 974b169a6bSchristos /* We're about to dispatch the last batch of elements, which 984b169a6bSchristos we normally process in the main thread. So just truncate 994b169a6bSchristos the result list here. This avoids submitting empty tasks 1004b169a6bSchristos to the thread pool. */ 1014b169a6bSchristos count = i; 1024b169a6bSchristos break; 1034b169a6bSchristos } 1044b169a6bSchristos 1054b169a6bSchristos if (parallel_for_each_debug) 1064b169a6bSchristos { 1074b169a6bSchristos debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"), 1084b169a6bSchristos i, (size_t)(end - first)); 1094b169a6bSchristos debug_printf (_("\n")); 1104b169a6bSchristos } 111*5ba1f45fSchristos results.push_back (gdb::thread_pool::g_thread_pool->post_task ([=] () 1124b169a6bSchristos { 1134b169a6bSchristos return callback (first, end); 114*5ba1f45fSchristos })); 1158dffb485Schristos first = end; 1168dffb485Schristos } 1174b169a6bSchristos 1184b169a6bSchristos for (int i = count; i < n_worker_threads; ++i) 1194b169a6bSchristos if (parallel_for_each_debug) 1204b169a6bSchristos { 1214b169a6bSchristos debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i); 1224b169a6bSchristos debug_printf (_("\n")); 1238dffb485Schristos } 1248dffb485Schristos 1258dffb485Schristos /* Process all the remaining elements in the main thread. */ 1264b169a6bSchristos if (parallel_for_each_debug) 1274b169a6bSchristos { 1284b169a6bSchristos debug_printf (_("Parallel for: elements on main thread\t\t: %zu"), 1294b169a6bSchristos (size_t)(last - first)); 1304b169a6bSchristos debug_printf (_("\n")); 1314b169a6bSchristos } 132*5ba1f45fSchristos callback (first, last); 133*5ba1f45fSchristos 134*5ba1f45fSchristos for (auto &fut : results) 135*5ba1f45fSchristos fut.get (); 1364b169a6bSchristos } 1378dffb485Schristos 1384b169a6bSchristos /* A sequential drop-in replacement of parallel_for_each. This can be useful 1394b169a6bSchristos when debugging multi-threading behaviour, and you want to limit 1404b169a6bSchristos multi-threading in a fine-grained way. */ 1414b169a6bSchristos 1424b169a6bSchristos template<class RandomIt, class RangeFunction> 143*5ba1f45fSchristos void 1444b169a6bSchristos sequential_for_each (unsigned n, RandomIt first, RandomIt last, 145*5ba1f45fSchristos RangeFunction callback) 1464b169a6bSchristos { 147*5ba1f45fSchristos callback (first, last); 1488dffb485Schristos } 1498dffb485Schristos 1508dffb485Schristos } 1518dffb485Schristos 1528dffb485Schristos #endif /* GDBSUPPORT_PARALLEL_FOR_H */ 153