/* Parallel for loops

   Copyright (C) 2019-2023 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef GDBSUPPORT_PARALLEL_FOR_H
#define GDBSUPPORT_PARALLEL_FOR_H

#include <algorithm>
#include <type_traits>
#include "gdbsupport/invoke-result.h"
#include "gdbsupport/thread-pool.h"
#include "gdbsupport/function-view.h"

namespace gdb
{

namespace detail
{

/* This is a helper class that is used to accumulate results for
   parallel_for.  There is a specialization for 'void', below.  */
template<typename T>
struct par_for_accumulator
{
public:

  explicit par_for_accumulator (size_t n_threads)
    : m_futures (n_threads)
  {
  }

  /* The result type that is accumulated.  */
  typedef std::vector<T> result_type;

  /* Post the Ith task to a background thread, and store a future for
     later.  */
  void post (size_t i, std::function<T ()> task)
  {
    m_futures[i]
      = gdb::thread_pool::g_thread_pool->post_task (std::move (task));
  }

  /* Invoke TASK in the current thread, then compute all the results
     from all background tasks and put them into a result vector,
     which is returned.  */
  result_type finish (gdb::function_view<T ()> task)
  {
    result_type result (m_futures.size () + 1);

    result.back () = task ();

    for (size_t i = 0; i < m_futures.size (); ++i)
      result[i] = m_futures[i].get ();

    return result;
  }

  /* Resize the results to N.  */
  void resize (size_t n)
  {
    m_futures.resize (n);
  }

private:

  /* A vector of futures coming from the tasks run in the
     background.  */
  std::vector<gdb::future<T>> m_futures;
};

/* See the generic template.  */
template<>
struct par_for_accumulator<void>
{
public:

  explicit par_for_accumulator (size_t n_threads)
    : m_futures (n_threads)
  {
  }

  /* This specialization does not compute results.  */
  typedef void result_type;

  void post (size_t i, std::function<void ()> task)
  {
    m_futures[i]
      = gdb::thread_pool::g_thread_pool->post_task (std::move (task));
  }

  result_type finish (gdb::function_view<void ()> task)
  {
    task ();

    for (auto &future : m_futures)
      {
        /* Use 'get' and not 'wait', to propagate any exception.  */
        future.get ();
      }
  }

  /* Resize the results to N.  */
  void resize (size_t n)
  {
    m_futures.resize (n);
  }

private:

  std::vector<gdb::future<void>> m_futures;
};

}

/* A very simple "parallel for".  This splits the range of iterators
   into subranges, and then passes each subrange to the callback.  The
   work may or may not be done in separate threads.

   This approach was chosen over having the callback work on single
   items because it makes it simple for the caller to do
   once-per-subrange initialization and destruction.

   The parameter N says how batching ought to be done -- there will be
   at least N elements processed per thread.  Setting N to 0 is not
   allowed.

   If the function returns a non-void type, then a vector of the
   results is returned.  The size of the resulting vector depends on
   the number of threads that were used.  */
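
/* Illustrative sketch: one way a caller might use parallel_for_each.
   Here 'items' and 'process_item' are hypothetical stand-ins for the
   caller's container and per-element function:

     std::vector<int> items = ...;
     gdb::parallel_for_each (1, items.begin (), items.end (),
                             [] (std::vector<int>::iterator start,
                                 std::vector<int>::iterator end)
                             {
                               for (auto it = start; it != end; ++it)
                                 process_item (*it);
                             });

   Because the callback receives a whole subrange, once-per-subrange
   setup and teardown can simply be done at the top and bottom of the
   lambda.  */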
template<class RandomIt, class RangeFunction>
typename gdb::detail::par_for_accumulator<
    typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type
  >::result_type
parallel_for_each (unsigned n, RandomIt first, RandomIt last,
                   RangeFunction callback,
                   gdb::function_view<size_t(RandomIt)> task_size = nullptr)
{
  using result_type
    = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type;

  /* If enabled, print debug info about how the work is distributed across
     the threads.  */
  const bool parallel_for_each_debug = false;

  size_t n_worker_threads = thread_pool::g_thread_pool->thread_count ();
  size_t n_threads = n_worker_threads;
  size_t n_elements = last - first;
  size_t elts_per_thread = 0;
  size_t elts_left_over = 0;
  size_t total_size = 0;
  size_t size_per_thread = 0;
  size_t max_element_size = n_elements == 0 ? 1 : SIZE_MAX / n_elements;

  if (n_threads > 1)
    {
      if (task_size != nullptr)
        {
          gdb_assert (n == 1);
          for (RandomIt i = first; i != last; ++i)
            {
              size_t element_size = task_size (i);
              gdb_assert (element_size > 0);
              if (element_size > max_element_size)
                /* We could start scaling here, but that doesn't seem to be
                   worth the effort.  */
                element_size = max_element_size;
              size_t prev_total_size = total_size;
              total_size += element_size;
              /* Check for overflow.  */
              gdb_assert (prev_total_size < total_size);
            }
          size_per_thread = total_size / n_threads;
        }
      else
        {
          /* Require that there should be at least N elements in a
             thread.  */
          gdb_assert (n > 0);
          if (n_elements / n_threads < n)
            n_threads = std::max (n_elements / n, (size_t) 1);
          elts_per_thread = n_elements / n_threads;
          elts_left_over = n_elements % n_threads;
          /* n_elements == n_threads * elts_per_thread + elts_left_over.  */
        }
    }

  size_t count = n_threads == 0 ? 0 : n_threads - 1;
  gdb::detail::par_for_accumulator<result_type> results (count);

  if (parallel_for_each_debug)
    {
      debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements);
      if (task_size != nullptr)
        {
          debug_printf (_("Parallel for: total_size: %zu\n"), total_size);
          debug_printf (_("Parallel for: size_per_thread: %zu\n"),
                        size_per_thread);
        }
      else
        {
          debug_printf (_("Parallel for: minimum elements per thread: %u\n"),
                        n);
          debug_printf (_("Parallel for: elts_per_thread: %zu\n"),
                        elts_per_thread);
        }
    }

  size_t remaining_size = total_size;
  for (int i = 0; i < count; ++i)
    {
      RandomIt end;
      size_t chunk_size = 0;
      if (task_size == nullptr)
        {
          end = first + elts_per_thread;
          if (i < elts_left_over)
            /* Distribute the leftovers over the worker threads, to avoid
               having to handle all of them in a single thread.  */
            end++;
        }
      else
        {
          RandomIt j;
          for (j = first; j < last && chunk_size < size_per_thread; ++j)
            {
              size_t element_size = task_size (j);
              if (element_size > max_element_size)
                element_size = max_element_size;
              chunk_size += element_size;
            }
          end = j;
          remaining_size -= chunk_size;
        }

      /* This case means we don't have enough elements to really
         distribute them.  Rather than ever submit a task that does
         nothing, we short-circuit here.  */
      if (first == end)
        end = last;

      if (end == last)
        {
          /* We're about to dispatch the last batch of elements, which
             we normally process in the main thread.  So just truncate
             the result list here.  This avoids submitting empty tasks
             to the thread pool.  */
          count = i;
          results.resize (count);
          break;
        }

      if (parallel_for_each_debug)
        {
          debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"),
                        i, (size_t) (end - first));
          if (task_size != nullptr)
            debug_printf (_("\t(size: %zu)"), chunk_size);
          debug_printf (_("\n"));
        }
      results.post (i, [=] ()
        {
          return callback (first, end);
        });
      first = end;
    }

  for (int i = count; i < n_worker_threads; ++i)
    if (parallel_for_each_debug)
      {
        debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i);
        if (task_size != nullptr)
          debug_printf (_("\t(size: 0)"));
        debug_printf (_("\n"));
      }

  /* Process all the remaining elements in the main thread.  */
  if (parallel_for_each_debug)
    {
      debug_printf (_("Parallel for: elements on main thread\t\t: %zu"),
                    (size_t) (last - first));
      if (task_size != nullptr)
        debug_printf (_("\t(size: %zu)"), remaining_size);
      debug_printf (_("\n"));
    }
  return results.finish ([=] ()
    {
      return callback (first, last);
    });
}
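
/* Illustrative sketch of the TASK_SIZE argument above: when elements
   have very different costs, the caller can supply a per-element size
   estimate so that each chunk carries roughly equal work instead of an
   equal number of elements.  'objs', 'process_item' and
   'estimated_size' are hypothetical names; note that N must be 1 when
   TASK_SIZE is given, and each estimate must be non-zero:

     gdb::parallel_for_each (1, objs.begin (), objs.end (),
                             [] (auto start, auto end)
                             {
                               for (auto it = start; it != end; ++it)
                                 process_item (*it);
                             },
                             [] (auto it) -> size_t
                             {
                               return estimated_size (*it);
                             });  */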

/* A sequential drop-in replacement for parallel_for_each.  This can be
   useful when debugging multi-threading behaviour and you want to
   limit multi-threading in a fine-grained way.  */

template<class RandomIt, class RangeFunction>
typename gdb::detail::par_for_accumulator<
    typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type
  >::result_type
sequential_for_each (unsigned n, RandomIt first, RandomIt last,
                     RangeFunction callback,
                     gdb::function_view<size_t(RandomIt)> task_size = nullptr)
{
  using result_type
    = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type;

  gdb::detail::par_for_accumulator<result_type> results (0);

  /* Process all the elements in the main thread.  */
  return results.finish ([=] ()
    {
      return callback (first, last);
    });
}

}

#endif /* GDBSUPPORT_PARALLEL_FOR_H */