grppi/0.3.1/parallel__execution__ff_8h_source.html

 #ifndef GRPPI_FF_PARALLEL_EXECUTION_FF_H
 #define GRPPI_FF_PARALLEL_EXECUTION_FF_H

 #ifdef GRPPI_FF

 #include "detail/pipeline_impl.h"

 #include "../common/iterator.h"
 #include "../common/execution_traits.h"

 #include <type_traits>
 #include <tuple>
 #include <thread>
 #include <experimental/optional>

 #include <ff/parallel_for.hpp>
 #include <ff/dc.hpp>

 namespace grppi {

 /**
 \brief Execution policy that runs the GrPPI patterns on top of the FastFlow
 (`ff`) library.
 \note This definition is only compiled when GRPPI_FF is defined.
 */
 class parallel_execution_ff {

 public:
   /**
   \brief Default construct a FastFlow parallel execution policy.

   Delegates to the two-argument constructor using
   `std::thread::hardware_concurrency()` as concurrency degree and leaving
   ordering enabled.
   */
   parallel_execution_ff() noexcept :
       parallel_execution_ff{
         static_cast<int>(std::thread::hardware_concurrency())}
   {}

   /**
   \brief Construct a FastFlow parallel execution policy.
   \param concurrency_degree Value stored as the concurrency degree.
   \param order Whether ordered execution is enabled (defaults to true).
   */
   parallel_execution_ff(int concurrency_degree, bool order = true) noexcept :
     concurrency_degree_{concurrency_degree},
     ordering_{order}
   {
   }

   /**
   \brief Set the concurrency degree.
   \param degree New value for the concurrency degree.
   */
   void set_concurrency_degree(int degree) noexcept {
     concurrency_degree_ = degree;
   }

   /**
   \brief Get the currently stored concurrency degree.
   */
   int concurrency_degree() const noexcept {
     return concurrency_degree_;
   }

   /**
   \brief Enable ordering.
   */
   void enable_ordering() noexcept { ordering_=true; }

   /**
   \brief Disable ordering.
   */
   void disable_ordering() noexcept { ordering_=false; }

   /**
   \brief Tell whether ordering is currently enabled.
   */
   bool is_ordered() const noexcept { return ordering_; }

   /**
   \brief Applies a transformation to every position of a data sequence.
   \param firsts Tuple of iterators to the first elements of the inputs.
   \param first_out Iterator to the first element of the output.
   \param sequence_size Number of positions to process.
   \param transform_op Transformation operation.
   */
   template <typename ... InputIterators, typename OutputIterator,
             typename Transformer>
   void map(std::tuple<InputIterators...> firsts,
       OutputIterator first_out,
       std::size_t sequence_size, Transformer transform_op) const;

   /**
   \brief Applies a reduction to a data sequence.
   \param first Iterator to the first element of the sequence.
   \param sequence_size Number of elements to reduce.
   \param identity Identity value for the reduction.
   \param combine_op Combiner operation.
   \return The reduction result.
   */
   template <typename InputIterator, typename Identity, typename Combiner>
   auto reduce(InputIterator first,
       std::size_t sequence_size,
       Identity && identity,
       Combiner && combine_op) const;

   /**
   \brief Applies a map followed by a reduction to a data sequence.
   \param firsts Tuple of iterators to the first elements of the inputs.
   \param sequence_size Number of positions to process.
   \param identity Identity value for the reduction.
   \param transform_op Transformation operation.
   \param combine_op Combiner operation.
   \return The reduction result.
   */
   template <typename ... InputIterators, typename Identity,
   typename Transformer, typename Combiner>
   auto map_reduce(std::tuple<InputIterators...> firsts,
       std::size_t sequence_size,
       Identity && identity,
       Transformer && transform_op,
       Combiner && combine_op) const;

   /**
   \brief Applies a stencil to a data sequence.
   \param firsts Tuple of iterators to the first elements of the inputs.
   \param first_out Iterator to the first element of the output.
   \param sequence_size Number of positions to process.
   \param transform_op Stencil transformation operation.
   \param neighbour_op Neighbourhood operation.
   */
   template <typename ... InputIterators, typename OutputIterator,
             typename StencilTransformer, typename Neighbourhood>
   void stencil(std::tuple<InputIterators...> firsts,
       OutputIterator first_out,
       std::size_t sequence_size,
       StencilTransformer && transform_op,
       Neighbourhood && neighbour_op) const;

   /**
   \brief Runs a pipeline fed by a generator.
   \param generate_op Generator operation.
   \param transform_op Operations for the remaining pipeline stages.
   */
   template <typename Generator, typename ... Transformers>
   void pipeline(Generator && generate_op,
       Transformers && ... transform_op) const;


   /**
   \brief Runs a pipeline stage that reads from a queue and writes to a queue.

   Items are popped from `input_queue`; an item whose `first` member tests
   false is pushed back onto the input queue before its `first` is returned
   from the generator (presumably the end-of-stream marker, re-queued for other
   consumers -- confirm against mpmc_queue users). Every value reaching the
   last stage is pushed onto `output_queue` paired with a running counter, and
   once the pipeline has finished a default-constructed
   `OutputType::first_type` is pushed with the final counter value.

   \param input_queue Queue the items are read from.
   \param transform_op Transformation applied between both queues.
   \param output_queue Queue the results are written to.
   */
   template <typename InputType, typename Transformer, typename OutputType>
   void pipeline(mpmc_queue<InputType> & input_queue, Transformer && transform_op,
                 mpmc_queue<OutputType> & output_queue) const
   {
     // Sequence number attached to every element pushed to the output queue.
     ::std::atomic<long> order {0};
     pipeline(
       [&](){
         auto item = input_queue.pop();
         if(!item.first) input_queue.push(item);
         return item.first;
       },
       std::forward<Transformer>(transform_op),
       [&](auto & item ){
         // make_pair is qualified on purpose: an unqualified call is only
         // found through ADL when one of the argument types drags in
         // namespace std, which does not hold for every first_type.
         output_queue.push(std::make_pair(typename OutputType::first_type{item}, order.load()));
         order++;
       }
     );
     output_queue.push(std::make_pair(typename OutputType::first_type{}, order.load()));
   }

   /**
   \brief Applies the divide/conquer pattern to a problem.
   \param input Problem to be solved.
   \param divide_op Operation that splits a problem into subproblems.
   \param condition_op Predicate evaluated on a problem.
   \param solve_op Operation that solves a problem directly.
   \param combine_op Operation that combines two partial solutions.
   \return The solution of the problem.
   */
   template <typename Input, typename Divider,typename Predicate,
             typename Solver, typename Combiner>
   auto divide_conquer(Input & input,
       Divider && divide_op,
       Predicate && condition_op,
       Solver && solve_op,
       Combiner && combine_op) const;

 private:

   // Concurrency degree handed to the FastFlow constructs.
   int concurrency_degree_ =
     static_cast<int>(std::thread::hardware_concurrency());
   // Ordering flag handed to the pipeline implementation.
   bool ordering_ = true;
 };

 template <typename E>
 constexpr bool is_parallel_execution_ff() {
   return std::is_same<E, parallel_execution_ff>::value;
 }

 /**
 \brief Reports parallel_execution_ff as a supported execution policy.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>
 constexpr bool is_supported<parallel_execution_ff>() { return true; }

 /**
 \brief Determines if an execution policy supports the map pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>
 constexpr bool supports_map<parallel_execution_ff>() { return true; }

 /**
 \brief Determines if an execution policy supports the reduce pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>
 constexpr bool supports_reduce<parallel_execution_ff>() { return true; }

 /**
 \brief Determines if an execution policy supports the map_reduce pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>
 constexpr bool supports_map_reduce<parallel_execution_ff>() { return true; }

 /**
 \brief Determines if an execution policy supports the stencil pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>
 constexpr bool supports_stencil<parallel_execution_ff>() { return true; }

 /**
 \brief Determines if an execution policy supports the divide_conquer pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>
 constexpr bool supports_divide_conquer<parallel_execution_ff>() { return true; }

 /**
 \brief Determines if an execution policy supports the pipeline pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>
 constexpr bool supports_pipeline<parallel_execution_ff>() { return true; }


 /**
 \brief Map implementation based on `ff::ParallelFor`.
 \param firsts Tuple of iterators to the first elements of the inputs.
 \param first_out Iterator to the first element of the output.
 \param sequence_size Number of positions to process.
 \param transform_op Transformation applied at every position.
 */
 template <typename ... InputIterators, typename OutputIterator,
           typename Transformer>
 void parallel_execution_ff::map(
     std::tuple<InputIterators...> firsts,
     OutputIterator first_out,
     std::size_t sequence_size, Transformer transform_op) const
 {
   ff::ParallelFor pf{concurrency_degree_, true};

   // Kernel for one position: the iterators and the transformer are captured
   // by copy, and the result for index `delta` is stored `delta` steps after
   // first_out.
   auto apply_at = [=](const long delta) {
     *std::next(first_out, delta) = apply_iterators_indexed(transform_op, firsts, delta);
   };

   pf.parallel_for(0, sequence_size, apply_at, concurrency_degree_);
 }

 /**
 \brief Reduce implementation based on `ff::ParallelForReduce`.
 \param first Iterator to the first element of the sequence.
 \param sequence_size Number of elements to reduce.
 \param identity Identity value of the combiner; also used as initial result.
 \param combine_op Binary combiner operation.
 \return The reduced value, of type `std::decay_t<Identity>`.
 \note `Identity` is deduced through a forwarding reference, so it is a
 reference type whenever the caller passes an lvalue. The value type is
 therefore decayed before it is used to instantiate `ff::ParallelForReduce`
 and to declare the local result; otherwise the result would alias (and
 overwrite) the caller's identity object.
 */
 template <typename InputIterator, typename Identity, typename Combiner>
 auto parallel_execution_ff::reduce(InputIterator first,
     std::size_t sequence_size,
     Identity && identity,
     Combiner && combine_op) const
 {
   using result_type = std::decay_t<Identity>;

   ff::ParallelForReduce<result_type> pfr{concurrency_degree_, true};
   result_type result{identity};

   pfr.parallel_reduce(result, identity, 0, sequence_size,
       // Per-element step: folds the element at index delta into value.
       [combine_op,first](long delta, auto & value) {
         value = combine_op(value, *std::next(first,delta));
       },
       // Final step. NOTE(review): assumes FastFlow calls it with the running
       // result as first argument and a partial result as second -- confirm.
       [&result, combine_op](auto a, auto b) { result = combine_op(a,b); },
       concurrency_degree_);

   return result;
 }

 /**
 \brief Map/reduce implementation: a map into a temporary buffer followed by
 a reduce over that buffer.
 \param firsts Tuple of iterators to the first elements of the inputs.
 \param sequence_size Number of positions to process.
 \param identity Identity value for the reduction.
 \param transform_op Transformation applied at every position.
 \param combine_op Binary combiner operation.
 \return Whatever `reduce` returns for the transformed sequence.
 \note `Identity` is deduced through a forwarding reference and may be a
 reference type; the buffer element type is decayed because a `std::vector`
 of references is ill-formed.
 */
 template <typename ... InputIterators, typename Identity,
           typename Transformer, typename Combiner>
 auto parallel_execution_ff::map_reduce(std::tuple<InputIterators...> firsts,
     std::size_t sequence_size,
     Identity && identity,
     Transformer && transform_op,
     Combiner && combine_op) const
 {
   using value_type = std::decay_t<Identity>;

   // One value-initialized slot per position; map overwrites all of them.
   std::vector<value_type> partial_outs(sequence_size);
   map(firsts, partial_outs.begin(), sequence_size,
       std::forward<Transformer>(transform_op));

   return reduce(partial_outs.begin(), sequence_size,
       std::forward<Identity>(identity),
       std::forward<Combiner>(combine_op));
 }

 /**
 \brief Stencil implementation based on `ff::ParallelFor`.
 \param firsts Tuple of iterators to the first elements of the inputs.
 \param first_out Iterator to the first element of the output.
 \param sequence_size Number of positions to process.
 \param transform_op Stencil operation; receives the iterator to the current
 element of the first input and the neighbourhood result.
 \param neighbour_op Neighbourhood operation applied to the input iterators
 advanced to the current position.
 */
 template <typename ... InputIterators, typename OutputIterator,
           typename StencilTransformer, typename Neighbourhood>
 void parallel_execution_ff::stencil(std::tuple<InputIterators...> firsts,
     OutputIterator first_out,
     std::size_t sequence_size,
     StencilTransformer && transform_op,
     Neighbourhood && neighbour_op) const
 {
   ff::ParallelFor pf{concurrency_degree_, true};

   // Kernel for one position. Capturing by reference relies on parallel_for
   // returning only after every iteration has run -- NOTE(review): confirm.
   auto kernel = [&](long delta) {
     const auto base_it = std::get<0>(firsts);
     auto shifted_its = iterators_next(firsts, delta);
     *std::next(first_out,delta) = transform_op(std::next(base_it,delta),
         apply_increment(neighbour_op, shifted_its) );
   };

   pf.parallel_for(0, sequence_size, kernel, concurrency_degree_);
 }

 template <typename Generator, typename ... Transformers>
 void parallel_execution_ff::pipeline(
     Generator && generate_op,
     Transformers && ... transform_ops) const
 {
   detail_ff::pipeline_impl pipe{
       concurrency_degree_,
       ordering_,
       std::forward<Generator>(generate_op),
       std::forward<Transformers>(transform_ops)...};

   pipe.setFixedSize(false);
   pipe.run_and_wait_end();
 }

 /**
 \brief Divide/conquer implementation based on `ff::ff_DC`.
 \param input Problem to be solved.
 \param divide_op Operation returning the subproblems of a problem.
 \param condition_op Predicate evaluated on a problem.
 \param solve_op Operation that solves a problem directly.
 \param combine_op Operation combining two partial solutions into one.
 \return The solution computed for `input`.
 */
 template <typename Input, typename Divider,typename Predicate,
           typename Solver, typename Combiner>
 auto parallel_execution_ff::divide_conquer(Input & input,
     Divider && divide_op,
     Predicate && condition_op,
     Solver && solve_op,
     Combiner && combine_op) const
 {
   using output_type = typename std::result_of<Solver(Input)>::type;
   using dac_t = ff::ff_DC<Input,output_type>;

   // Adapters translating the GrPPI callables (which return their result) to
   // the out-parameter signatures handed to ff_DC.

   // Divide: stores the subproblems of `problem` into `subproblems`.
   auto splitter = [&](const Input & problem, std::vector<Input> & subproblems) {
     subproblems = divide_op(problem);
   };

   // Combine: left fold of the partial solutions, seeded with the first one.
   auto merger = [&](std::vector<output_type> & partials, output_type & merged) {
     auto it = partials.begin();
     merged = *it;
     for (++it; it != partials.end(); ++it) {
       merged = combine_op(merged, *it);
     }
   };

   // Base case: solve the problem directly.
   auto base_solver = [&](const Input & problem, output_type & solution) {
     solution = solve_op(problem);
   };

   // Condition evaluated on a problem.
   auto predicate = [&](const Input & problem) {
     return condition_op(problem);
   };

   output_type out_var{};

   // Upper bound of workers: the larger of the requested concurrency degree
   // and the value reported by hardware_concurrency().
   const auto ncores = static_cast<int>(std::thread::hardware_concurrency());
   const int max_nworkers =
       (concurrency_degree_ < ncores) ? ncores : concurrency_degree_;

   dac_t dac(splitter, merger, base_solver, predicate, // kernel functions
       input, out_var,                                  // input/output variables
       concurrency_degree_,                             // parallelism degree
       dac_t::DEFAULT_OUTSTANDING_TASKS, max_nworkers   // ff-specific params
       );

   dac.run_and_wait_end();

   return out_var;
 }

 } // end namespace grppi

 #else // GRPPI_FF undefined

 namespace grppi {

 /**
 \brief Empty placeholder for the FastFlow execution policy.
 \note Used when GRPPI_FF is not defined, so the type name still exists.
 */
 struct parallel_execution_ff {};

 /**
 \brief Metafunction that determines if type E is parallel_execution_ff.
 \note When GRPPI_FF is not defined it evaluates to false for every type.
 */
 template <typename E>
 constexpr bool is_parallel_execution_ff() { return false; }

 } // end namespace grppi

 #endif // GRPPI_FF

 #endif
grppi
Definition: callable_traits.h:26

grppi::pipeline
void pipeline(const Execution &ex, Generator &&generate_op, Transformers &&...transform_ops)
Invoke Pipeline pattern on a data stream.
Definition: pipeline.h:51

grppi::stencil
void stencil(const Execution &ex, std::tuple< InputIterators... > firsts, std::size_t size, OutputIt out, StencilTransformer &&transform_op, Neighbourhood &&neighbour_op)
Invoke Stencil pattern on a data sequence with sequential execution.
Definition: stencil.h:59

grppi::is_parallel_execution_ff
constexpr bool is_parallel_execution_ff()
Metafunction that determines if type E is parallel_execution_ff. This metafunction evaluates to false ...
Definition: parallel_execution_ff.h:462

grppi::reduce
auto reduce(const Execution &ex, InputIt first, std::size_t size, Result &&identity, Combiner &&combine_op)
Invoke Reduce pattern with identity value on a data sequence with sequential execution.
Definition: reduce.h:55

pipeline_impl.h

grppi::iterators_next
auto iterators_next(T &&t, int n)
Computes next n steps from a tuple of iterators.
Definition: iterator.h:175

grppi::apply_iterators_indexed
decltype(auto) apply_iterators_indexed(F &&f, T &&t, std::size_t i)
Applies a callable object to the values obtained from the iterators in a tuple by indexing...
Definition: iterator.h:147

grppi::parallel_execution_ff
Definition: parallel_execution_ff.h:454

grppi::map_reduce
auto map_reduce(const Execution &ex, std::tuple< InputIterators... > firsts, std::size_t size, Identity &&identity, Transformer &&transform_op, Combiner &&combine_op)
Invoke Map/reduce pattern on a data sequence.
Definition: mapreduce.h:57

grppi::divide_conquer
auto divide_conquer(const Execution &ex, Input &&input, Divider &&divider_op, Solver &&solver_op, Combiner &&combiner_op)
Invoke Divide/conquer pattern.  Execution Execution type.
Definition: divideconquer.h:53

grppi::apply_increment
decltype(auto) apply_increment(F &&f, T< Iterators... > &iterators)
Applies a callable object to the iterators in a tuple like-object and then increments those iterators...
Definition: iterator.h:106

grppi::map
void map(const Execution &ex, std::tuple< InputIterators... > firsts, InputIt last, OutputIt first_out, Transformer transform_op)
Invoke Map pattern on a data sequence.
Definition: map.h:56