grppi/0.4/parallel__execution__ff_8h_source.html

 /*

  * Copyright 2018 Universidad Carlos III de Madrid

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */

 #ifndef GRPPI_FF_PARALLEL_EXECUTION_FF_H

 #define GRPPI_FF_PARALLEL_EXECUTION_FF_H


 #ifdef GRPPI_FF


 #include "detail/pipeline_impl.h"

 #include "../common/iterator.h"
 #include "../common/execution_traits.h"

 #include <atomic>
 #include <thread>
 #include <tuple>
 #include <type_traits>
 #include <utility>
 #include <vector>
 #include <experimental/optional>

 #include <ff/parallel_for.hpp>
 #include <ff/dc.hpp>


 namespace grppi {


 /**
 \brief FastFlow (FF) parallel execution policy.

 Holds the concurrency degree and the ordering flag that the pattern
 implementations below hand to the FastFlow constructs they build.
 */
 class parallel_execution_ff {

 public:

   /**
   \brief Default constructs an FF parallel execution policy.

   Delegates to the two-argument constructor with
   std::thread::hardware_concurrency() as concurrency degree and the default
   ordering (enabled).
   */
   parallel_execution_ff() noexcept :
       parallel_execution_ff{
         static_cast<int>(std::thread::hardware_concurrency())}
   {}

   /**
   \brief Constructs an FF parallel execution policy.
   \param concurrency_degree Value stored as the concurrency degree.
   \param order Whether ordered execution is enabled (defaults to true).
   */
   parallel_execution_ff(int concurrency_degree, bool order = true) noexcept :
     concurrency_degree_{concurrency_degree},
     ordering_{order}
   {
   }

   /**
   \brief Sets the concurrency degree.
   \param degree New concurrency degree.
   */
   void set_concurrency_degree(int degree) noexcept {
     concurrency_degree_ = degree;
   }

   /**
   \brief Gets the current concurrency degree.
   */
   int concurrency_degree() const noexcept {
     return concurrency_degree_;
   }

   /**
   \brief Enables ordering.
   */
   void enable_ordering() noexcept { ordering_=true; }

   /**
   \brief Disables ordering.
   */
   void disable_ordering() noexcept { ordering_=false; }

   /**
   \brief Tells whether ordering is currently enabled.
   */
   bool is_ordered() const noexcept { return ordering_; }

   /**
   \brief Applies a transformation to multiple sequences, writing one result
   per position.
   \param firsts Tuple of iterators to the beginning of each input sequence.
   \param first_out Iterator to the beginning of the output sequence.
   \param sequence_size Number of positions to process.
   \param transform_op Transformation operation.
   */
   template <typename ... InputIterators, typename OutputIterator,
             typename Transformer>
   void map(std::tuple<InputIterators...> firsts,
       OutputIterator first_out,
       std::size_t sequence_size, Transformer transform_op) const;

   /**
   \brief Reduces a sequence with a combiner, starting from an identity value.
   \param first Iterator to the first element of the sequence.
   \param sequence_size Number of elements to reduce.
   \param identity Identity value for the reduction.
   \param combine_op Combiner operation.
   \return The reduction result.
   */
   template <typename InputIterator, typename Identity, typename Combiner>
   auto reduce(InputIterator first,
       std::size_t sequence_size,
       Identity && identity,
       Combiner && combine_op) const;

   /**
   \brief Maps multiple sequences and reduces the mapped values.
   \param firsts Tuple of iterators to the beginning of each input sequence.
   \param sequence_size Number of positions to process.
   \param identity Identity value for the reduction.
   \param transform_op Transformation operation.
   \param combine_op Combiner operation.
   \return The reduction result.
   */
   template <typename ... InputIterators, typename Identity,
   typename Transformer, typename Combiner>
   auto map_reduce(std::tuple<InputIterators...> firsts,
       std::size_t sequence_size,
       Identity && identity,
       Transformer && transform_op,
       Combiner && combine_op) const;

   /**
   \brief Applies a stencil to multiple sequences, writing one result per
   position.
   \param firsts Tuple of iterators to the beginning of each input sequence.
   \param first_out Iterator to the beginning of the output sequence.
   \param sequence_size Number of positions to process.
   \param transform_op Stencil transformation operation.
   \param neighbour_op Neighbourhood operation.
   */
   template <typename ... InputIterators, typename OutputIterator,
             typename StencilTransformer, typename Neighbourhood>
   void stencil(std::tuple<InputIterators...> firsts,
       OutputIterator first_out,
       std::size_t sequence_size,
       StencilTransformer && transform_op,
       Neighbourhood && neighbour_op) const;

   /**
   \brief Runs a pipeline built from a generator and a pack of stages.
   \param generate_op Generator operation.
   \param transform_ops Remaining stages of the pipeline.
   */
   template <typename Generator, typename ... Transformers>
   void pipeline(Generator && generate_op,
       Transformers && ... transform_ops) const;

   /**
   \brief Runs a single transformation stage between two queues.

   Items are popped from input_queue, passed through transform_op and pushed
   to output_queue as (value, order) pairs, where order is a counter that is
   incremented after every pushed item.

   \param input_queue Queue the items are read from.
   \param transform_op Transformation applied to every item.
   \param output_queue Queue the results are written to.
   */
   template <typename InputType, typename Transformer, typename OutputType>
   void pipeline(mpmc_queue<InputType> & input_queue, Transformer && transform_op,
                 mpmc_queue<OutputType> & output_queue) const
   {
     ::std::atomic<long> order {0};
     pipeline(
       [&](){
         auto item = input_queue.pop();
         // An item with an empty value is pushed back before being returned;
         // presumably so that other consumers of the queue also observe the
         // end-of-stream marker -- confirm against the queue's other readers.
         if(!item.first) input_queue.push(item);
         return item.first;
       },
       std::forward<Transformer>(transform_op),
       [&](auto & item ){
         // std:: qualification: unqualified make_pair is only found through
         // ADL when one of the argument types drags in namespace std.
         output_queue.push(std::make_pair(typename OutputType::first_type{item}, order.load()));
         order++;
       }
     );
     // Final element with a default-constructed value (presumably the
     // end-of-stream marker for the consumer of output_queue).
     output_queue.push(std::make_pair(typename OutputType::first_type{}, order.load()));
   }

   /**
   \brief Solves a problem with the divide/conquer pattern.
   \param input Problem to solve.
   \param divide_op Divider operation.
   \param condition_op Predicate evaluated on a (sub)problem.
   \param solve_op Solver operation for base cases.
   \param combine_op Combiner operation for partial solutions.
   \return The solution computed for input.
   */
   template <typename Input, typename Divider,typename Predicate,
             typename Solver, typename Combiner>
   auto divide_conquer(Input & input,
       Divider && divide_op,
       Predicate && condition_op,
       Solver && solve_op,
       Combiner && combine_op) const;

 private:

   /// Concurrency degree handed to the FastFlow constructs.
   int concurrency_degree_ =
     static_cast<int>(std::thread::hardware_concurrency());

   /// Ordering flag handed to the pipeline implementation.
   bool ordering_ = true;
 };


 template <typename E>

 constexpr bool is_parallel_execution_ff() {

   return std::is_same<E, parallel_execution_ff>::value;

 }


 /**
 \brief Determines if an execution policy is supported in the current build.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>

 constexpr bool is_supported<parallel_execution_ff>() { return true; }


 /**
 \brief Determines if an execution policy supports the map pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>

 constexpr bool supports_map<parallel_execution_ff>() { return true; }


 /**
 \brief Determines if an execution policy supports the reduce pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>

 constexpr bool supports_reduce<parallel_execution_ff>() { return true; }


 /**
 \brief Determines if an execution policy supports the map_reduce pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>

 constexpr bool supports_map_reduce<parallel_execution_ff>() { return true; }


 /**
 \brief Determines if an execution policy supports the stencil pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>

 constexpr bool supports_stencil<parallel_execution_ff>() { return true; }


 /**

 \brief Determines if an execution policy supports the divide_conquer pattern.

 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.

 */

 template <>

 constexpr bool supports_divide_conquer<parallel_execution_ff>() { return true; }


 /**
 \brief Determines if an execution policy supports the pipeline pattern.
 \note Specialization for parallel_execution_ff when GRPPI_FF is enabled.
 */
 template <>

 constexpr bool supports_pipeline<parallel_execution_ff>() { return true; }


 template <typename ... InputIterators, typename OutputIterator,

           typename Transformer>

 void parallel_execution_ff::map(

     std::tuple<InputIterators...> firsts,

     OutputIterator first_out,

     std::size_t sequence_size, Transformer transform_op) const

 {

   ff::ParallelFor pf{concurrency_degree_, true};

   pf.parallel_for(0, sequence_size,

     [=](const long delta) {

       *std::next(first_out, delta) = apply_iterators_indexed(transform_op, firsts, delta);

     },

     concurrency_degree_);

 }


 /**
 \brief Reduce implementation on top of ff::ParallelForReduce.

 \tparam InputIterator Iterator type of the input sequence.
 \tparam Identity Type deduced for the identity value; it may be an lvalue
 reference, so the decayed type is used for all storage.
 \tparam Combiner Callable type of the combiner.
 \param first Iterator to the first element of the sequence.
 \param sequence_size Number of elements to reduce.
 \param identity Identity value for the reduction.
 \param combine_op Combiner operation.
 \return The reduction result, by value.
 */
 template <typename InputIterator, typename Identity, typename Combiner>
 auto parallel_execution_ff::reduce(InputIterator first,
     std::size_t sequence_size,
     Identity && identity,
     Combiner && combine_op) const
 {
   // Identity is a forwarding-reference parameter: for an lvalue argument it
   // is deduced as T&. Using it verbatim would instantiate
   // ff::ParallelForReduce<T&> and make `result` an alias of the caller's
   // identity object, so the value type is decayed first.
   using result_type = std::decay_t<Identity>;

   ff::ParallelForReduce<result_type> pfr{concurrency_degree_, true};
   result_type result{identity};

   pfr.parallel_reduce(result, identity, 0, sequence_size,
       // Partial reduction: folds the element at index `delta` into `value`.
       [combine_op,first](long delta, auto & value) {
         value = combine_op(value, *std::next(first,delta));
       },
       // Final reduction: combines two values and stores the outcome in
       // `result`.
       [&result, combine_op](auto a, auto b) { result = combine_op(a,b); },
       concurrency_degree_);

   return result;
 }


 /**
 \brief Map/reduce implementation: a map into a temporary buffer followed by
 a reduce over that buffer.

 \tparam Identity Type deduced for the identity value; it may be an lvalue
 reference, so the decayed type is used for the buffer.
 \param firsts Tuple of iterators to the beginning of each input sequence.
 \param sequence_size Number of positions to process.
 \param identity Identity value for the reduction.
 \param transform_op Transformation operation.
 \param combine_op Combiner operation.
 \return The reduction result, by value.
 */
 template <typename ... InputIterators, typename Identity,
           typename Transformer, typename Combiner>
 auto parallel_execution_ff::map_reduce(std::tuple<InputIterators...> firsts,
     std::size_t sequence_size,
     Identity && identity,
     Transformer && transform_op,
     Combiner && combine_op) const
 {
   // Identity is deduced as T& when the caller passes a named object, and
   // std::vector cannot hold references; store decayed values instead.
   using result_type = std::decay_t<Identity>;

   std::vector<result_type> partial_outs(sequence_size);
   map(firsts, partial_outs.begin(), sequence_size,
       std::forward<Transformer>(transform_op));

   // reduce receives a prvalue so that it deduces a non-reference identity
   // type regardless of the value category this function was called with.
   return reduce(partial_outs.begin(), sequence_size,
       static_cast<result_type>(std::forward<Identity>(identity)),
       std::forward<Combiner>(combine_op));
 }


 template <typename ... InputIterators, typename OutputIterator,

           typename StencilTransformer, typename Neighbourhood>

 void parallel_execution_ff::stencil(std::tuple<InputIterators...> firsts,

     OutputIterator first_out,

     std::size_t sequence_size,

     StencilTransformer && transform_op,

     Neighbourhood && neighbour_op) const

 {

   ff::ParallelFor pf(concurrency_degree_, true);

   pf.parallel_for(0, sequence_size,

     [&](long delta) {

       const auto first_it = std::get<0>(firsts);

       auto next_chunks = iterators_next(firsts, delta);

       *std::next(first_out,delta) = transform_op(std::next(first_it,delta),

           apply_increment(neighbour_op, next_chunks) );

     },

     concurrency_degree_);

 }


 template <typename Generator, typename ... Transformers>

 void parallel_execution_ff::pipeline(

     Generator && generate_op,

     Transformers && ... transform_ops) const

 {

   detail_ff::pipeline_impl pipe{

       concurrency_degree_,

       ordering_,

       std::forward<Generator>(generate_op),

       std::forward<Transformers>(transform_ops)...};


   pipe.setFixedSize(false);

   pipe.run_and_wait_end();

 }


 template <typename Input, typename Divider,typename Predicate,

           typename Solver, typename Combiner>

 auto parallel_execution_ff::divide_conquer(Input & input,

     Divider && divide_op,

     Predicate && condition_op,

     Solver && solve_op,

     Combiner && combine_op) const

 {

   using output_type = typename std::result_of<Solver(Input)>::type;


   // divide

   auto divide_fn = [&](const Input &in, std::vector<Input> &subin) {

     subin = divide_op(in);

   };

   // combine

   auto combine_fn = [&] (std::vector<output_type>& in, output_type& out) {

     using index_t = typename std::vector<output_type>::size_type;

     out = in[0];

     for(index_t i = 1; i < in.size(); ++i)

       out = combine_op(out, in[i]);

   };

   // sequential solver (base-case)

   auto seq_fn = [&] (const Input & in , output_type & out) {

     out = solve_op(in);

   };

   // condition

   auto cond_fn = [&] (const Input &in) {

     return condition_op(in);

   };

   output_type out_var{};


   using dac_t = ff::ff_DC<Input,output_type>;

   auto ncores = static_cast<int>(std::thread::hardware_concurrency());

   int max_nworkers = std::max(concurrency_degree_, ncores);

   dac_t dac(divide_fn, combine_fn, seq_fn, cond_fn, //kernel functions

       input, out_var, //input/output variables

       concurrency_degree_, //parallelism degree

       dac_t::DEFAULT_OUTSTANDING_TASKS, max_nworkers //ff-specific params

       );


   // run

   dac.run_and_wait_end();


   return out_var;

 }


 } // end namespace grppi


 #else // GRPPI_FF undefined


 namespace grppi {

 /// Empty stand-in for the FastFlow execution policy, used when GRPPI_FF is
 /// not defined so that the type name still exists.
 struct parallel_execution_ff {};

 /**
 \brief Metafunction that determines if type E is parallel_execution_ff.
 \note Without GRPPI_FF this evaluates to false for every type E.
 */
 template <typename E>
 constexpr bool is_parallel_execution_ff() { return false; }

 } // end namespace grppi


 #endif // GRPPI_FF


 #endif

grppi::divide_conquer
auto divide_conquer(const Execution &ex, Input &&input, Divider &&divider_op, Solver &&solver_op, Combiner &&combiner_op)
Invoke the Divide/conquer pattern. \param Execution Execution type.
Definition: divideconquer.h:49

grppi::map
void map(const Execution &ex, std::tuple< InputIterators... > firsts, InputIt last, OutputIt first_out, Transformer transform_op)
Invoke Map pattern on a data sequence.
Definition: map.h:51

grppi::map_reduce
auto map_reduce(const Execution &ex, std::tuple< InputIterators... > firsts, std::size_t size, Identity &&identity, Transformer &&transform_op, Combiner &&combine_op)
Invoke the Map/reduce pattern on a data sequence.
Definition: mapreduce.h:52

grppi::pipeline
void pipeline(const Execution &ex, Generator &&generate_op, Transformers &&... transform_ops)
Invoke Pipeline pattern on a data stream.
Definition: pipeline.h:46

grppi::reduce
auto reduce(const Execution &ex, InputIt first, std::size_t size, Result &&identity, Combiner &&combine_op)
Invoke Reduce pattern with identity value on a data sequence with sequential execution.
Definition: reduce.h:50

grppi::stencil
void stencil(const Execution &ex, std::tuple< InputIterators... > firsts, std::size_t size, OutputIt out, StencilTransformer &&transform_op, Neighbourhood &&neighbour_op)
Invoke Stencil pattern on a data sequence with sequential execution.
Definition: stencil.h:54

grppi
Definition: callable_traits.h:21

grppi::apply_increment
decltype(auto) apply_increment(F &&f, T< Iterators... > &iterators)
Applies a callable object to the iterators in a tuple-like object and then increments those iterators....
Definition: iterator.h:101

grppi::is_parallel_execution_ff
constexpr bool is_parallel_execution_ff()
Metafunction that determines if type E is parallel_execution_ff This metafunction evaluates to false ...
Definition: parallel_execution_ff.h:457

grppi::iterators_next
auto iterators_next(T &&t, int n)
Computes next n steps from a tuple of iterators.
Definition: iterator.h:170

grppi::apply_iterators_indexed
decltype(auto) apply_iterators_indexed(F &&f, T &&t, std::size_t i)
Applies a callable object to the values obtained from the iterators in a tuple by indexing....
Definition: iterator.h:142

pipeline_impl.h

grppi::parallel_execution_ff
Definition: parallel_execution_ff.h:449