GrPPI  0.2
Generic and Reusable Parallel Pattern Interface
tbb/stencil.h
Go to the documentation of this file.
1 
21 #ifndef GRPPI_TBB_STENCIL_H
22 #define GRPPI_TBB_STENCIL_H
23 
24 #ifdef GRPPI_TBB
25 
26 #include "parallel_execution_tbb.h"
27 
28 #include <tbb/tbb.h>
29 
30 namespace grppi {
31 
32 template <typename InputIt, typename OutputIt, typename StencilTransformer,
33  typename Neighbourhood, typename ... OtherInputIts>
35  int elements_per_thread, int index,
36  InputIt first, InputIt last, OutputIt first_out,
37  StencilTransformer transform_op, Neighbourhood neighbour_op,
38  OtherInputIts ... other_firsts){
39  auto begin = next(first, elements_per_thread * index);
40  auto end = (index==ex.concurrency_degree()-1)?
41  last :
42  next(first, elements_per_thread * (index+1));
43  auto out = next(first_out, elements_per_thread * index);
44  advance_iterators(elements_per_thread* index, other_firsts ...);
45  while(begin!=end){
46  *out = transform_op(begin, neighbour_op(begin, other_firsts ...));
47  begin++;
48  advance_iterators(other_firsts...);
49  out++;
50  }
51 }
52 
53 
76 template <typename InputIt, typename OutputIt, typename StencilTransformer,
77  typename Neighbourhood>
79  InputIt first, InputIt last, OutputIt first_out,
80  StencilTransformer transform_op,
81  Neighbourhood neighbour_op)
82 {
83  int size = last - first;
84  int elements_per_thread = size/ex.concurrency_degree();
85  tbb::task_group g;
86 
87  for (int i=1; i<ex.concurrency_degree(); ++i) {
88  g.run(
89  [&neighbour_op, &transform_op, first, first_out, elements_per_thread,
90  i, last, ex]() {
91  auto begin = first + (elements_per_thread * i);
92  auto end = (i==ex.concurrency_degree()-1)?
93  last :
94  next(first, elements_per_thread * (i+1));
95 
96  auto out = next(first_out, elements_per_thread * i);
97  while (begin!=end) {
98  *out = transform_op(begin, neighbour_op(begin));
99  begin++;
100  out++;
101  }
102  }
103  );
104  }
105 
106  auto end = first + elements_per_thread;
107  while (first!=end) {
108  *first_out = transform_op(first, neighbour_op(first));
109  first++;
110  first_out++;
111  }
112 
113  g.wait();
114 }
115 
132 template <typename InputIt, typename OutputIt, typename StencilTransformer,
133  typename Neighbourhood, typename ... OtherInputIts>
135  InputIt first, InputIt last, OutputIt first_out,
136  StencilTransformer transform_op, Neighbourhood neighbour_op,
137  OtherInputIts ... other_firsts )
138 {
139  int size = distance(first,last);
140  int elements_per_thread = size/ex.concurrency_degree();
141  tbb::task_group g;
142  for(int index=1; index<ex.concurrency_degree(); ++index) {
143  g.run(
144  [neighbour_op, transform_op, first, first_out, elements_per_thread,
145  index, last, &ex, other_firsts...]()
146  {
147  stencil_impl(ex, elements_per_thread, index,
148  first, last, first_out, transform_op,
149  neighbour_op, other_firsts...);
150  }
151  );
152  }
153 
154  auto end = next(first, elements_per_thread);
155  while(first!=end){
156  *first_out = transform_op(first, neighbour_op(first,other_firsts ...));
157  first++;
158  advance_iterators( other_firsts ... );
159  first_out++;
160  }
161 
162  g.wait();
163 }
164 
170 }
171 
172 #endif
173 
174 #endif
Definition: callable_traits.h:24
void stencil(parallel_execution_native &ex, InputIt first, InputIt last, OutputIt first_out, StencilTransformer transform_op, Neighbourhood neighbour_op)
Invoke Stencil pattern on a data sequence with native parallel execution.
Definition: native/stencil.h:53
TBB parallel execution policy.
Definition: parallel_execution_tbb.h:37
void advance_iterators(size_t delta, InputIt &...in)
Definition: iterator.h:29
void stencil_impl(parallel_execution_tbb &ex, int elements_per_thread, int index, InputIt first, InputIt last, OutputIt first_out, StencilTransformer transform_op, Neighbourhood neighbour_op, OtherInputIts...other_firsts)
Definition: tbb/stencil.h:34
int concurrency_degree() const noexcept
Get number of grppi threads.
Definition: parallel_execution_tbb.h:73