GrPPI  0.2
Generic and Reusable Parallel Pattern Interface
omp/stencil.h
Go to the documentation of this file.
1 
21 #ifndef GRPPI_OMP_STENCIL_H
22 #define GRPPI_OMP_STENCIL_H
23 
24 #ifdef GRPPI_OMP
25 
26 #include "parallel_execution_omp.h"
27 
28 namespace grppi {
29 
30 template <typename InputIt, typename OutputIt, typename StencilTransformer, typename Neighbourhood,
31  typename ... OtherInputIts>
33  InputIt first, InputIt last, OutputIt first_out,
34  StencilTransformer transform_op,
35  Neighbourhood neighbour_op,
36  int i, int elements_per_thread,
37  OtherInputIts ... other_firsts )
38 {
39  auto begin = next(first, elements_per_thread * i);
40  auto end = (i==ex.concurrency_degree()-1)?
41  last :
42  next(first, elements_per_thread * (i+1));
43 
44  auto out = next(first_out, elements_per_thread * i);
45 
46  advance_iterators(elements_per_thread*i, other_firsts ...);
47  while (begin!=end) {
48  *out = transform_op(begin, neighbour_op(begin,other_firsts ... ));
49  begin++;
50  advance_iterators(other_firsts...);
51  out++;
52  }
53 }
54 
77 template <typename InputIt, typename OutputIt, typename StencilTransformer,
78  typename Neighbourhood>
80  InputIt first, InputIt last, OutputIt first_out,
81  StencilTransformer transform_op, Neighbourhood neighbour_op)
82 {
83  int size = last - first;
84  int elements_per_thread = size/ex.concurrency_degree();
85  #pragma omp parallel
86  {
87  #pragma omp single nowait
88  {
89  for(int i=1; i<ex.concurrency_degree(); i++) {
90  #pragma omp task firstprivate(i)
91  {
92  auto begin = next(first, elements_per_thread * i);
93  auto end = (i==ex.concurrency_degree()-1)?
94  last :
95  next(first, elements_per_thread * (i+1));
96  auto out = next(first_out, elements_per_thread * i);
97 
98  while(begin!=end){
99  *out = transform_op(begin, neighbour_op(begin));
100  begin++;
101  out++;
102  }
103  }
104  }
105 
106  auto begin = first;
107  auto end = next(first, elements_per_thread);
108  auto out = first_out;
109  while (begin!=end) {
110  *out = transform_op(begin, neighbour_op(begin));
111  begin++;
112  out++;
113  }
114  #pragma omp taskwait
115  }
116  }
117 }
118 
135 template <typename InputIt, typename OutputIt, typename StencilTransformer,
136  typename Neighbourhood, typename ... OtherInputIts>
138  InputIt first, InputIt last, OutputIt first_out,
139  StencilTransformer && transform_op, Neighbourhood && neighbour_op,
140  OtherInputIts ... other_firsts )
141 {
142  int size = distance(first,last);
143  int elements_per_thread = size/ex.concurrency_degree();
144  #pragma omp parallel
145  {
146  #pragma omp single nowait
147  {
148  for (int i=1; i<ex.concurrency_degree(); ++i) {
149  #pragma omp task firstprivate(i)
150  {
151  internal_stencil(ex, first, last, first_out,
152  std::forward<StencilTransformer>(transform_op),
153  std::forward<Neighbourhood>(neighbour_op),
154  i,elements_per_thread,
155  other_firsts...);
156  }
157  }
158 
159  auto begin = first;
160  auto out = first_out;
161  auto end = next(first, elements_per_thread);
162  while (begin!=end) {
163  *out = transform_op(*begin, neighbour_op(begin,other_firsts...));
164  begin++;
165  advance_iterators( other_firsts ... );
166  out++;
167  }
168 
169  #pragma omp taskwait
170  }
171  }
172 }
173 
179 }
180 
181 #endif
182 
183 #endif
Definition: callable_traits.h:24
void internal_stencil(parallel_execution_omp &ex, InputIt first, InputIt last, OutputIt first_out, StencilTransformer transform_op, Neighbourhood neighbour_op, int i, int elements_per_thread, OtherInputIts...other_firsts)
Definition: omp/stencil.h:32
void stencil(parallel_execution_native &ex, InputIt first, InputIt last, OutputIt first_out, StencilTransformer transform_op, Neighbourhood neighbour_op)
Invoke Stencil pattern on a data sequence with native parallel execution.
Definition: native/stencil.h:53
OpenMP parallel execution policy.
Definition: parallel_execution_omp.h:40
void advance_iterators(size_t delta, InputIt &...in)
Definition: iterator.h:29
int concurrency_degree() const noexcept
Get number of grppi threads.
Definition: parallel_execution_omp.h:85