mdds
aos/block_util.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
3  *
4  * Copyright (c) 2021 Kohei Yoshida
5  *
6  * Permission is hereby granted, free of charge, to any person
7  * obtaining a copy of this software and associated documentation
8  * files (the "Software"), to deal in the Software without
9  * restriction, including without limitation the rights to use,
10  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following
13  * conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  *
27  ************************************************************************/
28 
29 #ifndef INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
30 #define INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
31 
32 #include "mdds/global.hpp"
33 #include "../types.hpp"
34 
35 namespace mdds { namespace mtv { namespace aos { namespace detail {
36 
37 template<typename Blks, lu_factor_t F>
39 {
40  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
41  {
42  static_assert(invalid_static_int<F>, "The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
43  }
44 };
45 
46 template<typename Blks>
47 struct adjust_block_positions<Blks, lu_factor_t::none>
48 {
49  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
50  {
51  int64_t n = blocks.size();
52 
53  if (start_block_index >= n)
54  return;
55 
56 #if MDDS_USE_OPENMP
57 #pragma omp parallel for
58 #endif
59  for (int64_t i = start_block_index; i < n; ++i)
60  blocks[i].position += delta;
61  }
62 };
63 
64 template<typename Blks>
65 struct adjust_block_positions<Blks, lu_factor_t::lu4>
66 {
67  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
68  {
69  int64_t n = blocks.size();
70 
71  if (start_block_index >= n)
72  return;
73 
74  // Ensure that the section length is divisible by 4.
75  int64_t len = n - start_block_index;
76  int64_t rem = len & 3; // % 4
77  len -= rem;
78  len += start_block_index;
79 #if MDDS_USE_OPENMP
80 #pragma omp parallel for
81 #endif
82  for (int64_t i = start_block_index; i < len; i += 4)
83  {
84  blocks[i].position += delta;
85  blocks[i + 1].position += delta;
86  blocks[i + 2].position += delta;
87  blocks[i + 3].position += delta;
88  }
89 
90  rem += len;
91  for (int64_t i = len; i < rem; ++i)
92  blocks[i].position += delta;
93  }
94 };
95 
96 template<typename Blks>
97 struct adjust_block_positions<Blks, lu_factor_t::lu8>
98 {
99  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
100  {
101  int64_t n = blocks.size();
102 
103  if (start_block_index >= n)
104  return;
105 
106  // Ensure that the section length is divisible by 8.
107  int64_t len = n - start_block_index;
108  int64_t rem = len & 7; // % 8
109  len -= rem;
110  len += start_block_index;
111 #if MDDS_USE_OPENMP
112 #pragma omp parallel for
113 #endif
114  for (int64_t i = start_block_index; i < len; i += 8)
115  {
116  blocks[i].position += delta;
117  blocks[i + 1].position += delta;
118  blocks[i + 2].position += delta;
119  blocks[i + 3].position += delta;
120  blocks[i + 4].position += delta;
121  blocks[i + 5].position += delta;
122  blocks[i + 6].position += delta;
123  blocks[i + 7].position += delta;
124  }
125 
126  rem += len;
127  for (int64_t i = len; i < rem; ++i)
128  blocks[i].position += delta;
129  }
130 };
131 
132 template<typename Blks>
133 struct adjust_block_positions<Blks, lu_factor_t::lu16>
134 {
135  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
136  {
137  int64_t n = blocks.size();
138 
139  if (start_block_index >= n)
140  return;
141 
142  // Ensure that the section length is divisible by 16.
143  int64_t len = n - start_block_index;
144  int64_t rem = len & 15; // % 16
145  len -= rem;
146  len += start_block_index;
147 #if MDDS_USE_OPENMP
148 #pragma omp parallel for
149 #endif
150  for (int64_t i = start_block_index; i < len; i += 16)
151  {
152  blocks[i].position += delta;
153  blocks[i + 1].position += delta;
154  blocks[i + 2].position += delta;
155  blocks[i + 3].position += delta;
156  blocks[i + 4].position += delta;
157  blocks[i + 5].position += delta;
158  blocks[i + 6].position += delta;
159  blocks[i + 7].position += delta;
160  blocks[i + 8].position += delta;
161  blocks[i + 9].position += delta;
162  blocks[i + 10].position += delta;
163  blocks[i + 11].position += delta;
164  blocks[i + 12].position += delta;
165  blocks[i + 13].position += delta;
166  blocks[i + 14].position += delta;
167  blocks[i + 15].position += delta;
168  }
169 
170  rem += len;
171  for (int64_t i = len; i < rem; ++i)
172  blocks[i].position += delta;
173  }
174 };
175 
176 template<typename Blks>
177 struct adjust_block_positions<Blks, lu_factor_t::lu32>
178 {
179  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
180  {
181  int64_t n = blocks.size();
182 
183  if (start_block_index >= n)
184  return;
185 
186  // Ensure that the section length is divisible by 32.
187  int64_t len = n - start_block_index;
188  int64_t rem = len & 31; // % 32
189  len -= rem;
190  len += start_block_index;
191 #if MDDS_USE_OPENMP
192 #pragma omp parallel for
193 #endif
194  for (int64_t i = start_block_index; i < len; i += 32)
195  {
196  blocks[i].position += delta;
197  blocks[i + 1].position += delta;
198  blocks[i + 2].position += delta;
199  blocks[i + 3].position += delta;
200  blocks[i + 4].position += delta;
201  blocks[i + 5].position += delta;
202  blocks[i + 6].position += delta;
203  blocks[i + 7].position += delta;
204  blocks[i + 8].position += delta;
205  blocks[i + 9].position += delta;
206  blocks[i + 10].position += delta;
207  blocks[i + 11].position += delta;
208  blocks[i + 12].position += delta;
209  blocks[i + 13].position += delta;
210  blocks[i + 14].position += delta;
211  blocks[i + 15].position += delta;
212  blocks[i + 16].position += delta;
213  blocks[i + 17].position += delta;
214  blocks[i + 18].position += delta;
215  blocks[i + 19].position += delta;
216  blocks[i + 20].position += delta;
217  blocks[i + 21].position += delta;
218  blocks[i + 22].position += delta;
219  blocks[i + 23].position += delta;
220  blocks[i + 24].position += delta;
221  blocks[i + 25].position += delta;
222  blocks[i + 26].position += delta;
223  blocks[i + 27].position += delta;
224  blocks[i + 28].position += delta;
225  blocks[i + 29].position += delta;
226  blocks[i + 30].position += delta;
227  blocks[i + 31].position += delta;
228  }
229 
230  rem += len;
231  for (int64_t i = len; i < rem; ++i)
232  blocks[i].position += delta;
233  }
234 };
235 
236 }}}} // namespace mdds::mtv::aos::detail
237 
238 #endif
239 
240 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: aos/block_util.hpp:39