Kokkos Core Kernels Package  Version of the Day
Kokkos_Tuners.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_KOKKOS_TUNERS_HPP
46 #define KOKKOS_KOKKOS_TUNERS_HPP
47 
48 #include <Kokkos_Macros.hpp>
49 #include <Kokkos_Core_fwd.hpp>
50 #include <Kokkos_ExecPolicy.hpp>
51 #include <KokkosExp_MDRangePolicy.hpp>
52 #include <impl/Kokkos_Profiling_Interface.hpp>
53 
54 #include <array>
55 #include <utility>
56 #include <tuple>
57 #include <string>
58 #include <vector>
59 #include <map>
60 #include <cassert>
61 
62 namespace Kokkos {
63 namespace Tools {
64 
65 namespace Experimental {
66 
67 // forward declarations
68 SetOrRange make_candidate_set(size_t size, int64_t* data);
69 bool have_tuning_tool();
70 size_t declare_output_type(const std::string&,
71  Kokkos::Tools::Experimental::VariableInfo);
72 void request_output_values(size_t, size_t,
73  Kokkos::Tools::Experimental::VariableValue*);
74 VariableValue make_variable_value(size_t, int64_t);
75 VariableValue make_variable_value(size_t, double);
76 SetOrRange make_candidate_range(double lower, double upper, double step,
77  bool openLower, bool openUpper);
78 size_t get_new_context_id();
79 void begin_context(size_t context_id);
80 void end_context(size_t context_id);
81 namespace Impl {
82 
/**
 * Index-friendly storage for one level of a nested search-space description.
 *
 * Each node keeps the values available at its own level (`root_values`) and,
 * in parallel, one child container per value (`sub_values`). Nested maps are
 * converted into this form so that a level can be addressed by position.
 *
 * @tparam ValueType     type of the values stored at this level
 * @tparam ContainedType type of the child nodes; `void` marks the lowest
 *                       level, which has no children
 */
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode;

template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode {
  std::vector<ValueType> root_values;
  std::vector<ContainedType> sub_values;
  // Appends a value to this level. Declared noexcept, so a failing
  // allocation inside push_back terminates the program instead of throwing.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }
  // Appends a child container. Callers are expected to add one child per root
  // value so that both vectors stay index-aligned.
  void add_sub_container(const ContainedType& in) { sub_values.push_back(in); }
  // Unchecked access: `index` must be smaller than root_values.size().
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
  // Unchecked access: `index` must be smaller than sub_values.size().
  const ContainedType& get_sub_value(const size_t index) const {
    return sub_values[index];
  }
};

/**
 * Lowest level of the hierarchy: only values, no child containers.
 * This specialization has no default constructor; it is built directly from
 * the vector of values it should hold.
 */
template <typename ValueType>
struct ValueHierarchyNode<ValueType, void> {
  std::vector<ValueType> root_values;
  explicit ValueHierarchyNode(std::vector<ValueType> rv)
      : root_values(std::move(rv)) {}
  // Appends a value to this level (terminates on allocation failure, see the
  // noexcept specifier).
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }
  // Unchecked access: `index` must be smaller than root_values.size().
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
};
119 
125 template <class NestedMap>
127 
128 // Vectors are our lowest-level, no nested values
129 template <class T>
130 struct MapTypeConverter<std::vector<T>> {
131  using type = ValueHierarchyNode<T, void>;
132 };
133 
134 // Maps contain both the "root" types and sub-vectors
135 template <class K, class V>
136 struct MapTypeConverter<std::map<K, V>> {
138 };
139 
145 template <class NestedMap>
147 
148 // Vectors are our lowest-level, no nested values. Just fill in the fundamental
149 // values
150 template <class T>
151 struct ValueHierarchyConstructor<std::vector<T>> {
152  using return_type = typename MapTypeConverter<std::vector<T>>::type;
153  static return_type build(const std::vector<T>& in) { return return_type{in}; }
154 };
155 
156 // For maps, we need to fill in the fundamental values, and construct child
157 // nodes
158 template <class K, class V>
159 struct ValueHierarchyConstructor<std::map<K, V>> {
160  using return_type = typename MapTypeConverter<std::map<K, V>>::type;
161  static return_type build(const std::map<K, V>& in) {
162  return_type node_to_build;
163  for (auto& entry : in) {
164  node_to_build.add_root_value(entry.first);
165  node_to_build.add_sub_container(
166  ValueHierarchyConstructor<V>::build(entry.second));
167  }
168  return node_to_build;
169  }
170 };
171 
/**
 * Compile-time depth of a nested search-space description, i.e. the number of
 * values that make up one point in that space.
 * Only the specializations below are defined.
 */
template <class InspectForDepth>
struct get_space_dimensionality;

// The dimensionality of a vector is 1
template <class T>
struct get_space_dimensionality<std::vector<T>> {
  static constexpr int value = 1;
};

// The dimensionality of a map is 1 (the map) plus the dimensionality
// of the map's value type
template <class K, class V>
struct get_space_dimensionality<std::map<K, V>> {
  static constexpr int value = 1 + get_space_dimensionality<V>::value;
};
195 
/**
 * Selects the nested container type that describes an N-dimensional sparse
 * space over values of type T.
 *
 * General case (N > 1): a map from T to the (N - 1)-dimensional structure.
 */
template <class T, int N>
struct n_dimensional_sparse_structure {
  using inner_type = typename n_dimensional_sparse_structure<T, N - 1>::type;
  using type       = std::map<T, inner_type>;
};

// Recursion anchor: a one-dimensional space is a plain vector of values.
template <class T>
struct n_dimensional_sparse_structure<T, 1> {
  using type = std::vector<T>;
};
209 
216 // First, a helper to get the value in one dimension
217 template <class Container>
219 
220 // At any given level, just return your value at that level
221 template <class RootType, class Subtype>
222 struct DimensionValueExtractor<ValueHierarchyNode<RootType, Subtype>> {
223  static RootType get(const ValueHierarchyNode<RootType, Subtype>& dimension,
224  double fraction_to_traverse) {
225  size_t index = dimension.root_values.size() * fraction_to_traverse;
226  return dimension.get_root_value(index);
227  }
228 };
229 
235 // At the bottom level, we have one double and a base-level ValueHierarchyNode
236 
237 template <class HierarchyNode, class... InterpolationIndices>
239 
240 template <class ValueType>
241 struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, void>, double> {
242  using node_type = ValueHierarchyNode<ValueType, void>;
243  using return_type = std::tuple<ValueType>;
244  static return_type build(const node_type& in, double index) {
245  return std::make_tuple(DimensionValueExtractor<node_type>::get(in, index));
246  }
247 };
248 
249 // At levels above the bottom, we tuple_cat the result of our child on the end
250 // of our own tuple
251 template <class ValueType, class Subtype, class... Indices>
252 struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, Subtype>, double,
253  Indices...> {
254  using node_type = ValueHierarchyNode<ValueType, Subtype>;
255  using sub_tuple =
256  typename GetMultidimensionalPoint<Subtype, Indices...>::return_type;
257  using return_type = decltype(std::tuple_cat(
258  std::declval<std::tuple<ValueType>>(), std::declval<sub_tuple>()));
259  static return_type build(const node_type& in, double fraction_to_traverse,
260  Indices... indices) {
261  size_t index = in.sub_values.size() * fraction_to_traverse;
262  auto dimension_value = std::make_tuple(
263  DimensionValueExtractor<node_type>::get(in, fraction_to_traverse));
264  return std::tuple_cat(dimension_value,
265  GetMultidimensionalPoint<Subtype, Indices...>::build(
266  in.get_sub_value(index), indices...));
267  }
268 };
269 
270 template <typename PointType, class ArrayType, size_t... Is>
271 auto get_point_helper(const PointType& in, const ArrayType& indices,
272  std::index_sequence<Is...>) {
273  using helper = GetMultidimensionalPoint<
274  PointType,
275  decltype(std::get<Is>(std::declval<ArrayType>()).value.double_value)...>;
276  return helper::build(in, std::get<Is>(indices).value.double_value...);
277 }
278 
279 template <typename PointType, typename ArrayType>
280 struct GetPoint;
281 
282 template <typename PointType, size_t X>
283 struct GetPoint<PointType,
284  std::array<Kokkos::Tools::Experimental::VariableValue, X>> {
285  using index_set_type =
286  std::array<Kokkos::Tools::Experimental::VariableValue, X>;
287  static auto build(const PointType& in, const index_set_type& indices) {
288  return get_point_helper(in, indices, std::make_index_sequence<X>{});
289  }
290 };
291 
292 template <typename PointType, typename ArrayType>
293 auto get_point(const PointType& point, const ArrayType& indices) {
294  return GetPoint<PointType, ArrayType>::build(point, indices);
295 }
296 
297 } // namespace Impl
298 
299 template <template <class...> class Container, size_t MaxDimensionSize = 100,
300  class... TemplateArguments>
301 class MultidimensionalSparseTuningProblem {
302  public:
303  using ProblemSpaceInput = Container<TemplateArguments...>;
304  static constexpr int space_dimensionality =
305  Impl::get_space_dimensionality<ProblemSpaceInput>::value;
306  static constexpr size_t max_space_dimension_size = MaxDimensionSize;
307  static constexpr double tuning_min = 0.0;
308  static constexpr double tuning_max = 0.999;
309  static constexpr double tuning_step = tuning_max / max_space_dimension_size;
310 
311  using StoredProblemSpace =
312  typename Impl::MapTypeConverter<ProblemSpaceInput>::type;
313  using HierarchyConstructor =
314  typename Impl::ValueHierarchyConstructor<Container<TemplateArguments...>>;
315 
316  using ValueArray = std::array<Kokkos::Tools::Experimental::VariableValue,
317  space_dimensionality>;
318 
319  private:
320  StoredProblemSpace m_space;
321  std::array<size_t, space_dimensionality> variable_ids;
322  size_t context;
323 
324  public:
325  MultidimensionalSparseTuningProblem() = default;
326  MultidimensionalSparseTuningProblem(ProblemSpaceInput space,
327  const std::vector<std::string>& names)
328  : m_space(HierarchyConstructor::build(space)) {
329  assert(names.size() == space_dimensionality);
330  for (unsigned long x = 0; x < names.size(); ++x) {
331  VariableInfo info;
332  info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
333  info.category = Kokkos::Tools::Experimental::StatisticalCategory::
334  kokkos_value_interval;
335  info.valueQuantity =
336  Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
337  info.candidates = Kokkos::Tools::Experimental::make_candidate_range(
338  tuning_min, tuning_max, tuning_step, true, true);
339  variable_ids[x] = declare_output_type(names[x], info);
340  }
341  }
342 
343  auto begin() {
344  context = Kokkos::Tools::Experimental::get_new_context_id();
345  ValueArray values;
346  for (int x = 0; x < space_dimensionality; ++x) {
347  values[x] = Kokkos::Tools::Experimental::make_variable_value(
348  variable_ids[x], 0.0);
349  }
350  begin_context(context);
351  request_output_values(context, space_dimensionality, values.data());
352  return get_point(m_space, values);
353  }
354 
355  auto end() { end_context(context); }
356 };
357 
358 template <size_t MaxDimensionSize = 100, template <class...> class Container,
359  class... TemplateArguments>
360 auto make_multidimensional_sparse_tuning_problem(
361  const Container<TemplateArguments...>& in, std::vector<std::string> names) {
362  return MultidimensionalSparseTuningProblem<Container, MaxDimensionSize,
363  TemplateArguments...>(in, names);
364 }
365 class TeamSizeTuner {
366  private:
367  using SpaceDescription = std::map<int64_t, std::vector<int64_t>>;
368  using TunerType = decltype(make_multidimensional_sparse_tuning_problem<20>(
369  std::declval<SpaceDescription>(),
370  std::declval<std::vector<std::string>>()));
371  TunerType tuner;
372 
373  public:
374  TeamSizeTuner() = default;
375  TeamSizeTuner& operator=(const TeamSizeTuner& other) = default;
376  TeamSizeTuner(const TeamSizeTuner& other) = default;
377  TeamSizeTuner& operator=(TeamSizeTuner&& other) = default;
378  TeamSizeTuner(TeamSizeTuner&& other) = default;
379  template <typename ViableConfigurationCalculator, typename Functor,
380  typename TagType, typename... Properties>
381  TeamSizeTuner(const std::string& name,
383  const Functor& functor, const TagType& tag,
384  ViableConfigurationCalculator calc) {
385  using PolicyType = Kokkos::TeamPolicy<Properties...>;
386  auto initial_vector_length = policy.impl_vector_length();
387  if (initial_vector_length < 1) {
388  policy.impl_set_vector_length(1);
389  }
415  SpaceDescription space_description;
416 
417  auto max_vector_length = PolicyType::vector_length_max();
418  std::vector<int64_t> allowed_vector_lengths;
419 
420  if (policy.impl_auto_vector_length()) { // case 1 or 2
421  for (int vector_length = max_vector_length; vector_length >= 1;
422  vector_length /= 2) {
423  policy.impl_set_vector_length(vector_length);
436  auto max_team_size = calc.get_max_team_size(policy, functor, tag);
437  if ((policy.impl_auto_team_size()) ||
438  (policy.team_size() <= max_team_size)) {
439  allowed_vector_lengths.push_back(vector_length);
440  }
441  }
442  } else { // case 3, there's only one vector length to care about
443  allowed_vector_lengths.push_back(policy.impl_vector_length());
444  }
445 
446  for (const auto vector_length : allowed_vector_lengths) {
447  std::vector<int64_t> allowed_team_sizes;
448  policy.impl_set_vector_length(vector_length);
449  auto max_team_size = calc.get_max_team_size(policy, functor, tag);
450  if (policy.impl_auto_team_size()) { // case 1 or 3, try all legal team
451  // sizes
452  for (int team_size = max_team_size; team_size >= 1; team_size /= 2) {
453  allowed_team_sizes.push_back(team_size);
454  }
455  } else { // case 2, just try the provided team size
456  allowed_team_sizes.push_back(policy.team_size());
457  }
458  space_description[vector_length] = allowed_team_sizes;
459  }
460  tuner = make_multidimensional_sparse_tuning_problem<20>(
461  space_description, {std::string(name + "_vector_length"),
462  std::string(name + "_team_size")});
463  policy.impl_set_vector_length(initial_vector_length);
464  }
465 
466  template <typename... Properties>
467  void tune(Kokkos::TeamPolicy<Properties...>& policy) {
468  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
469  auto configuration = tuner.begin();
470  auto team_size = std::get<1>(configuration);
471  auto vector_length = std::get<0>(configuration);
472  if (vector_length > 0) {
473  policy.impl_set_team_size(team_size);
474  policy.impl_set_vector_length(vector_length);
475  }
476  }
477  }
478  void end() {
479  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
480  tuner.end();
481  }
482  }
483 
484  private:
485 };
486 
namespace Impl {

/**
 * Appends every power of two that is strictly smaller than `tile_size`
 * (1, 2, 4, ...) to `cont`. Nothing is appended when `tile_size` is 1 or
 * less.
 */
template <typename T>
void fill_tile(std::vector<T>& cont, int tile_size) {
  int candidate = 1;
  while (candidate < tile_size) {
    cont.push_back(candidate);
    candidate *= 2;
  }
}

/**
 * For every power of two strictly smaller than `tile_size`, creates (or
 * reuses) the entry with that key and fills it recursively with the limit
 * `tile_size / key`. An entry whose reduced limit is 1 is still created but
 * stays empty.
 */
template <typename T, typename Mapped>
void fill_tile(std::map<T, Mapped>& cont, int tile_size) {
  int leading = 1;
  while (leading < tile_size) {
    Mapped& nested = cont[leading];
    fill_tile(nested, tile_size / leading);
    leading *= 2;
  }
}

}  // namespace Impl
502 
503 template <int MDRangeRank>
504 struct MDRangeTuner {
505  private:
506  static constexpr int rank = MDRangeRank;
507  static constexpr int max_slices = 15;
508  using SpaceDescription =
509  typename Impl::n_dimensional_sparse_structure<int, rank>::type;
510  using TunerType =
511  decltype(make_multidimensional_sparse_tuning_problem<max_slices>(
512  std::declval<SpaceDescription>(),
513  std::declval<std::vector<std::string>>()));
514  TunerType tuner;
515 
516  public:
517  MDRangeTuner() = default;
518  template <typename Functor, typename TagType, typename Calculator,
519  typename... Properties>
520  MDRangeTuner(const std::string& name,
521  const Kokkos::MDRangePolicy<Properties...>& policy,
522  const Functor& functor, const TagType& tag, Calculator calc) {
523  SpaceDescription desc;
524  int max_tile_size =
525  calc.get_mdrange_max_tile_size_product(policy, functor, tag);
526  Impl::fill_tile(desc, max_tile_size);
527  std::vector<std::string> feature_names;
528  for (int x = 0; x < rank; ++x) {
529  feature_names.push_back(name + "_tile_size_" + std::to_string(x));
530  }
531  tuner = make_multidimensional_sparse_tuning_problem<max_slices>(
532  desc, feature_names);
533  }
534  template <typename Policy, typename Tuple, size_t... Indices>
535  void set_policy_tile(Policy& policy, const Tuple& tuple,
536  const std::index_sequence<Indices...>&) {
537  policy.impl_change_tile_size({std::get<Indices>(tuple)...});
538  }
539  template <typename... Properties>
540  void tune(Kokkos::MDRangePolicy<Properties...>& policy) {
541  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
542  auto configuration = tuner.begin();
543  set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
544  }
545  }
546  void end() {
547  if (Kokkos::Tools::Experimental::have_tuning_tool()) {
548  tuner.end();
549  }
550  }
551 };
552 
553 } // namespace Experimental
554 } // namespace Tools
555 } // namespace Kokkos
556 
557 #endif
Execution policy for parallel work over a league of teams of threads.
Definition: dummy.cpp:3