openPMD-api
StatefulIterator.hpp
1 /* Copyright 2021 Franz Poeschel
2  *
3  * This file is part of openPMD-api.
4  *
5  * openPMD-api is free software: you can redistribute it and/or modify
6  * it under the terms of either the GNU General Public License or
7  * the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * openPMD-api is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License and the GNU Lesser General Public License
15  * for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * and the GNU Lesser General Public License along with openPMD-api.
19  * If not, see <http://www.gnu.org/licenses/>.
20  */
21 #pragma once
22 
23 #include "openPMD/Error.hpp"
24 #include "openPMD/Iteration.hpp"
25 #include "openPMD/Series.hpp"
26 #include "openPMD/Streaming.hpp"
27 #include "openPMD/auxiliary/Variant.hpp"
28 #include "openPMD/backend/ParsePreference.hpp"
29 #include "openPMD/snapshots/IteratorTraits.hpp"
30 
31 #include <deque>
32 #include <iostream>
33 #include <optional>
34 #include <set>
35 #include <unordered_map>
36 #include <variant>
37 #include <vector>
38 
39 /*
40  * Private header not included in user code.
41  * Implements the Iterator interface for the stateful/synchronous workflow.
42  */
43 
44 namespace openPMD
45 {
46 namespace internal
47 {
48  class SeriesData;
49 }
50 
51 namespace detail
52 {
53  /* The iterator status is either of the following:
54  */
55  namespace step_status_types
56  {
57  /* No step was opened yet, the Series was just opened.
58  */
59  struct Before_t
60  {};
61  /* A step is currently active
62  */
63  struct During_t
64  {
65  // The index of the current Step.
66  size_t step_count;
67  // The current Iteration within the Step.
68  // Empty optional indicates that no Iteration is left in the current
69  // step for processing, i.e. a new step must be opened or the Series
70  // is over.
71  std::optional<Iteration::IterationIndex_t> iteration_idx;
72  // Iteration indexes that are accessible within the current step.
73  // These are not modified when closing an Iteration as long as the
74  // current IO step stays active.
75  std::vector<Iteration::IterationIndex_t>
76  available_iterations_in_step;
77 
78  During_t(
79  size_t step_count,
80  std::optional<Iteration::IterationIndex_t> iteration_idx,
81  std::vector<Iteration::IterationIndex_t>
82  available_iterations_in_step);
83  };
84  /* No further data available in the Series.
85  */
86  struct After_t
87  {};
88  } // namespace step_status_types
89 
90  /* This class unifies the current step status as described above into a
91  * std::variant with some helper functions.
92  */
93  struct CurrentStep
94  : std::variant<
95  step_status_types::Before_t,
96  step_status_types::During_t,
97  step_status_types::After_t>
98  {
100  constexpr static Before_t Before{};
103  constexpr static After_t After{};
104 
105  using variant_t = std::variant<
109 
110  using variant_t::operator=;
111 
112  template <typename V>
113  auto get_variant() -> std::optional<V *>;
114  template <typename V>
115  auto get_variant() const -> std::optional<V const *>;
116 
117  auto get_iteration_index() const
118  -> std::optional<Iteration::IterationIndex_t>;
119 
120  /* Passed as first param of create_new lambda in map_during_t, so the
121  * lambda can make an appropriate case distinction.
122  */
123  enum class AtTheEdge : bool
124  {
125  Begin,
126  End
127  };
128 
129  /*
130  * Helper for a common way to access the underlying variant.
131  * `map` has type `auto (During_t &) -> void`, i.e. it can modify the
132  * `During_t` struct if the variant holds it. In other cases,
133  * `create_new` is called, it has the type
134  * `auto (AtTheEdge) -> std::optional<variant_t>` (or `std::nullopt`).
135  * `AtTheEdge` specifies whether the variant currently holds `Before_t`
136  * (Begin) or `After_t` (End). If the returned optional contains a value,
137  * that value is swapped with the current variant.
138  */
139  template <typename F, typename G>
140  void map_during_t(F &&map, G &&create_new);
141 
142  /*
143  * Overload where `create_new` is a no-op.
144  */
145  template <typename F>
146  void map_during_t(F &&map);
147 
148  // casts needed because of
149  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90943
150  inline auto as_base() const -> variant_t const &
151  {
152  return *this;
153  }
154  inline auto as_base() -> variant_t &
155  {
156  return *this;
157  }
158  };
159 
160  /*
161  * Types for telling the Iterator where to go next.
162  */
163  namespace seek_types
164  {
165  /* Just give me the next Iteration */
166  struct Next_t
167  {};
168  /* Give me some specific Iteration */
170  {
171  Iteration::IterationIndex_t iteration_idx;
172  };
173  } // namespace seek_types
174 
175  struct Seek : std::variant<seek_types::Next_t, seek_types::Seek_Iteration_t>
176  {
177  using Next_t = seek_types::Next_t;
179 
180  static constexpr Next_t const Next{};
181 
182  using variant_t =
183  std::variant<seek_types::Next_t, seek_types::Seek_Iteration_t>;
184  // casts needed because of
185  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90943
186  inline auto as_base() const -> variant_t const &
187  {
188  return *this;
189  }
190  inline auto as_base() -> variant_t &
191  {
192  return *this;
193  }
194  };
195 } // namespace detail
196 
201  : public AbstractSeriesIterator<
202  StatefulIterator,
203  Container<Iteration, Iteration::IterationIndex_t>::value_type>
204 {
205  friend class StatefulSnapshotsContainer;
206  friend class Series;
207  friend class internal::SeriesData;
208 
209  using iteration_index_t = IndexedIteration::index_t;
210 
212 
213  struct SharedData
214  {
215  SharedData() = default;
216  SharedData(SharedData const &) = delete;
217  SharedData(SharedData &&) = delete;
218  SharedData &operator=(SharedData const &) = delete;
219  SharedData &operator=(SharedData &&) = delete;
220 
221  ~SharedData();
222 
223  using step_index = size_t;
224 
225  /*
226  * This must be a non-owning internal handle to break reference cycles.
227  * A non-owning handle is fine due to the usual semantics for iterator
228  * invalidation.
229  */
230  Series series;
231  /*
232  * No step opened yet, so initialize this with CurrentStep::Before. Look
233  * to the documentation of Before_t, During_t, After_t and CurrentStep
234  * classes above for more info.
235  */
236  CurrentStep currentStep = {CurrentStep::Before};
237  /*
238  * Stores the parse preference optionally passed in the constructor.
239  * Decides if IO step logic is actually used.
240  */
241  std::optional<internal::ParsePreference> parsePreference;
242  /*
243  * Store which Iterations we already saw and in which IO step we did.
244  * Currently used for eliminating repetitions when (e.g. due to
245  * checkpoint-restart workflows) Iterations repeat in different steps.
246  *
247  * Possible future uses:
248  *
249  * 1. Support jumping back to a previous step in order to reopen an
250  * Iteration previously seen. (Would require reopening files in
251  * ADIOS2, but so be it.)
252  * 2. Pre-parsing a variable-based file for repeating Iterations and
253  * eliminating the earlier instances of repeated Iterations (instead
254  * of the later instances as is done now).
255  */
256  std::unordered_map<iteration_index_t, step_index> seen_iterations;
257 
258  /*
259  * This returns the current value of `During_t::iteration_idx` if that
260  * exists.
261  */
262  auto currentIteration() const
263  -> std::optional<Iteration::IterationIndex_t>;
264  };
265 
266  /*
267  * The shared pointer is never empty,
268  * emptiness is indicated by std::optional.
269  */
270  std::shared_ptr<std::optional<SharedData>> m_data =
271  std::make_shared<std::optional<SharedData>>(std::nullopt);
272 
273  auto get() -> SharedData &;
274  auto get() const -> SharedData const &;
275 
279 
280 public:
281  using value_type =
283  using typename parent_t ::difference_type;
284  using Seek = detail::Seek;
285 
287  explicit StatefulIterator();
288  ~StatefulIterator() override;
289 
290  StatefulIterator(StatefulIterator const &other);
291  StatefulIterator(StatefulIterator &&other) noexcept;
292 
293  StatefulIterator &operator=(StatefulIterator const &other);
294  StatefulIterator &operator=(StatefulIterator &&other) noexcept;
295 
297  {};
298  static constexpr tag_write_t const tag_write{};
300  {};
301  static constexpr tag_read_t const tag_read{};
302 
304  tag_read_t,
305  Series const &,
306  std::optional<internal::ParsePreference> const &parsePreference);
307 
308  StatefulIterator(tag_write_t, Series const &);
309 
310  // dereference
311  auto operator*() -> value_type &;
312  auto operator*() const -> value_type const &;
313 
314  // increment/decrement
315  auto operator++() -> StatefulIterator &;
316  auto operator--() -> StatefulIterator &;
317  auto operator--(int) -> StatefulIterator;
318  auto operator++(int) -> StatefulIterator;
319 
320  // comparison
321  auto operator-(StatefulIterator const &) const -> difference_type;
322  bool operator==(StatefulIterator const &other) const;
323  auto operator<(StatefulIterator const &) const -> bool;
324 
325  static auto end() -> StatefulIterator;
326  /*
327  * This is considered an end iterator if:
328  *
329  * 1. The iterator has no state at all
330  * (generic statically created end iterator)
331  * 2. The state is During_t with no iteration index
332  * (finished reading iterations in a randomly-accessible Series)
333  * 3. The state is After_t
334  * (closed the last step in a step-wise Series)
335  */
336  auto is_end() const -> bool;
337 
338  operator bool() const;
339 
340  /*
341  * Try moving this Iterator to the location specified by Seek, i.e.:
342  *
343  * 1. Either the next available Iteration
344  * 2. Or a specific Iteration specified by an index.
345  *
346  * A new step will be opened for this purpose if needed.
347  */
348  auto seek(Seek const &) -> StatefulIterator &;
349 
350 private:
351  std::optional<StatefulIterator *> nextIterationInStep();
352  std::optional<StatefulIterator *> skipToIterationInStep(iteration_index_t);
353 
354  /*
355  * When a step cannot successfully be opened, the method nextStep() calls
356  * itself again recursively.
357  * (Recursion massively simplifies the logic here, and it only happens
358  * in case of error.)
359  * After successfully beginning a step, this method needs to remember how
360  * many broken steps have been skipped. In case the Series does not use
361  * the /data/snapshot attribute, this helps to figure out which iteration
362  * is now active. Hence, recursion_depth.
363  */
364  std::optional<StatefulIterator *> nextStep(size_t recursion_depth);
365 
366  std::optional<StatefulIterator *> loopBody(Seek const &);
367 
368  void initIteratorFilebased();
369 
370  /*
371  * Called when an Iteration was just opened but entirely fails parsing.
372  */
373  void deactivateDeadIteration(iteration_index_t);
374 
375  void initSeriesInLinearReadMode();
376 
377  void close();
378 
379  /* When not using IO steps, the status should not be set to After_t, but be
380  * kept as During_t. This way, Iterations can still be opened without the
381  * Iterator thinking it's from a past step.
382  */
383  enum class TypeOfEndIterator : char
384  {
385  NoMoreSteps,
386  NoMoreIterationsInStep
387  };
388  auto turn_into_end_iterator(TypeOfEndIterator) -> void;
389  auto assert_end_iterator() const -> void;
390 
391  auto resetCurrentIterationToBegin(
392  size_t num_skipped_iterations,
393  std::vector<iteration_index_t> current_iterations) -> void;
394  auto peekCurrentlyOpenIteration() const
395  -> std::optional<value_type const *>;
396  auto peekCurrentlyOpenIteration() -> std::optional<value_type *>;
397 
398  auto reparse_possibly_deleted_iteration(iteration_index_t) -> void;
399 };
400 } // namespace openPMD
401 
402 // Template definitions
403 
404 namespace openPMD::detail
405 {
406 template <typename F, typename G>
407 void CurrentStep::map_during_t(F &&map, G &&create_new)
408 {
409  std::visit(
410  auxiliary::overloaded{
411  [&](During_t &during) { std::forward<F>(map)(during); },
412  [&](Before_t const &) {
413  std::optional<variant_t> res =
414  std::forward<G>(create_new)(AtTheEdge::Begin);
415  if (res.has_value())
416  {
417  this->swap(*res);
418  }
419  },
420  [&](After_t const &) {
421  std::optional<variant_t> res =
422  std::forward<G>(create_new)(AtTheEdge::End);
423  if (res.has_value())
424  {
425  this->swap(*res);
426  }
427  }},
428  this->as_base());
429 }
430 
431 template <typename F>
432 void CurrentStep::map_during_t(F &&map)
433 {
434  map_during_t(
435  std::forward<F>(map), [](auto const &) { return std::nullopt; });
436 }
437 } // namespace openPMD::detail
Definition: IteratorTraits.hpp:93
Map-like container that enforces openPMD requirements and handles IO.
Definition: Container.hpp:104
Implementation for the root level of the openPMD hierarchy.
Definition: Series.hpp:288
Definition: StatefulIterator.hpp:300
Definition: StatefulIterator.hpp:297
Based on the logic of the former class ReadIterations, integrating into itself the logic of former Wr...
Definition: StatefulIterator.hpp:204
StatefulIterator()
construct the end() iterator
Definition: ContainerImpls.hpp:36
Data members for Series.
Definition: Series.hpp:90
Public definitions of openPMD-api.
Definition: Date.cpp:29
Definition: StatefulIterator.hpp:98
Definition: StatefulIterator.hpp:176
Definition: StatefulIterator.hpp:167
Definition: StatefulIterator.hpp:170
Definition: StatefulIterator.hpp:87
Definition: StatefulIterator.hpp:60
Definition: StatefulIterator.hpp:64