openPMD-api
Series.hpp
1 /* Copyright 2017-2025 Fabian Koller, Axel Huebl, Franz Poeschel, Luca Fedeli
2  *
3  * This file is part of openPMD-api.
4  *
5  * openPMD-api is free software: you can redistribute it and/or modify
6  * it under the terms of either the GNU General Public License or
7  * the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * openPMD-api is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License and the GNU Lesser General Public License
15  * for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * and the GNU Lesser General Public License along with openPMD-api.
19  * If not, see <http://www.gnu.org/licenses/>.
20  */
21 #pragma once
22 
23 #include "openPMD/Error.hpp"
24 #include "openPMD/IO/AbstractIOHandler.hpp"
25 #include "openPMD/IO/Access.hpp"
26 #include "openPMD/IO/Format.hpp"
27 #include "openPMD/Iteration.hpp"
28 #include "openPMD/IterationEncoding.hpp"
29 #include "openPMD/Streaming.hpp"
30 #include "openPMD/auxiliary/TypeTraits.hpp"
31 #include "openPMD/auxiliary/Variant.hpp"
32 #include "openPMD/backend/Attributable.hpp"
33 #include "openPMD/backend/Container.hpp"
34 #include "openPMD/backend/ParsePreference.hpp"
35 #include "openPMD/config.hpp"
36 #include "openPMD/snapshots/Snapshots.hpp"
37 #include "openPMD/version.hpp"
38 
39 #if openPMD_HAVE_MPI
40 #include <mpi.h>
41 #endif
42 
43 #include <cstdint> // uint64_t
44 #include <deque>
45 #include <functional>
46 #include <map>
47 #include <memory>
48 #include <optional>
49 #include <set>
50 #include <stdexcept>
51 #include <string>
52 #include <tuple>
53 #include <unordered_map>
54 #include <variant>
55 #include <vector>
56 
57 // expose private and protected members for invasive testing
58 #ifndef OPENPMD_private
59 #define OPENPMD_private private:
60 #endif
61 
62 namespace openPMD
63 {
// Forward declarations of classes that this header names before their
// definitions are visible (e.g. Series is used inside internal::SeriesData,
// which precedes the definition of class Series).
class ReadIterations;
class StatefulIterator;
class Series;
68 
69 namespace internal
70 {
/* Self-documenting replacement for a plain boolean; this is the type of
 * m_iterationEncodingSetExplicitly. The underlying type is bool, with
 * default_ corresponding to false and explicit_ to true. */
enum class default_or_explicit : bool
{
    default_ = false,
    explicit_ = true
};
    /**
     * Data members for Series.
     *
     * Copy and move construction/assignment are deleted below, so an
     * instance can neither be copied nor moved.
     *
     * NOTE(review): this listing skips source lines. The documentation index
     * of this file also names m_filenamePadding, m_iterationEncoding,
     * m_format and m_wroteAtLeastOneIOStep as members of this class, but
     * their declarations are not visible here.
     */
89  class SeriesData final : public AttributableData
90  {
91  public:
92  explicit SeriesData() = default;
93 
94  virtual ~SeriesData();
95 
96  SeriesData(SeriesData const &) = delete;
97  SeriesData(SeriesData &&) = delete;
98 
99  SeriesData &operator=(SeriesData const &) = delete;
100  SeriesData &operator=(SeriesData &&) = delete;
101 
    /** Same index type as Iteration::IterationIndex_t. */
102  using IterationIndex_t = Iteration::IterationIndex_t;
    /** Container of the iterations; value-initialized. */
104  IterationsContainer_t iterations{};
105 
    /**
     * Series::readIterations() returns an iterator type that modifies the
     * state of the Series; this is the shared instance of that iterator.
     * NOTE(review): the index entry for this member is truncated, confirm
     * the details against the original header.
     */
121  std::unique_ptr<StatefulIterator> m_sharedStatefulIterator;
    /**
     * For writing: Remember which iterations have been written in the
     * currently active output step.
     */
127  std::set<IterationIndex_t> m_currentlyActiveIterations;
    /** For reading: In which IO step do I need to look for an Iteration? */
131  std::unordered_map<IterationIndex_t, size_t> m_snapshotToStep;
    /**
     * This map contains the filenames of those Iterations which were found
     * on the file system upon opening.
     * NOTE(review): the index entry for this member is truncated.
     */
151  std::unordered_map<IterationIndex_t, std::string> m_iterationFilenames;
    /** Needed if reading a single iteration of a file-based series. */
161  std::optional<std::string> m_overrideFilebasedFilename;
    /** Name of the iteration without filename suffix. */
168  std::string m_name;
    /** Filename leading up to the expansion pattern. */
173  std::string m_filenamePrefix;
    /** Filename after the expansion pattern without filename extension. */
177  std::string m_filenamePostfix;
    /** Filename extension as specified by the user. */
182  std::string m_filenameExtension;
193  /*
194  * ADIOS2 should use variable-based encoding as default rather than
195  * group-based encoding as much as possible.
196  * Since this cannot be decided at construction time, groupBased
197  * encoding is selected first, and re-decided later.
198  * However, when group-based encoding is selected by the user explicitly,
199  * that selection should not be changed again.
200  * Hence, remember that here.
201  */
202  default_or_explicit m_iterationEncodingSetExplicitly =
203  default_or_explicit::default_;
    /** Whether a step is currently active for this iteration. */
215  StepStatus m_stepStatus = StepStatus::NoStep;
    /** True if a user opts into lazy parsing. */
219  bool m_parseLazily = false;
220  uint64_t m_hintLazyParsingAfterTimeout = 20; // seconds
221 
230 
    /** Remember the preference that the backend specified for parsing. */
237  std::optional<ParsePreference> m_parsePreference;
238 
    /*
     * Optional callback that takes a Series and returns an
     * AbstractIOHandler pointer; empty by default.
     * NOTE(review): presumably consumed by
     * Series::runDeferredInitialization() -- confirm in Series.cpp.
     */
239  std::optional<std::function<AbstractIOHandler *(Series &)>>
240  m_deferred_initialization = std::nullopt;
241 
242  void close();
243 
244 #if openPMD_HAVE_MPI
245  /*
246  * @todo Once we have separate MPI headers, move this there.
247  */
248  std::optional<MPI_Comm> m_communicator;
249 #endif
250 
    /*
     * NOTE(review): the declaration heads of the following four brace
     * blocks, and the alternatives listed inside the std::variant<...>,
     * are missing from this listing (source lines 251, 253, 257, 262 and
     * 266-268 were dropped). Restore them from the original header before
     * compiling. The last block is evidently the definition of
     * RankTableData, the type of m_rankTable below.
     */
252  {};
254  {
255  std::string value;
256  };
258  {
259  std::string value;
260  };
261 
263  {
264  Attributable m_attributable;
265  std::variant<
269  m_rankTableSource;
270  std::optional<chunk_assignment::RankMeta> m_bufferedRead;
271  };
272  RankTableData m_rankTable;
273  }; // SeriesData
274 
275  class SeriesInternal;
276 } // namespace internal
277 
/**
 * Implementation for the root level of the openPMD hierarchy.
 *
 * NOTE(review): this listing skips source lines. The documentation index of
 * this file additionally lists setOpenPMDextension(), iterationEncoding(),
 * setIterationEncoding(), readIterations(), snapshots() and
 * writeIterations() as members, but their declarations are not visible
 * here. The first lines of the setSoftware() and advance() declarations are
 * missing as well.
 */
287 class Series : public Attributable
288 {
289  friend class Attributable;
290  friend class Iteration;
291  friend class Writable;
292  friend class ReadIterations;
293  friend class StatefulIterator;
294  friend class internal::SeriesData;
295  friend class internal::AttributableData;
296  friend class StatefulSnapshotsContainer;
297 
298 public:
    /**
     * Default constructor. Accessing the data of such a Series through
     * get() throws (see get() further down).
     */
299  explicit Series();
300 
301 #if openPMD_HAVE_MPI
    /**
     * Construct a Series from a file path, an access mode, an MPI
     * communicator and an options string that defaults to "{}".
     */
314  Series(
315  std::string const &filepath,
316  Access at,
317  MPI_Comm comm,
318  std::string const &options = "{}");
319 #endif
320 
    /**
     * Construct a Series from a file path, an access mode and an options
     * string that defaults to "{}".
     */
367  Series(
368  std::string const &filepath,
369  Access at,
370  std::string const &options = "{}");
371 
372  Series(Series const &) = default;
373  Series(Series &&) = default;
374 
375  Series &operator=(Series const &) = default;
376  Series &operator=(Series &&) = default;
377 
378  ~Series() override = default;
379 
    /** An unsigned integer type, used to identify Iterations in a Series. */
383  using IterationIndex_t = Iteration::IterationIndex_t;
    /** Public iterations container; assigned from the data object in setData(). */
388  IterationsContainer_t iterations;
389 
396  operator bool() const;
397 
403  std::string openPMD() const;
    /** Set the version of the enforced openPMD standard. */
412  Series &setOpenPMD(std::string const &openPMD);
413 
419  uint32_t openPMDextension() const;
429 
434  std::string basePath() const;
    /**
     * Set the common prefix for all data sets and sub-groups of a specific
     * iteration.
     */
442  Series &setBasePath(std::string const &basePath);
443 
449  std::string meshesPath() const;
    /** Set the path to mesh records, relative(!) to basePath. */
459  Series &setMeshesPath(std::string const &meshesPath);
460 
471  bool hasRankTableRead();
472 
    /* `collective` only has a default value (false) in non-MPI builds. */
484 #if openPMD_HAVE_MPI
485  chunk_assignment::RankMeta rankTable(bool collective);
486 #else
487  chunk_assignment::RankMeta rankTable(bool collective = false);
488 #endif
489 
    /**
     * Set the MPI ranks meta info attribute.
     * NOTE(review): the index entry for this member is truncated.
     */
497  Series &setRankTable(std::string const &myRankInfo);
498 
504  std::string particlesPath() const;
    /**
     * Set the path to groups for each particle species, relative(!) to
     * basePath.
     */
514  Series &setParticlesPath(std::string const &particlesPath);
515 
521  std::string author() const;
    /** Indicate the author and contact for the information in the file. */
528  Series &setAuthor(std::string const &author);
529 
535  std::string software() const;
    /**
     * Indicate the software/code/simulation that created the file.
     * NOTE(review): the first line of this declaration is missing from the
     * listing; per the documentation index it reads
     * `Series &setSoftware(`.
     */
545  std::string const &newName,
546  std::string const &newVersion = std::string("unspecified"));
547 
553  std::string softwareVersion() const;
    /**
     * Indicate the version of the software/code/simulation that created the
     * file. Deprecated in favor of the second argument of setSoftware().
     */
563  [[deprecated(
564  "Set the version with the second argument of setSoftware()")]] Series &
565  setSoftwareVersion(std::string const &softwareVersion);
566 
571  std::string date() const;
    /** Indicate the date of creation. */
577  Series &setDate(std::string const &date);
578 
584  std::string softwareDependencies() const;
    /** Indicate dependencies of software that were used to create the file. */
592  Series &setSoftwareDependencies(std::string const &newSoftwareDependencies);
593 
599  std::string machine() const;
    /** Indicate the machine or relevant hardware that created the file. */
606  Series &setMachine(std::string const &newMachine);
607 
630 
636  std::string iterationFormat() const;
    /**
     * Set a pattern describing how to access single iterations in the raw
     * file.
     */
648  Series &setIterationFormat(std::string const &iterationFormat);
649 
653  std::string name() const;
654 
    /** Set the pattern for file names. */
661  Series &setName(std::string const &name);
662 
    /** The currently used backend. */
669  std::string backend() const;
670  std::string backend();
671 
    /**
     * Execute all required remaining IO operations to write or read data.
     * The backendConfig argument defaults to "{}".
     */
679  void flush(std::string backendConfig = "{}");
680 
697 
741 
    /** Parse the Series. */
756  void parseBase();
757 
768 
    /** Close the Series and release the data storage/transport backends. */
779  void close();
780 
    /**
     * This overrides Attributable::iterationFlush() which will fail on
     * Series: instantiating this template trips the static_assert below.
     */
784  template <typename X = void, typename... Args>
785  auto iterationFlush(Args &&...)
786  {
787  static_assert(
788  auxiliary::dependent_false_v<X>,
789  "Cannot call this on an instance of Series.");
790  }
791 
792  // clang-format off
793 OPENPMD_private
794  // clang-format on
795 
796  static constexpr char const *const BASEPATH = "/data/%T/";
797 
798  struct ParsedInput;
799  using iterations_t = decltype(internal::SeriesData::iterations);
800  using iterations_iterator = iterations_t::iterator;
801 
802  using Data_t = internal::SeriesData;
803  std::shared_ptr<Data_t> m_series = nullptr;
804 
    /*
     * Access the SeriesData held in m_series.
     * Throws std::runtime_error if m_series is null, which per the error
     * message is the case for a default-constructed Series.
     */
805  inline Data_t &get()
806  {
807  if (m_series)
808  {
809  return *m_series;
810  }
811  else
812  {
813  throw std::runtime_error(
814  "[Series] Cannot use default-constructed Series.");
815  }
816  }
817 
    /* Const overload of get(); same null check and same exception. */
818  inline Data_t const &get() const
819  {
820  if (m_series)
821  {
822  return *m_series;
823  }
824  else
825  {
826  throw std::runtime_error(
827  "[Series] Cannot use default-constructed Series.");
828  }
829  }
830 
    /*
     * Adopt the given data object: store it in m_series, assign its
     * iterations member to the public `iterations` member and forward the
     * pointer to Attributable::setData().
     * m_series is dereferenced right after the move, so `series` must not
     * be null.
     */
831  inline void setData(std::shared_ptr<internal::SeriesData> series)
832  {
833  m_series = std::move(series);
834  iterations = m_series->iterations;
835  Attributable::setData(m_series);
836  }
837 
838  std::unique_ptr<ParsedInput> parseInput(std::string);
    /** Parse non-backend-specific configuration in JSON config. */
848  template <typename TracingJSON>
849  void parseJsonOptions(TracingJSON &options, ParsedInput &);
850  bool hasExpansionPattern(std::string filenameWithExtension);
851  bool reparseExpansionPattern(std::string filenameWithExtension);
852  template <typename... MPI_Communicator>
853  void init(
854  std::string const &filepath,
855  Access at,
856  std::string const &options,
857  MPI_Communicator &&...);
858  template <typename TracingJSON, typename... MPI_Communicator>
859  std::tuple<std::unique_ptr<ParsedInput>, TracingJSON> initIOHandler(
860  std::string const &filepath,
861  std::string const &options,
862  Access at,
863  bool resolve_generic_extension,
864  MPI_Communicator &&...);
865  void initSeries(
866  std::unique_ptr<AbstractIOHandler>, std::unique_ptr<ParsedInput>);
867  void initDefaults(IterationEncoding, bool initAll = false);
    /**
     * Internal call for flushing a Series.
     * NOTE(review): std::future is used here, but <future> is not among
     * the standard headers this file includes directly; presumably it
     * arrives transitively -- confirm, or include it explicitly.
     */
879  std::future<void> flush_impl(
880  iterations_iterator begin,
881  iterations_iterator end,
882  internal::FlushParams const &flushParams,
883  bool flushIOHandler = true);
884  void flushFileBased(
885  iterations_iterator begin,
886  iterations_iterator end,
887  internal::FlushParams const &flushParams,
888  bool flushIOHandler = true);
889  /*
890  * Group-based and variable-based iteration layouts share a lot of logic
891  * (realistically, the variable-based iteration layout only throws out
892  * one layer in the hierarchy).
893  * As a convention, methods that deal with both layouts are called
894  * .*GorVBased, short for .*GroupOrVariableBased
895  */
896  void flushGorVBased(
897  iterations_iterator begin,
898  iterations_iterator end,
899  internal::FlushParams const &flushParams,
900  bool flushIOHandler = true);
901  void flushMeshesPath();
902  void flushParticlesPath();
903  void flushRankTable();
904  /* Parameter `read_only_this_single_iteration` used for reopening an
905  * Iteration after closing it.
906  */
907  void readFileBased(
908  std::optional<IterationIndex_t> read_only_this_single_iteration);
909  void readOneIterationFileBased(std::string const &filePath);
    /**
     * Note on re-parsing of a Series: If init == false, the parsing process
     * will seek for new Iterations.
     * NOTE(review): the index entry for this member is truncated, the
     * remainder of the note is not visible in this listing.
     */
925  std::vector<IterationIndex_t> readGorVBased(
926  bool do_always_throw_errors,
927  bool init,
928  std::optional<IterationIndex_t> read_only_this_single_iteration);
929  void readBase();
930  std::string iterationFilename(IterationIndex_t i);
931 
    /* Return type of openIterationIfDirty(); a bool-backed two-state enum. */
932  enum class IterationOpened : bool
933  {
934  HasBeenOpened,
935  RemainsClosed
936  };
937  /*
938  * For use by flushFileBased, flushGorVBased
939  * Open an iteration, but only if necessary.
940  * Only open if the iteration is dirty and if it is not in deferred
941  * parse state.
942  */
943  IterationOpened
944  openIterationIfDirty(IterationIndex_t index, Iteration &iteration);
945  /*
946  * Open an iteration. Ensures that the iteration's m_closed status
947  * is set properly and that any files pertaining to the iteration
948  * are opened.
949  * Does not create files when called in CREATE mode.
950  */
951  void openIteration(IterationIndex_t index, Iteration &iteration);
952 
    /**
     * Find the given iteration in Series::iterations and return an iterator
     * into Series::iterations.
     * NOTE(review): the index entry for this member is truncated.
     */
957  iterations_iterator indexOf(Iteration const &);
958 
    /**
     * In step-based IO mode, begin or end an IO step for the given
     * iteration.
     * NOTE(review): the first line of this declaration is missing from the
     * listing; per the documentation index it reads
     * `AdvanceStatus advance(`.
     */
974  AdvanceMode mode,
975  internal::AttributableData &file,
976  iterations_iterator it);
977 
979 
    /**
     * Called at the end of an IO step to store the iterations defined in
     * the IO step to the snapshot attribute.
     * NOTE(review): the index entry for this member is truncated.
     */
987  void flushStep(bool doFlush);
988 
989  /*
990  * setIterationEncoding() should only be called by users of our public API,
991  * but never internally. We need to distinguish if the iteration encoding
992  * was selected explicitly or implicitly, see
993  * m_iterationEncodingSetExplicitly for further details.
994  */
995  Series &setIterationEncoding_internal(
996  IterationEncoding iterationEncoding, internal::default_or_explicit);
997 
998  /*
999  * Returns the current content of the /data/snapshot attribute.
1000  * (We could also add this to the public API some time)
1001  */
1002  std::optional<std::vector<IterationIndex_t>> currentSnapshot();
1003 
1004  AbstractIOHandler *runDeferredInitialization();
1005 
1006  AbstractIOHandler *IOHandler();
1007  AbstractIOHandler const *IOHandler() const;
1008 
1009  /* adios2::Mode::ReadRandomAccess does not support reading modifiable
1010  * attributes. However, we need the values of /data/snapshot as a modifiable
1011  * attribute, so this function quickly opens the file in adios2::Mode::Read
1012  * and retrieves the changing values over time.
1013  * Return std::nullopt if /data/snapshot is not present.
1014  */
1015  std::optional<std::vector<std::vector<IterationIndex_t>>>
1016  preparseSnapshots();
1017 
1018  Snapshots makeRandomAccessSnapshots();
1019  Snapshots makeSynchronousSnapshots();
1020  /* Should adios2::Variable<T>::SetStepSelection() be used for accessing
1021  * steps?
1022  */
1023  [[nodiscard]] bool randomAccessSteps() const;
1024 
1025  std::vector<std::string> availableDatasets();
1026 }; // Series
1027 
1028 namespace debug
1029 {
1030  void printDirty(Series const &);
1031 }
1032 } // namespace openPMD
1033 
1034 // Make sure that this legacy header is always included if Series.hpp is
1035 // included, otherwise Series::readIterations() cannot be used
1036 #include "openPMD/ReadIterations.hpp"
Interface for communicating between logical and physically persistent data.
Definition: AbstractIOHandler.hpp:206
Layer to manage storage of attributes associated with file objects.
Definition: Attributable.hpp:222
Logical compilation of data from one snapshot (e.g.
Definition: Iteration.hpp:146
Legacy class as return type for Series::readIterations().
Definition: ReadIterations.hpp:75
Implementation for the root level of the openPMD hierarchy.
Definition: Series.hpp:288
Series & setName(std::string const &name)
Set the pattern for file names.
Definition: Series.cpp:731
std::string softwareVersion() const
Definition: Series.cpp:639
Series & setMeshesPath(std::string const &meshesPath)
Set the path to mesh records, relative(!) to basePath.
Definition: Series.cpp:275
std::string iterationFormat() const
Definition: Series.cpp:695
void flush(std::string backendConfig="{}")
Execute all required remaining IO operations to write or read data.
Definition: Series.cpp:780
auto iterationFlush(Args &&...)
This overrides Attributable::iterationFlush() which will fail on Series.
Definition: Series.hpp:785
void parseBase()
Parse the Series.
Definition: Series.cpp:3510
std::string basePath() const
Definition: Series.cpp:249
Series & setSoftwareDependencies(std::string const &newSoftwareDependencies)
Indicate dependencies of software that were used to create the file.
Definition: Series.cpp:667
iterations_iterator indexOf(Iteration const &)
Find the given iteration in Series::iterations and return an iterator into Series::iterations at that...
Definition: Series.cpp:2598
Iteration::IterationIndex_t IterationIndex_t
An unsigned integer type, used to identify Iterations in a Series.
Definition: Series.hpp:383
Series & setParticlesPath(std::string const &particlesPath)
Set the path to groups for each particle species, relative(!) to basePath.
Definition: Series.cpp:594
std::future< void > flush_impl(iterations_iterator begin, iterations_iterator end, internal::FlushParams const &flushParams, bool flushIOHandler=true)
Internal call for flushing a Series.
Definition: Series.cpp:1364
std::string machine() const
Definition: Series.cpp:673
std::string date() const
Definition: Series.cpp:650
WriteIterations writeIterations()
Entry point to the writing end of the streaming API.
Definition: Series.cpp:3515
std::string backend() const
The currently used backend.
Definition: Series.cpp:769
void close()
Close the Series and release the data storage/transport backends.
Definition: Series.cpp:3531
Series & setOpenPMD(std::string const &openPMD)
Set the version of the enforced openPMD standard.
Definition: Series.cpp:226
void flushStep(bool doFlush)
Called at the end of an IO step to store the iterations defined in the IO step to the snapshot attrib...
Definition: Series.cpp:2812
std::string openPMD() const
Definition: Series.cpp:221
Series & setAuthor(std::string const &author)
Indicate the author and contact for the information in the file.
Definition: Series.cpp:620
Snapshots snapshots()
Preferred way to access Iterations/Snapshots.
Definition: Series.cpp:3400
Series & setSoftwareVersion(std::string const &softwareVersion)
Indicate the version of the software/code/simulation that created the file.
Definition: Series.cpp:644
AdvanceStatus advance(AdvanceMode mode, internal::AttributableData &file, iterations_iterator it)
In step-based IO mode, begin or end an IO step for the given iteration.
Definition: Series.cpp:2613
std::string author() const
Definition: Series.cpp:615
std::string meshesPath() const
Definition: Series.cpp:270
Series & setMachine(std::string const &newMachine)
Indicate the machine or relevant hardware that created the file.
Definition: Series.cpp:678
std::string software() const
Definition: Series.cpp:626
std::vector< IterationIndex_t > readGorVBased(bool do_always_throw_errors, bool init, std::optional< IterationIndex_t > read_only_this_single_iteration)
Note on re-parsing of a Series: If init == false, the parsing process will seek for new Iterations/Re...
Definition: Series.cpp:2068
uint32_t openPMDextension() const
Definition: Series.cpp:238
chunk_assignment::RankMeta rankTable(bool collective)
Definition: Series.cpp:329
Series & setBasePath(std::string const &basePath)
Set the common prefix for all data sets and sub-groups of a specific iteration.
Definition: Series.cpp:254
bool hasRankTableRead()
Definition: Series.cpp:312
Series & setRankTable(std::string const &myRankInfo)
Set the Mpi Ranks Meta Info attribute, i.e.
Definition: Series.cpp:460
void parseJsonOptions(TracingJSON &options, ParsedInput &)
Parse non-backend-specific configuration in JSON config.
Definition: Series.cpp:3149
std::string particlesPath() const
Definition: Series.cpp:589
Series & setDate(std::string const &date)
Indicate the date of creation.
Definition: Series.cpp:655
std::string name() const
Definition: Series.cpp:726
Series & setSoftware(std::string const &newName, std::string const &newVersion=std::string("unspecified"))
Indicate the software/code/simulation that created the file.
Definition: Series.cpp:632
std::string softwareDependencies() const
Definition: Series.cpp:661
ReadIterations readIterations()
Entry point to the reading end of the streaming API.
Definition: Series.cpp:3352
Series & setOpenPMDextension(uint32_t openPMDextension)
Set a 32-bit mask of applied extensions to the openPMD standard.
Definition: Series.cpp:243
Series & setIterationEncoding(IterationEncoding iterationEncoding)
Set the encoding style for multiple iterations in this series.
Definition: Series.cpp:689
IterationEncoding iterationEncoding() const
Definition: Series.cpp:684
Series & setIterationFormat(std::string const &iterationFormat)
Set a pattern describing how to access single iterations in the raw file.
Definition: Series.cpp:700
Entry point for accessing Snapshots/Iterations.
Definition: Snapshots.hpp:50
Based on the logic of the former class ReadIterations, integrating into itself the logic of former Wr...
Definition: StatefulIterator.hpp:204
Definition: ContainerImpls.hpp:36
Layer to mirror structure of logical data and persistent data in file.
Definition: Writable.hpp:76
Definition: Attributable.hpp:107
Data members for Series.
Definition: Series.hpp:90
std::unordered_map< IterationIndex_t, size_t > m_snapshotToStep
For reading: In which IO step do I need to look for an Iteration?
Definition: Series.hpp:131
std::unique_ptr< StatefulIterator > m_sharedStatefulIterator
Series::readIterations() returns an iterator type that modifies the state of the Series (by proceedin...
Definition: Series.hpp:121
std::string m_filenamePrefix
Filename leading up to the expansion pattern.
Definition: Series.hpp:173
std::set< IterationIndex_t > m_currentlyActiveIterations
For writing: Remember which iterations have been written in the currently active output step.
Definition: Series.hpp:127
std::unordered_map< IterationIndex_t, std::string > m_iterationFilenames
This map contains the filenames of those Iterations which were found on the file system upon opening ...
Definition: Series.hpp:151
std::optional< std::string > m_overrideFilebasedFilename
Needed if reading a single iteration of a file-based series.
Definition: Series.hpp:161
StepStatus m_stepStatus
Whether a step is currently active for this iteration.
Definition: Series.hpp:215
std::string m_name
Name of the iteration without filename suffix.
Definition: Series.hpp:168
std::optional< ParsePreference > m_parsePreference
Remember the preference that the backend specified for parsing.
Definition: Series.hpp:237
std::string m_filenamePostfix
Filename after the expansion pattern without filename extension.
Definition: Series.hpp:177
int m_filenamePadding
The padding in file-based iteration encoding.
Definition: Series.hpp:188
bool m_parseLazily
True if a user opts into lazy parsing.
Definition: Series.hpp:219
bool m_wroteAtLeastOneIOStep
In variable-based encoding, all backends except ADIOS2 can only write one single iteration.
Definition: Series.hpp:229
std::string m_filenameExtension
Filename extension as specified by the user.
Definition: Series.hpp:182
IterationEncoding m_iterationEncoding
The iteration encoding used in this series.
Definition: Series.hpp:192
Format m_format
Detected IO format (backend).
Definition: Series.hpp:207
Public definitions of openPMD-api.
Definition: Date.cpp:29
Access
File access mode to use during IO.
Definition: Access.hpp:58
AdvanceMode
In step-based mode (i.e.
Definition: Streaming.hpp:46
StepStatus
Used in step-based mode (i.e.
Definition: Streaming.hpp:57
AdvanceStatus
In step-based mode (i.e.
Definition: Streaming.hpp:32
Format
File format to use during IO.
Definition: Format.hpp:30
IterationEncoding
Encoding scheme of the Iterations of a Series.
Definition: IterationEncoding.hpp:33