diff --git a/src/stagpy/_caching.py b/src/stagpy/_caching.py index b2a7c33..01a41f0 100644 --- a/src/stagpy/_caching.py +++ b/src/stagpy/_caching.py @@ -12,6 +12,7 @@ if typing.TYPE_CHECKING: from collections.abc import Mapping + from pathlib import Path from .datatypes import Field from .stagyydata import StagyyData @@ -89,15 +90,14 @@ class StepSnapInfo: @dataclass(frozen=True) class StepSnapH5(StepSnap): - sdat: StagyyData + timeh5: Path @cached_property def _info(self) -> StepSnapInfo: - assert self.sdat.hdf5 is not None isnap = -1 step_to_snap = {} snap_to_step = {} - for isnap, istep in parsers.h5.extras.isnap_istep(self.sdat.hdf5): + for isnap, istep in parsers.h5.extras.isnap_istep(self.timeh5): step_to_snap[istep] = isnap snap_to_step[isnap] = istep return StepSnapInfo( @@ -131,7 +131,7 @@ def _snap_to_step(self) -> dict[int, int | None]: @cached_property def isnap_max(self) -> int: imax = -1 - out_stem = re.escape(self.sdat.par.legacy_output("_").name[:-1]) + out_stem = re.escape(self.sdat.par.legacy_output("").name[:-1]) rgx = re.compile(f"^{out_stem}_([a-zA-Z]+)([0-9]{{5}})$") fstems = set(fstem for fstem in phyvars.FIELD_FILES) for fname in self.sdat._files: diff --git a/src/stagpy/parfile.py b/src/stagpy/parfile.py index 2370bed..622121c 100644 --- a/src/stagpy/parfile.py +++ b/src/stagpy/parfile.py @@ -68,7 +68,7 @@ def from_main_par(parfile: Path, read_parameters_dat: bool = True) -> StagyyPar: par_main = StagyyPar(nml=par_dflt.nml, root=par_main.root) if read_parameters_dat: - outfile = par_main.legacy_output("_parameters.dat") + outfile = par_main.legacy_output("parameters.dat") if outfile.is_file(): par_main._update(StagyyPar._from_file(outfile)) outfile = par_main.h5_output("parameters.dat") @@ -80,9 +80,11 @@ def get(self, section: str, option: str, default: T) -> T: sec = self.nml.get(section, {}) return sec.get(option, default) - def legacy_output(self, suffix: str) -> Path: + def legacy_output(self, suffix: str, isnap: int | None = None) -> Path: + if isnap is not None: + suffix += f"{isnap:05d}" stem = self.get("ioin", "output_file_stem", "output") - return self.root / (stem + suffix) + return self.root / f"{stem}_{suffix}" def h5_output(self, filename: str) -> Path: h5folder = self.get("ioin", "hdf5_output_folder", "+hdf5") diff --git a/src/stagpy/parsers/h5/extras.py b/src/stagpy/parsers/h5/extras.py index fa047ba..8a571c1 100644 --- a/src/stagpy/parsers/h5/extras.py +++ b/src/stagpy/parsers/h5/extras.py @@ -9,16 +9,16 @@ from pathlib import Path -def isnap_istep(h5folder: Path) -> Iterator[tuple[int, int]]: - """Iterate through (isnap, istep) recorded in h5folder/'time_botT.h5'. +def isnap_istep(timeh5: Path) -> Iterator[tuple[int, int]]: + """Iterate through (isnap, istep) recorded in 'time_botT.h5'. Args: - h5folder: directory of HDF5 output files. + timeh5: path of the time h5 file. Yields: tuple (isnap, istep). """ - with h5py.File(h5folder / "time_botT.h5", "r") as h5f: + with h5py.File(timeh5, "r") as h5f: for name, dset in h5f.items(): isnap = int(name[-5:]) if len(dset) == 3: diff --git a/src/stagpy/stagyydata.py b/src/stagpy/stagyydata.py index 889adcb..fec7673 100644 --- a/src/stagpy/stagyydata.py +++ b/src/stagpy/stagyydata.py @@ -78,11 +78,10 @@ class Refstate: @cached_property def _data(self) -> tuple[list[list[DataFrame]], list[DataFrame]]: """Read reference state profile.""" - reffile = self.sdat.filename("refstat.dat") - if self.sdat.hdf5 and not reffile.is_file(): - # check legacy folder as well - reffile = self.sdat.filename("refstat.dat", force_legacy=True) - data = parsers.txt.refstate(reffile) + reffile = self.sdat._find_file("refstat.dat") + data = None + if reffile is not None: + data = parsers.txt.refstate(reffile) if data is None: raise error.NoRefstateError(self.sdat) return data @@ -147,15 +146,14 @@ def _cached_extra(self) -> dict[str, dt.Tseries]: @cached_property def _data(self) -> DataFrame | None: - timefile = self.sdat.filename("TimeSeries.h5") + timefile: Path | None + timefile = self.sdat.par.h5_output("TimeSeries.h5") data = parsers.h5.tseries.tseries(timefile) if data is not None: return data - timefile = self.sdat.filename("time.dat") - if self.sdat.hdf5 and not timefile.is_file(): - # check legacy folder as well - timefile = self.sdat.filename("time.dat", force_legacy=True) - data = parsers.txt.tseries(timefile) + timefile = self.sdat._find_file("time.dat") + if timefile is not None: + data = parsers.txt.tseries(timefile) return data @property @@ -671,12 +669,6 @@ def parpath(self) -> Path: return parpath return parpath / "par" - @cached_property - def hdf5(self) -> Path | None: - """Path of output hdf5 folder if relevant, None otherwise.""" - h5xmf = self.par.h5_output("Data.xmf") - return h5xmf.parent if h5xmf.is_file() else None - @cached_property def steps(self) -> Steps: """Collection of time steps.""" @@ -698,32 +690,32 @@ def refstate(self) -> Refstate: return Refstate(self) @cached_property - def _dataxmf(self) -> FieldXmf: - assert self.hdf5 is not None - return FieldXmf( - path=self.hdf5 / "Data.xmf", - ) + def _dataxmf(self) -> FieldXmf | None: + path = self.par.h5_output("Data.xmf") + if path.is_file(): + return FieldXmf(path=path) + return None @cached_property - def _topxmf(self) -> FieldXmf: - assert self.hdf5 is not None - return FieldXmf( - path=self.hdf5 / "DataSurface.xmf", - ) + def _topxmf(self) -> FieldXmf | None: + path = self.par.h5_output("DataSurface.xmf") + if path.is_file(): + return FieldXmf(path=path) + return None @cached_property - def _botxmf(self) -> FieldXmf: - assert self.hdf5 is not None - return FieldXmf( - path=self.hdf5 / "DataBottom.xmf", - ) + def _botxmf(self) -> FieldXmf | None: + path = self.par.h5_output("DataBottom.xmf") + if path.is_file(): + return FieldXmf(path=path) + return None @cached_property - def _traxmf(self) -> TracersXmf: - assert self.hdf5 is not None - return TracersXmf( - path=self.hdf5 / "DataTracers.xmf", - ) + def _traxmf(self) -> TracersXmf | None: + path = self.par.h5_output("DataTracers.xmf") + if path.is_file(): + return TracersXmf(path=path) + return None @cached_property def par(self) -> StagyyPar: @@ -732,15 +724,15 @@ def par(self) -> StagyyPar: @cached_property def _rprof_and_times(self) -> tuple[dict[int, DataFrame], DataFrame | None]: - rproffile = self.filename("rprof.h5") + rproffile: Path | None + rproffile = self.par.h5_output("rprof.h5") data = parsers.h5.rprof.rprof(rproffile) if data[1] is not None: return data - rproffile = self.filename("rprof.dat") - if self.hdf5 and not rproffile.is_file(): - # check legacy folder as well - rproffile = self.filename("rprof.dat", force_legacy=True) - return parsers.txt.rprof(rproffile) + rproffile = self._find_file("rprof.dat") + if rproffile is not None: + return parsers.txt.rprof(rproffile) + return {}, None @property def rtimes(self) -> DataFrame | None: @@ -750,7 +742,7 @@ def rtimes(self) -> DataFrame | None: @cached_property def _files(self) -> set[Path]: """Set of found binary files output by StagYY.""" - out_dir = self.par.legacy_output("_").parent + out_dir = self.par.legacy_output("").parent if out_dir.is_dir(): return set(out_dir.iterdir()) return set() @@ -767,32 +759,19 @@ def set_nfields_max(self, nfields: int | None) -> None: raise error.InvalidNfieldsError(nfields) self._field_cache.resize(nfields) - def filename( - self, - fname: str, - timestep: int | None = None, - suffix: str = "", - force_legacy: bool = False, - ) -> Path: - """Return name of StagYY output file. - - Args: - fname: name stem. - timestep: snapshot number if relevant. - suffix: optional suffix of file name. - force_legacy: force returning the legacy output path. + def _find_file(self, fname: str) -> Path | None: + """Return path of StagYY output file if found. - Returns: - the path of the output file constructed with the provided segments. + This searches in the legacy folder first, and then the hdf5 + output folder. """ - if timestep is not None: - fname += f"{timestep:05d}" - fname += suffix - if not force_legacy and self.hdf5: - fpath = self.par.h5_output(fname) - else: - fpath = self.par.legacy_output(f"_{fname}") - return fpath + fpath = self.par.legacy_output(fname) + if fpath.is_file(): + return fpath + fpath = self.par.h5_output(fname) + if fpath.is_file(): + return fpath + return None def _binfiles_set(self, isnap: int) -> set[Path]: """Set of existing binary files at a given snap. @@ -804,8 +783,7 @@ def _binfiles_set(self, isnap: int) -> set[Path]: the set of output files available for this snapshot number. """ possible_files = set( - self.filename(fstem, isnap, force_legacy=True) - for fstem in phyvars.FIELD_FILES + self.par.legacy_output(fstem, isnap) for fstem in phyvars.FIELD_FILES ) return possible_files & self._files @@ -815,6 +793,7 @@ def _field_cache(self) -> FieldCache: @cached_property def _step_snap(self) -> StepSnap: - if self.hdf5 is not None: - return StepSnapH5(sdat=self) + timeh5 = self.par.h5_output("time_botT.h5") + if timeh5.is_file(): + return StepSnapH5(timeh5=timeh5) return StepSnapLegacy(sdat=self) diff --git a/src/stagpy/step.py b/src/stagpy/step.py index fbbef37..1433c9f 100644 --- a/src/stagpy/step.py +++ b/src/stagpy/step.py @@ -50,7 +50,7 @@ def _maybe_header(self) -> dict[str, Any] | None: header = None if binfiles: header = parsers.bin.field.header(binfiles.pop()) - elif sdat.hdf5: + elif sdat._dataxmf is not None: header = parsers.h5.field.read_geom(sdat._dataxmf, self.step.isnap) return header if header else None @@ -352,32 +352,32 @@ def _get_raw_data(self, name: str) -> tuple[list[str], Any]: parsed_data = None if self.step.isnap is None: return list_fvar, None - fieldfile = self.step.sdat.filename( - filestem, self.step.isnap, force_legacy=True - ) - if not fieldfile.is_file(): - fieldfile = self.step.sdat.filename(filestem, self.step.isnap) + fieldfile = self.step.sdat.par.legacy_output(filestem, self.step.isnap) if fieldfile.is_file(): parsed_data = parsers.bin.field.field(fieldfile) - elif self.step.sdat.hdf5 and self.filesh5: - # files in which the requested data can be found - files = [ - (stem, fvars) for stem, fvars in self.filesh5.items() if name in fvars - ] - for filestem, list_fvar in files: - sdat = self.step.sdat - if filestem in phyvars.SFIELD_FILES_H5: - xmff = sdat._botxmf if name.endswith("bot") else sdat._topxmf - header = self.step.geom._maybe_header - assert header is not None - else: - xmff = sdat._dataxmf - header = None - parsed_data = parsers.h5.field.field( - xmff, filestem, self.step.isnap, header - ) - if parsed_data is not None: - break + return list_fvar, parsed_data + if not self.filesh5: + return list_fvar, parsed_data + # files in which the requested data can be found + files = [(stem, fvars) for stem, fvars in self.filesh5.items() if name in fvars] + sdat = self.step.sdat + if filestem in phyvars.SFIELD_FILES_H5: + xmff = sdat._botxmf if name.endswith("bot") else sdat._topxmf + else: + xmff = sdat._dataxmf + if xmff is None: + return list_fvar, parsed_data + for filestem, list_fvar in files: + if filestem in phyvars.SFIELD_FILES_H5: + header = self.step.geom._maybe_header + assert header is not None + else: + header = None + parsed_data = parsers.h5.field.field( + xmff, filestem, self.step.isnap, header + ) + if parsed_data is not None: + break return list_fvar, parsed_data @@ -403,12 +403,13 @@ def __getitem__(self, name: str) -> list[NDArray[np.floating]] | None: return self._data[name] if self.step.isnap is None: return None + sdat = self.step.sdat data = parsers.bin.tracers.tracers( - self.step.sdat.filename("tra", timestep=self.step.isnap, force_legacy=True) + sdat.par.legacy_output("tra", self.step.isnap) ) - if data is None and self.step.sdat.hdf5: + if data is None and sdat._traxmf is not None: self._data[name] = parsers.h5.tracers.tracers( # type: ignore - self.step.sdat._traxmf, + sdat._traxmf, name, self.step.isnap, ) diff --git a/tests/test_parsers.py b/tests/test_parsers.py index c882e80..9669914 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -6,15 +6,14 @@ def test_time_series_prs(sdat_legacy: StagyyData) -> None: sdat = sdat_legacy - data = parsers.txt.tseries(sdat.filename("time.dat")) + data = parsers.txt.tseries(sdat.par.legacy_output("time.dat")) assert data is not None assert (data.columns[3:6] == ["Tmin", "Tmean", "Tmax"]).all() def test_time_series_h5(sdat_h5: StagyyData) -> None: - sdat = sdat_h5 - assert sdat.hdf5 is not None - data = parsers.h5.tseries.tseries(sdat.hdf5 / "TimeSeries.h5") + path = sdat_h5.par.h5_output("TimeSeries.h5") + data = parsers.h5.tseries.tseries(path) assert data is not None assert (data.columns[3:6] == ["Tmin", "Tmean", "Tmax"]).all() @@ -25,14 +24,13 @@ def test_time_series_invalid_prs() -> None: def test_rprof_prs(sdat_legacy: StagyyData) -> None: sdat = sdat_legacy - data, _time = parsers.txt.rprof(sdat.filename("rprof.dat")) + data, _time = parsers.txt.rprof(sdat.par.legacy_output("rprof.dat")) assert all((df.columns[:3] == ["r", "Tmean", "Tmin"]).all() for df in data.values()) def test_rprof_h5(sdat_h5: StagyyData) -> None: - sdat = sdat_h5 - assert sdat.hdf5 is not None - data, _times = parsers.h5.rprof.rprof(sdat.hdf5 / "rprof.h5") + path = sdat_h5.par.h5_output("rprof.h5") + data, _times = parsers.h5.rprof.rprof(path) assert data is not None assert (data[1000].columns[:3] == ["r", "Tmean", "Tmin"]).all() @@ -43,7 +41,7 @@ def test_rprof_invalid_prs() -> None: def test_fields_prs(sdat_legacy: StagyyData) -> None: sdat = sdat_legacy - parsed = parsers.bin.field.field(sdat.filename("t", len(sdat.snaps) - 1)) + parsed = parsers.bin.field.field(sdat.par.legacy_output("t", len(sdat.snaps) - 1)) assert parsed is not None hdr, flds = parsed assert flds.shape[0] == 1 @@ -53,14 +51,14 @@ def test_fields_prs(sdat_legacy: StagyyData) -> None: def test_field_header_prs(sdat_legacy: StagyyData) -> None: sdat = sdat_legacy - hdr = parsers.bin.field.header(sdat.filename("t", len(sdat.snaps) - 1)) + hdr = parsers.bin.field.header(sdat.par.legacy_output("t", len(sdat.snaps) - 1)) assert hdr is not None assert hdr["nts"].shape == (3,) def test_fields_istep_prs(sdat_legacy: StagyyData) -> None: sdat = sdat_legacy - istep = parsers.bin.field.istep(sdat.filename("t", len(sdat.snaps) - 1)) + istep = parsers.bin.field.istep(sdat.par.legacy_output("t", len(sdat.snaps) - 1)) assert istep == sdat.snaps[-1].istep