diff --git a/openeo/metadata.py b/openeo/metadata.py index 49edef428..8a94d3cea 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -35,8 +35,6 @@ class DimensionAlreadyExistsException(MetadataException): # TODO: make these dimension classes immutable data classes -# TODO: align better with STAC datacube extension -# TODO: align/adapt/integrate with pystac's datacube extension implementation? class Dimension: """Base class for dimensions.""" @@ -71,7 +69,6 @@ def rename_labels(self, target, source) -> Dimension: class SpatialDimension(Dimension): # TODO: align better with STAC datacube extension: e.g. support "axis" (x or y) - DEFAULT_CRS = 4326 def __init__( @@ -679,30 +676,16 @@ def metadata_from_stac(url: str) -> CubeMetadata: """ Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` + Policy: + - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). + - Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty. + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. """ stac_object = pystac.read_file(href=url) - bands = _StacMetadataParser().bands_from_stac_object(stac_object) - - # At least assume there are spatial dimensions - # TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions? - dimensions = [ - SpatialDimension(name="x", extent=[None, None]), - SpatialDimension(name="y", extent=[None, None]), - ] - - # TODO #743: conditionally include band dimension when there was actual indication of band metadata? - band_dimension = BandDimension(name="bands", bands=bands) - dimensions.append(band_dimension) - - # TODO: is it possible to derive the actual name of temporal dimension that the backend will use? - temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object) - if temporal_dimension: - dimensions.append(temporal_dimension) - - metadata = CubeMetadata(dimensions=dimensions) - return metadata + parser = _StacMetadataParser() + return parser.metadata_from_stac_object(stac_object) # Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9) # TODO: remove this once support for Python 3.7 and 3.8 is dropped @@ -760,39 +743,172 @@ def __init__(self, *, logger=_log, log_level=logging.DEBUG, supress_duplicate_wa # Use caching trick to avoid duplicate warnings self._warn = functools.lru_cache(maxsize=1000)(self._warn) + def metadata_from_stac_object(self, stac_object: pystac.STACObject) -> CubeMetadata: + """ + Build cube metadata from a STAC object. + """ + bands = self.bands_from_stac_object(stac_object) + dimensions = self.dimensions_from_stac_object(stac_object=stac_object, bands=bands) + return CubeMetadata(dimensions=dimensions) + + def dimensions_from_stac_object(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: + """ + Build dimension metadata from a STAC object. + + Philosophy: + - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). + - Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty. + """ + if self.has_cube_dimensions(stac_object): + dimensions = self.parse_declared_dimensions(stac_object=stac_object, bands=bands) + if not any(isinstance(d, BandDimension) for d in dimensions) and isinstance( + stac_object, (pystac.Collection, pystac.Item) + ): + dimensions.append(BandDimension(name="bands", bands=list(bands))) + return dimensions + + dimensions: List[Dimension] = [ + SpatialDimension(name="x", extent=[None, None]), + SpatialDimension(name="y", extent=[None, None]), + TemporalDimension(name="t", extent=self.infer_temporal_extent(stac_object)), + ] + if isinstance(stac_object, (pystac.Collection, pystac.Item)): + dimensions.append(BandDimension(name="bands", bands=list(bands))) + return dimensions + def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]: """ Extract the temporal dimension from a STAC Collection/Item (if any) """ - # TODO: also extract temporal dimension from assets? - if _PYSTAC_1_9_EXTENSION_INTERFACE: - if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"): - temporal_dims = [ - (n, d.extent or [None, None]) - for (n, d) in stac_obj.ext.cube.dimensions.items() - if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL + if self.has_cube_dimensions(stac_obj): + temporal_dimensions = [ + d + for d in self.parse_declared_dimensions(stac_object=stac_obj, bands=_BandList([])) + if isinstance(d, TemporalDimension) + ] + if len(temporal_dimensions) == 1: + return temporal_dimensions[0] + + if isinstance(stac_obj, (pystac.Collection, pystac.Item)): + return TemporalDimension(name="t", extent=self.infer_temporal_extent(stac_obj)) + + def has_cube_dimensions(self, stac_object: pystac.STACObject) -> bool: + cube_dimensions = self.cube_dimensions_dict(stac_object) + return isinstance(cube_dimensions, dict) and len(cube_dimensions) > 0 + + def cube_dimensions_dict(self, stac_object: pystac.STACObject) -> Dict[str, dict]: + """ + Return raw cube:dimensions dict from a Collection/Item, or {}. + """ + if isinstance(stac_object, pystac.Item): + return stac_object.properties.get("cube:dimensions", {}) or {} + if isinstance(stac_object, pystac.Collection): + return stac_object.extra_fields.get("cube:dimensions", {}) or {} + return {} + + def infer_temporal_extent(self, stac_object: pystac.STACObject) -> List[Optional[str]]: + """ + Best-effort temporal extent: + - Collection: extent.temporal interval + - Item: datetime or start/end + """ + if isinstance(stac_object, pystac.Collection) and stac_object.extent and stac_object.extent.temporal: + interval = stac_object.extent.temporal.intervals[0] + return [Rfc3339(propagate_none=True).normalize(d) for d in interval] + + if isinstance(stac_object, pystac.Item): + props = getattr(stac_object, "properties", {}) or {} + dt_ = props.get("datetime") + if dt_: + norm = Rfc3339(propagate_none=True).normalize(dt_) + return [norm, norm] + start = props.get("start_datetime") + end = props.get("end_datetime") + if start or end: + return [ + Rfc3339(propagate_none=True).normalize(start), + Rfc3339(propagate_none=True).normalize(end), ] - if len(temporal_dims) == 1: - name, extent = temporal_dims[0] - return TemporalDimension(name=name, extent=extent) - elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal: - # No explicit "cube:dimensions": build fallback from "extent.temporal", - # with dimension name "t" (openEO API recommendation). - extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]] - return TemporalDimension(name="t", extent=extent) - else: - if isinstance(stac_obj, pystac.Item): - cube_dimensions = stac_obj.properties.get("cube:dimensions", {}) - elif isinstance(stac_obj, pystac.Collection): - cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {}) + + return [None, None] + + @staticmethod + def _safe_extent_from_pystac_cube_dim(dim) -> list: + """ + PySTAC cube dimension wrapper may raise if 'extent' is missing. + Also, depending on serialization/version, extent might live in extra_fields. + """ + try: + ext = dim.extent + except Exception: + ext = None + + if not ext: + extra = getattr(dim, "extra_fields", {}) or {} + ext = extra.get("extent") + + return ext or [None, None] + + def parse_declared_dimensions(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: + """ + Parse dimensions declared through cube:dimensions. + """ + if ( + _PYSTAC_1_9_EXTENSION_INTERFACE + and getattr(stac_object, "ext", None) is not None + and stac_object.ext.has("cube") + and hasattr(stac_object.ext, "cube") + ): + return self._parse_cube_dimensions_from_pystac_extension(stac_object=stac_object, bands=bands) + return self._parse_cube_dimensions_from_raw_dict(stac_object=stac_object, bands=bands) + + def _parse_cube_dimensions_from_pystac_extension( + self, stac_object: pystac.STACObject, bands: _BandList + ) -> List[Dimension]: + """ + Parse dimensions from PySTAC's cube extension wrapper (when present). + Important: PySTAC DimensionType only has SPATIAL + TEMPORAL. + Everything else is treated as band-like. + """ + dimensions = [] + for name, dim in stac_object.ext.cube.dimensions.items(): + dim_type = getattr(dim, "dim_type", None) + extent = self._safe_extent_from_pystac_cube_dim(dim) + + if dim_type == pystac.extensions.datacube.DimensionType.SPATIAL: + dimensions.append(SpatialDimension(name=name, extent=extent)) + elif dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL: + dimensions.append(TemporalDimension(name=name, extent=extent)) else: - cube_dimensions = {} - temporal_dims = [ - (n, d.get("extent", [None, None])) for (n, d) in cube_dimensions.items() if d.get("type") == "temporal" - ] - if len(temporal_dims) == 1: - name, extent = temporal_dims[0] - return TemporalDimension(name=name, extent=extent) + dimensions.append(BandDimension(name=name, bands=list(bands))) + + return dimensions + + def _parse_cube_dimensions_from_raw_dict(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: + """ + Parse dimensions from raw cube:dimensions dict. + Supports 'spatial', 'temporal', and ('bands' or 'spectral' as an alias). + """ + dimensions = [] + cube_dimensions = self.cube_dimensions_dict(stac_object) + + for name, dim in cube_dimensions.items(): + if not isinstance(dim, dict): + continue + + dim_type = dim.get("type") + extent = dim.get("extent", [None, None]) + + if dim_type == "spatial": + dimensions.append(SpatialDimension(name=name, extent=extent)) + elif dim_type == "temporal": + dimensions.append(TemporalDimension(name=name, extent=extent)) + elif dim_type in ("bands", "spectral"): + dimensions.append(BandDimension(name=name, bands=list(bands))) + else: + dimensions.append(Dimension(name=name, type=dim_type)) + + return dimensions def _band_from_eo_bands_metadata(self, band: Union[dict, pystac.extensions.eo.Band]) -> Band: """Construct band from metadata in eo v1.1 style""" diff --git a/tests/test_metadata.py b/tests/test_metadata.py index ef8fbf65e..a633ef87a 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1156,7 +1156,10 @@ def test_metadata_from_stac_bands(tmp_path, test_stac, expected): # TODO #738 real request mocking of STAC resources compatible with pystac? path.write_text(json.dumps(test_stac)) metadata = metadata_from_stac(str(path)) - assert metadata.band_names == expected + if expected: + assert metadata.band_names == expected + else: + assert not metadata.has_band_dimension() @@ -1253,7 +1256,73 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected): assert isinstance(dim, TemporalDimension) assert (dim.name, dim.extent) == expected else: - assert not metadata.has_temporal_dimension() + # With openEO defaults, a temporal dimension name ('t') can exist even when extent is unknown. + # Depending on STAC input, pystac/datacube parsing can produce: + # - a degenerate extent [d, d] when a single `datetime` is present + # - an "unknown" extent [None, None] when there is no temporal info + assert metadata.has_temporal_dimension() + extent = metadata.temporal_dimension.extent + if extent is None: + pass + else: + assert isinstance(extent, list) and len(extent) == 2 + # Allow "unknown" temporal extent representation + if extent == [None, None]: + pass + else: + # Allow degenerate interval when a single datetime is available + assert extent[0] == extent[1] + + + +# Dimension name resolution policy (STAC cube:dimensions vs openEO defaults) +@pytest.mark.parametrize( + ["stac_dict", "expected_dims"], + [ + ( + # No cube:dimensions -> fall back to openEO default naming convention + StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}]}), + {"t", "bands", "y", "x"}, + ), + ( + # No cube:dimensions (item) -> fall back to openEO default naming convention + StacDummyBuilder.item( + properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "B01"}]} + ), + {"t", "bands", "y", "x"}, + ), + ( + # cube:dimensions present -> use the dimension names as suggested by cube:dimensions keys + StacDummyBuilder.collection( + cube_dimensions={ + "time": {"type": "temporal", "axis": "t", "extent": ["2024-04-04", "2024-06-06"]}, + "band": {"type": "bands", "axis": "bands", "values": ["B01"]}, + "y": {"type": "spatial", "axis": "y", "extent": [0, 1]}, + "x": {"type": "spatial", "axis": "x", "extent": [0, 1]}, + } + ), + {"time", "band", "y", "x"}, + ), + ], +) +def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path, stac_dict, expected_dims): + path = tmp_path / "stac.json" + # TODO #738 real request mocking of STAC resources compatible with pystac? + path.write_text(json.dumps(stac_dict)) + metadata = metadata_from_stac(str(path)) + + got = tuple(metadata.dimension_names() or ()) + + # Order-insensitive check: names only + assert set(got) == expected_dims + + # Ensure the policy logic is exercised correctly: + # cube:dimensions can be located at root (collection) or in properties (item) + cube_dims = stac_dict.get("cube:dimensions") or (stac_dict.get("properties") or {}).get("cube:dimensions") + if cube_dims is None: + assert set(got) == {"t", "bands", "y", "x"} + else: + assert set(got) == set(cube_dims.keys()) @pytest.mark.parametrize(