Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 166 additions & 50 deletions openeo/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ class DimensionAlreadyExistsException(MetadataException):


# TODO: make these dimension classes immutable data classes
# TODO: align better with STAC datacube extension
# TODO: align/adapt/integrate with pystac's datacube extension implementation?
class Dimension:
"""Base class for dimensions."""

Expand Down Expand Up @@ -71,7 +69,6 @@ def rename_labels(self, target, source) -> Dimension:

class SpatialDimension(Dimension):
# TODO: align better with STAC datacube extension: e.g. support "axis" (x or y)

DEFAULT_CRS = 4326

def __init__(
Expand Down Expand Up @@ -679,30 +676,16 @@ def metadata_from_stac(url: str) -> CubeMetadata:
"""
Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata`

Policy:
- If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands).
- Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty.

:param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection
:return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
"""
stac_object = pystac.read_file(href=url)
bands = _StacMetadataParser().bands_from_stac_object(stac_object)

# At least assume there are spatial dimensions
# TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions?
dimensions = [
SpatialDimension(name="x", extent=[None, None]),
SpatialDimension(name="y", extent=[None, None]),
]

# TODO #743: conditionally include band dimension when there was actual indication of band metadata?
band_dimension = BandDimension(name="bands", bands=bands)
dimensions.append(band_dimension)

# TODO: is it possible to derive the actual name of temporal dimension that the backend will use?
temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object)
if temporal_dimension:
dimensions.append(temporal_dimension)

metadata = CubeMetadata(dimensions=dimensions)
return metadata
parser = _StacMetadataParser()
return parser.metadata_from_stac_object(stac_object)

# Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9)
# TODO: remove this once support for Python 3.7 and 3.8 is dropped
Expand Down Expand Up @@ -760,39 +743,172 @@ def __init__(self, *, logger=_log, log_level=logging.DEBUG, supress_duplicate_wa
# Use caching trick to avoid duplicate warnings
self._warn = functools.lru_cache(maxsize=1000)(self._warn)

def metadata_from_stac_object(self, stac_object: pystac.STACObject) -> CubeMetadata:
"""
Build cube metadata from a STAC object.
"""
bands = self.bands_from_stac_object(stac_object)
dimensions = self.dimensions_from_stac_object(stac_object=stac_object, bands=bands)
return CubeMetadata(dimensions=dimensions)

def dimensions_from_stac_object(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]:
"""
Build dimension metadata from a STAC object.

Philosophy:
- If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands).
- Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty.
"""
if self.has_cube_dimensions(stac_object):
dimensions = self.parse_declared_dimensions(stac_object=stac_object, bands=bands)
if not any(isinstance(d, BandDimension) for d in dimensions) and isinstance(
stac_object, (pystac.Collection, pystac.Item)
):
dimensions.append(BandDimension(name="bands", bands=list(bands)))
return dimensions

dimensions: List[Dimension] = [
SpatialDimension(name="x", extent=[None, None]),
SpatialDimension(name="y", extent=[None, None]),
TemporalDimension(name="t", extent=self.infer_temporal_extent(stac_object)),
]
if isinstance(stac_object, (pystac.Collection, pystac.Item)):
dimensions.append(BandDimension(name="bands", bands=list(bands)))
return dimensions

def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]:
"""
Extract the temporal dimension from a STAC Collection/Item (if any)
"""
# TODO: also extract temporal dimension from assets?
if _PYSTAC_1_9_EXTENSION_INTERFACE:
if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"):
temporal_dims = [
(n, d.extent or [None, None])
for (n, d) in stac_obj.ext.cube.dimensions.items()
if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL
if self.has_cube_dimensions(stac_obj):
temporal_dimensions = [
d
for d in self.parse_declared_dimensions(stac_object=stac_obj, bands=_BandList([]))
if isinstance(d, TemporalDimension)
]
if len(temporal_dimensions) == 1:
return temporal_dimensions[0]

if isinstance(stac_obj, (pystac.Collection, pystac.Item)):
return TemporalDimension(name="t", extent=self.infer_temporal_extent(stac_obj))

def has_cube_dimensions(self, stac_object: pystac.STACObject) -> bool:
cube_dimensions = self.cube_dimensions_dict(stac_object)
return isinstance(cube_dimensions, dict) and len(cube_dimensions) > 0

def cube_dimensions_dict(self, stac_object: pystac.STACObject) -> Dict[str, dict]:
"""
Return raw cube:dimensions dict from a Collection/Item, or {}.
"""
if isinstance(stac_object, pystac.Item):
return stac_object.properties.get("cube:dimensions", {}) or {}
if isinstance(stac_object, pystac.Collection):
return stac_object.extra_fields.get("cube:dimensions", {}) or {}
return {}

def infer_temporal_extent(self, stac_object: pystac.STACObject) -> List[Optional[str]]:
"""
Best-effort temporal extent:
- Collection: extent.temporal interval
- Item: datetime or start/end
"""
if isinstance(stac_object, pystac.Collection) and stac_object.extent and stac_object.extent.temporal:
interval = stac_object.extent.temporal.intervals[0]
return [Rfc3339(propagate_none=True).normalize(d) for d in interval]

if isinstance(stac_object, pystac.Item):
props = getattr(stac_object, "properties", {}) or {}
dt_ = props.get("datetime")
if dt_:
norm = Rfc3339(propagate_none=True).normalize(dt_)
return [norm, norm]
start = props.get("start_datetime")
end = props.get("end_datetime")
if start or end:
return [
Rfc3339(propagate_none=True).normalize(start),
Rfc3339(propagate_none=True).normalize(end),
]
if len(temporal_dims) == 1:
name, extent = temporal_dims[0]
return TemporalDimension(name=name, extent=extent)
elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal:
# No explicit "cube:dimensions": build fallback from "extent.temporal",
# with dimension name "t" (openEO API recommendation).
extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]]
return TemporalDimension(name="t", extent=extent)
else:
if isinstance(stac_obj, pystac.Item):
cube_dimensions = stac_obj.properties.get("cube:dimensions", {})
elif isinstance(stac_obj, pystac.Collection):
cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {})

return [None, None]

@staticmethod
def _safe_extent_from_pystac_cube_dim(dim) -> list:
"""
PySTAC cube dimension wrapper may raise if 'extent' is missing.
Also, depending on serialization/version, extent might live in extra_fields.
"""
try:
ext = dim.extent
except Exception:
ext = None

if not ext:
extra = getattr(dim, "extra_fields", {}) or {}
ext = extra.get("extent")

return ext or [None, None]

def parse_declared_dimensions(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]:
"""
Parse dimensions declared through cube:dimensions.
"""
if (
_PYSTAC_1_9_EXTENSION_INTERFACE
and getattr(stac_object, "ext", None) is not None
and stac_object.ext.has("cube")
and hasattr(stac_object.ext, "cube")
):
return self._parse_cube_dimensions_from_pystac_extension(stac_object=stac_object, bands=bands)
return self._parse_cube_dimensions_from_raw_dict(stac_object=stac_object, bands=bands)

def _parse_cube_dimensions_from_pystac_extension(
self, stac_object: pystac.STACObject, bands: _BandList
) -> List[Dimension]:
"""
Parse dimensions from PySTAC's cube extension wrapper (when present).
Important: PySTAC DimensionType only has SPATIAL + TEMPORAL.
Everything else is treated as band-like.
"""
dimensions = []
for name, dim in stac_object.ext.cube.dimensions.items():
dim_type = getattr(dim, "dim_type", None)
extent = self._safe_extent_from_pystac_cube_dim(dim)

if dim_type == pystac.extensions.datacube.DimensionType.SPATIAL:
dimensions.append(SpatialDimension(name=name, extent=extent))
elif dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL:
dimensions.append(TemporalDimension(name=name, extent=extent))
else:
cube_dimensions = {}
temporal_dims = [
(n, d.get("extent", [None, None])) for (n, d) in cube_dimensions.items() if d.get("type") == "temporal"
]
if len(temporal_dims) == 1:
name, extent = temporal_dims[0]
return TemporalDimension(name=name, extent=extent)
dimensions.append(BandDimension(name=name, bands=list(bands)))

return dimensions

def _parse_cube_dimensions_from_raw_dict(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]:
"""
Parse dimensions from raw cube:dimensions dict.
Supports 'spatial', 'temporal', and ('bands' or 'spectral' as an alias).
"""
dimensions = []
cube_dimensions = self.cube_dimensions_dict(stac_object)

for name, dim in cube_dimensions.items():
if not isinstance(dim, dict):
continue

dim_type = dim.get("type")
extent = dim.get("extent", [None, None])

if dim_type == "spatial":
dimensions.append(SpatialDimension(name=name, extent=extent))
elif dim_type == "temporal":
dimensions.append(TemporalDimension(name=name, extent=extent))
elif dim_type in ("bands", "spectral"):
dimensions.append(BandDimension(name=name, bands=list(bands)))
else:
dimensions.append(Dimension(name=name, type=dim_type))

return dimensions

def _band_from_eo_bands_metadata(self, band: Union[dict, pystac.extensions.eo.Band]) -> Band:
"""Construct band from metadata in eo v1.1 style"""
Expand Down
73 changes: 71 additions & 2 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1156,7 +1156,10 @@ def test_metadata_from_stac_bands(tmp_path, test_stac, expected):
# TODO #738 real request mocking of STAC resources compatible with pystac?
path.write_text(json.dumps(test_stac))
metadata = metadata_from_stac(str(path))
assert metadata.band_names == expected
if expected:
assert metadata.band_names == expected
else:
assert not metadata.has_band_dimension()



Expand Down Expand Up @@ -1253,7 +1256,73 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected):
assert isinstance(dim, TemporalDimension)
assert (dim.name, dim.extent) == expected
else:
assert not metadata.has_temporal_dimension()
# With openEO defaults, a temporal dimension name ('t') can exist even when extent is unknown.
# Depending on STAC input, pystac/datacube parsing can produce:
# - a degenerate extent [d, d] when a single `datetime` is present
# - an "unknown" extent [None, None] when there is no temporal info
assert metadata.has_temporal_dimension()
extent = metadata.temporal_dimension.extent
if extent is None:
pass
else:
assert isinstance(extent, list) and len(extent) == 2
# Allow "unknown" temporal extent representation
if extent == [None, None]:
pass
else:
# Allow degenerate interval when a single datetime is available
assert extent[0] == extent[1]



# Dimension name resolution policy (STAC cube:dimensions vs openEO defaults)
@pytest.mark.parametrize(
["stac_dict", "expected_dims"],
[
(
# No cube:dimensions -> fall back to openEO default naming convention
StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}]}),
{"t", "bands", "y", "x"},
),
(
# No cube:dimensions (item) -> fall back to openEO default naming convention
StacDummyBuilder.item(
properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "B01"}]}
),
{"t", "bands", "y", "x"},
),
(
# cube:dimensions present -> use the dimension names as suggested by cube:dimensions keys
StacDummyBuilder.collection(
cube_dimensions={
"time": {"type": "temporal", "axis": "t", "extent": ["2024-04-04", "2024-06-06"]},
"band": {"type": "bands", "axis": "bands", "values": ["B01"]},
"y": {"type": "spatial", "axis": "y", "extent": [0, 1]},
"x": {"type": "spatial", "axis": "x", "extent": [0, 1]},
}
),
{"time", "band", "y", "x"},
),
],
)
def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path, stac_dict, expected_dims):
path = tmp_path / "stac.json"
# TODO #738 real request mocking of STAC resources compatible with pystac?
path.write_text(json.dumps(stac_dict))
metadata = metadata_from_stac(str(path))

got = tuple(metadata.dimension_names() or ())

# Order-insensitive check: names only
assert set(got) == expected_dims

# Ensure the policy logic is exercised correctly:
# cube:dimensions can be located at root (collection) or in properties (item)
cube_dims = stac_dict.get("cube:dimensions") or (stac_dict.get("properties") or {}).get("cube:dimensions")
if cube_dims is None:
assert set(got) == {"t", "bands", "y", "x"}
else:
assert set(got) == set(cube_dims.keys())


@pytest.mark.parametrize(
Expand Down