From 84e07bfd8b72ad1ebfcea787fc5d0e402f0a6be5 Mon Sep 17 00:00:00 2001 From: Florian de Boissieu <fdeboiss@gmail.com> Date: Fri, 24 Jan 2025 10:43:45 +0100 Subject: [PATCH 1/6] add function and method filter_assets --- simplestac/utils.py | 79 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/simplestac/utils.py b/simplestac/utils.py index e887a35..2708ccc 100644 --- a/simplestac/utils.py +++ b/simplestac/utils.py @@ -81,17 +81,8 @@ class ExtendPystacClasses: object If `inplace` is False, a cloned collection is returned. """ - if inplace: - x = self - else: - x = self.clone() + return self.filter_assets(pattern=pattern, inplace=inplace) - for item in x.items: - drop_assets_without_proj(item, pattern=pattern, inplace=True) - - if not inplace: - return x - def to_xarray(self, xy_coords="center", bbox=None, geometry=None, gdal_env=DEFAULT_GDAL_ENV, **kwargs): """Returns a DASK xarray() @@ -165,6 +156,18 @@ class ExtendPystacClasses: arr = arr.rio.clip(geometry) return arr + def filter_assets(self, assets=None, pattern=None, drop=False, inplace=False): + if inplace: + x = self + else: + x = self.clone() + + for item in x.items: + filter_assets(item, assets=assets, pattern=pattern, drop=drop, inplace=True) + + if not inplace: + return x + def filter(self, assets=None, with_assets=None, clone_items=True, **kwargs): """Filter items with stac-static search. Additional args: @@ -1054,6 +1057,62 @@ def drop_assets_without_proj(item, pattern="^proj:|^raster:", inplace=False): return item +def filter_assets( + item: pystac.Item, + assets: Union[str, list]=None, + pattern: str="^proj:|^raster:", + drop: bool=False, + inplace: bool=False): + """ + Filter assets from the given item according to pattern and asset keys. + + Parameters + ---------- + item: pystac.Item + The item from which to filter assets. + assets: Union[str, list], optional + The asset keys to match. + pattern: str, optional. + The pattern to search for in asset extra_fields keys. + drop: bool, optional + If True, the assets matching the pattern and the asset keys + are dropped. + inplace: bool, optional + If True, the assets will be filtered in place. + Otherwise, a clone of the item will be created and modified. + + Returns + ------ + pystac.Item + The modified item. + """ + if not inplace: + item = item.clone() + + if not pattern: + keep = item.assets.keys() + else: + keep = [] + for k,v in item.assets.items(): + if any([bool(re.search(pattern, p)) for p in v.extra_fields]): + keep.append(k) + + if assets is not None: + if not isinstance(assets, list): + assets = [assets] + keep = [k for k in keep if k in assets] + + if drop: + item.assets = {k:v for k,v in item.assets.items() if k not in keep} + else: + item.assets = {k:v for k,v in item.assets.items() if k in keep} + + if len(item.assets) == 0: + logger.warning(f"Item {item.id} has no assets left after filtering.") + + return item + + def harmonize_sen2cor_offset(x, assets=S2_SEN2COR_BANDS, inplace=False): """ Harmonize new Sentinel-2 item collection (Sen2Cor v4+, 2022-01-25) -- GitLab From 02f68db998e0e991083ae320c414a95e1cd47a9f Mon Sep 17 00:00:00 2001 From: Florian de Boissieu <fdeboiss@gmail.com> Date: Fri, 24 Jan 2025 10:45:39 +0100 Subject: [PATCH 2/6] add tests for filter_assets --- tests/test_remote.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_remote.py b/tests/test_remote.py index 225770b..05949b4 100644 --- a/tests/test_remote.py +++ b/tests/test_remote.py @@ -4,6 +4,16 @@ import pystac_client from tempfile import TemporaryDirectory import numpy as np + +def test_filter_assets(pc_col): + col = ItemCollection(pc_col) + col1 = col.filter_assets(assets=["B02", "B03"]) + assert len(col1[0].assets) == 2 + col1 = col.filter_assets(assets=["B02"], drop=True) + assert "B02" not in col1[0].assets + col1 = col.filter_assets(pattern="^proj:bbox", drop=False) + assert all(["proj:bbox" in a.extra_fields for a in col1[0].assets.values()]) + def test_to_xarray(pc_col): col = ItemCollection(pc_col) x = col.drop_non_raster().to_xarray() -- GitLab From 643423f6d407a87894453c577c15da44cdd46c1f Mon Sep 17 00:00:00 2001 From: Florian de Boissieu <fdeboiss@gmail.com> Date: Fri, 24 Jan 2025 10:46:22 +0100 Subject: [PATCH 3/6] fix for issue #2 --- simplestac/local.py | 24 ++++++++++++++-------- simplestac/utils.py | 50 +++++++++++++++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/simplestac/local.py b/simplestac/local.py index c78d644..f90613b 100644 --- a/simplestac/local.py +++ b/simplestac/local.py @@ -381,20 +381,26 @@ def properties_from_assets(assets, update_assets=True): Bounding box in WGS84, WGS84 geometry in GeoJSON, and properties. """ properties = {} - assets = [(k, v) for k, v in assets.items()] - df_assets = DataFrame(assets, columns=["key", "asset"]) - epsg_list = df_assets["asset"].apply(lambda x: x.extra_fields["proj:epsg"]) - bbox_list = df_assets["asset"].apply(lambda x: box(*x.extra_fields["proj:bbox"])) - if len(epsg_list.unique()) == 1: + epsg_list = [] + bbox_list = [] + for k, v in assets.items(): + if "proj:epsg" in v.extra_fields and "proj:bbox" in v.extra_fields: + epsg = v.extra_fields["proj:epsg"] + bbox = gpd.GeoSeries(box(*v.extra_fields["proj:bbox"]), crs=epsg).to_crs(4326) + epsg_list.append(epsg) + bbox_list.append(bbox) + + if len(set(epsg_list)) == 1 and epsg_list[0] is not None: properties.update({ "proj:epsg" : int(epsg_list[0]) }) - if update_assets: - # remove epsg from extra_fields - df_assets["asset"].apply(lambda x: x.extra_fields.pop("proj:epsg")) + # remove epsg from extra_fields + for k, v in assets.items(): + if "proj:epsg" in v.extra_fields: + v.extra_fields.pop("proj:epsg") - g = unary_union([gpd.GeoSeries(bbox, crs=epsg).to_crs(4326).geometry for bbox, epsg in zip(bbox_list, epsg_list)]) + g = unary_union(bbox_list) bbox_wgs = list(g.bounds) geometry = json.loads(to_geojson(g)) return bbox_wgs, geometry, properties diff --git a/simplestac/utils.py b/simplestac/utils.py index 2708ccc..561cec1 100644 --- a/simplestac/utils.py +++ b/simplestac/utils.py @@ -17,6 +17,7 @@ import stackstac import xarray as xr import rioxarray # necessary to activate rio plugin in xarray from tempfile import TemporaryDirectory, NamedTemporaryFile +import time from tqdm import tqdm from typing import Union, Iterable import warnings @@ -654,7 +655,7 @@ def write_assets(x: Union[ItemCollection, pystac.Item], output_dir: str, bbox=None, geometry=None, - update=True, + keep_asset_attrs=True, xy_coords='center', remove_item_props=DEFAULT_REMOVE_PROPS, overwrite=False, @@ -759,33 +760,52 @@ def write_assets(x: Union[ItemCollection, pystac.Item], wa = writer_args[b] else: wa = kwargs - try: if file.exists() and not overwrite: - logger.debug(f"File already exists, skipping asset: {file}") + logger.info(f"File already exists, skipping asset: {file}") else: - write_raster(arr.sel(band=b), file, **wa) - + done = False + max_retry = 10 + retry = 0 + wait = 2 + while not done and retry != max_retry: + try: + write_raster(arr.sel(band=b), file, **wa) + done=True + except RuntimeError as e: + logger.info(e) + if 'HTTP response code: 403' in str(e): + retry += 1 + logger.info( + f"Failed to read the asset '{b}' of item '{item.id}', " + f"retrying in {wait*retry} minutes ({retry}/{max_retry}).") + time.sleep(wait*60) + else: + raise e + + if not file.exists(): + raise Exception(f"File was not written: {file}") + # update stac asset info stac_info = stac_asset_info_from_raster(file) - if update: + if keep_asset_attrs: asset_info = item.assets[b].to_dict() asset_info.update(stac_info) stac_info = asset_info asset = pystac.Asset.from_dict(stac_info) item.add_asset(key=b, asset=asset) except RuntimeError as e: - logger.debug(e) - logger.debug(f'Skipping asset "{b}" for "{item.id}".') + logger.info(e) + logger.info(f"There was an error writing the asset '{b}' of item '{item.id}', skipping it.") file.remove_p() item.assets.pop(b, None) - try: - update_item_properties(item, remove_item_props=remove_item_props) - items.append(item) - except RuntimeError as e: - logger.debug(e) - logger.info(f'Item "{item.id}" is empty, skipping it.') - item_dir.rmtree_p() + # try: + update_item_properties(item, remove_item_props=remove_item_props) + items.append(item) + # except RuntimeError as e: + # logger.info(e) + # logger.info(f'Item "{item.id}" is empty, skipping it.') + # item_dir.rmtree_p() if not inplace: return x -- GitLab From c541d2cb5b790fd3e5fb5047c0bdd888941add12 Mon Sep 17 00:00:00 2001 From: Florian de Boissieu <fdeboiss@gmail.com> Date: Mon, 17 Feb 2025 11:51:15 +0100 Subject: [PATCH 4/6] fix documentation for previous commit --- simplestac/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/simplestac/utils.py b/simplestac/utils.py index 561cec1..9daf5ca 100644 --- a/simplestac/utils.py +++ b/simplestac/utils.py @@ -683,11 +683,11 @@ def write_assets(x: Union[ItemCollection, pystac.Item], Argument forwarded to ItemCollection.to_xarray to rioxarray.clip the assets to. Usually a GeoDataFrame or GeoSeries. See notes. - update : bool, optional - Whether to update the item properties with the new asset paths. + keep_asset_attrs : bool, optional + Whether to keep the asset attributes in the returned item collection. Defaults to True. xy_coords : str, optional - The coordinate system to use for the x and y coordinates of the + Argument forwarded to ItemCollection.to_xarray. remove_item_props : list of str List of regex patterns to remove from item properties. If None, no properties are removed. -- GitLab From 45db68a36c036dd8e8f04d0ff51bf82744e90fc1 Mon Sep 17 00:00:00 2001 From: Florian de Boissieu <fdeboiss@gmail.com> Date: Mon, 17 Feb 2025 14:53:28 +0100 Subject: [PATCH 5/6] fix pystac < 1.12 in dependencies --- environment.yml | 3 ++- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 2db3769..42c4d7e 100644 --- a/environment.yml +++ b/environment.yml @@ -20,7 +20,8 @@ dependencies: - git - geopandas - pygeofilter - - pystac + # fix pystac version due to stackstac issue: https://github.com/gjoseph92/stackstac/issues/262 + - pystac < 1.12 - pyarrow - pip - pip: diff --git a/pyproject.toml b/pyproject.toml index a7b5ab0..e545d8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ dependencies = [ "tqdm", "path", - "pystac", + "pystac < 1.12", "rioxarray", "stac_static@git+https://github.com/jsignell/stac-static", "stackstac", -- GitLab From ba761417ec14f23a77bffb984eca6f7fddefba3b Mon Sep 17 00:00:00 2001 From: Florian de Boissieu <fdeboiss@gmail.com> Date: Mon, 17 Feb 2025 18:13:30 +0100 Subject: [PATCH 6/6] update changelog --- CHANGELOG.md | 9 ++++++++- simplestac/utils.py | 6 +----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 703d9e5..5f43ed4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ +# v1.2.2 +## Add +- `ItemCollection.filter_assets`: filter assets (keep or drop) + +## Fix +- log as info if writing had an error (issue #2) + # v1.2.1 -## add +## Add - add xarray.Dataset support to apply_formula - make write_raster ready for delayed write diff --git a/simplestac/utils.py b/simplestac/utils.py index 9daf5ca..5e59877 100644 --- a/simplestac/utils.py +++ b/simplestac/utils.py @@ -799,13 +799,9 @@ def write_assets(x: Union[ItemCollection, pystac.Item], logger.info(f"There was an error writing the asset '{b}' of item '{item.id}', skipping it.") file.remove_p() item.assets.pop(b, None) - # try: + update_item_properties(item, remove_item_props=remove_item_props) items.append(item) - # except RuntimeError as e: - # logger.info(e) - # logger.info(f'Item "{item.id}" is empty, skipping it.') - # item_dir.rmtree_p() if not inplace: return x -- GitLab