From 84e07bfd8b72ad1ebfcea787fc5d0e402f0a6be5 Mon Sep 17 00:00:00 2001
From: Florian de Boissieu <fdeboiss@gmail.com>
Date: Fri, 24 Jan 2025 10:43:45 +0100
Subject: [PATCH 1/6] add function and method filter_assets

---
 simplestac/utils.py | 79 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 10 deletions(-)

diff --git a/simplestac/utils.py b/simplestac/utils.py
index e887a35..2708ccc 100644
--- a/simplestac/utils.py
+++ b/simplestac/utils.py
@@ -81,17 +81,8 @@ class ExtendPystacClasses:
         object
             If `inplace` is False, a cloned collection is returned.       
         """
-        if inplace:
-            x = self
-        else:
-            x = self.clone()
+        return self.filter_assets(pattern=pattern, inplace=inplace)
         
-        for item in x.items: 
-            drop_assets_without_proj(item, pattern=pattern, inplace=True)
-
-        if not inplace:
-            return x
-
     def to_xarray(self, xy_coords="center", bbox=None, geometry=None, gdal_env=DEFAULT_GDAL_ENV, **kwargs):
         """Returns a DASK xarray()
         
@@ -165,6 +156,18 @@ class ExtendPystacClasses:
             arr = arr.rio.clip(geometry)
         return arr
     
+    def filter_assets(self, assets=None, pattern=None, drop=False, inplace=False):
+        if inplace:
+            x = self
+        else:
+            x = self.clone()
+        
+        for item in x.items: 
+            filter_assets(item, assets=assets, pattern=pattern, drop=drop, inplace=True)
+
+        if not inplace:
+            return x
+
     def filter(self, assets=None, with_assets=None, clone_items=True, **kwargs):
         """Filter items with stac-static search.
         Additional args:
@@ -1054,6 +1057,62 @@ def drop_assets_without_proj(item, pattern="^proj:|^raster:", inplace=False):
 
     return item
 
+def filter_assets(
+        item: pystac.Item,
+        assets: Union[str, list]=None,
+        pattern: str="^proj:|^raster:",
+        drop: bool=False,
+        inplace: bool=False):
+    """
+    Filter assets from the given item according to pattern and asset keys.
+
+    Parameters
+    ----------
+    item: pystac.Item
+        The item from which to filter assets.
+    assets: Union[str, list], optional
+        The asset keys to match.
+    pattern: str, optional
+        Regex searched in each asset's extra_fields keys; if empty, every asset matches.
+    drop: bool, optional
+        If True, the assets matching both the pattern and the asset keys
+        are dropped; otherwise only those assets are kept.
+    inplace: bool, optional
+        If True, the assets will be filtered in place.
+        Otherwise, a clone of the item will be created and modified.
+
+    Returns
+    -------
+    pystac.Item
+        The modified item.
+    """
+    if not inplace:
+        item = item.clone()
+    
+    if not pattern:
+        keep = item.assets.keys()
+    else:
+        keep = []
+        for k,v in item.assets.items():
+            if any([bool(re.search(pattern, p)) for p in v.extra_fields]):
+                keep.append(k)
+    
+    if assets is not None:
+        if not isinstance(assets, list):
+            assets = [assets]
+        keep = [k for k in keep if k in assets]
+
+    if drop:
+        item.assets = {k:v for k,v in item.assets.items() if k not in keep}
+    else:
+        item.assets = {k:v for k,v in item.assets.items() if k in keep}
+
+    if len(item.assets) == 0:
+        logger.warning(f"Item {item.id} has no assets left after filtering.")
+
+    return item
+
+
 def harmonize_sen2cor_offset(x, assets=S2_SEN2COR_BANDS, inplace=False):
     """
     Harmonize new Sentinel-2 item collection (Sen2Cor v4+, 2022-01-25)
-- 
GitLab


From 02f68db998e0e991083ae320c414a95e1cd47a9f Mon Sep 17 00:00:00 2001
From: Florian de Boissieu <fdeboiss@gmail.com>
Date: Fri, 24 Jan 2025 10:45:39 +0100
Subject: [PATCH 2/6] add tests for filter_assets

---
 tests/test_remote.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/test_remote.py b/tests/test_remote.py
index 225770b..05949b4 100644
--- a/tests/test_remote.py
+++ b/tests/test_remote.py
@@ -4,6 +4,16 @@ import pystac_client
 from tempfile import TemporaryDirectory
 import numpy as np
 
+
+def test_filter_assets(pc_col):
+    col = ItemCollection(pc_col)
+    col1 = col.filter_assets(assets=["B02", "B03"])
+    assert len(col1[0].assets) == 2
+    col1 = col.filter_assets(assets=["B02"], drop=True)
+    assert "B02" not in col1[0].assets
+    col1 = col.filter_assets(pattern="^proj:bbox", drop=False)
+    assert all(["proj:bbox" in a.extra_fields for a in col1[0].assets.values()])
+
 def test_to_xarray(pc_col):
     col = ItemCollection(pc_col)
     x = col.drop_non_raster().to_xarray()
-- 
GitLab


From 643423f6d407a87894453c577c15da44cdd46c1f Mon Sep 17 00:00:00 2001
From: Florian de Boissieu <fdeboiss@gmail.com>
Date: Fri, 24 Jan 2025 10:46:22 +0100
Subject: [PATCH 3/6] fix for issue #2

---
 simplestac/local.py | 24 ++++++++++++++--------
 simplestac/utils.py | 50 +++++++++++++++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/simplestac/local.py b/simplestac/local.py
index c78d644..f90613b 100644
--- a/simplestac/local.py
+++ b/simplestac/local.py
@@ -381,20 +381,26 @@ def properties_from_assets(assets, update_assets=True):
         Bounding box in WGS84, WGS84 geometry in GeoJSON, and properties.
     """
     properties = {}
-    assets = [(k, v) for k, v in assets.items()]
-    df_assets = DataFrame(assets, columns=["key", "asset"])
-    epsg_list = df_assets["asset"].apply(lambda x: x.extra_fields["proj:epsg"])
-    bbox_list = df_assets["asset"].apply(lambda x: box(*x.extra_fields["proj:bbox"]))
-    if len(epsg_list.unique()) == 1:
+    epsg_list = []
+    bbox_list = []
+    for k, v in assets.items():
+        if "proj:epsg" in v.extra_fields and "proj:bbox" in v.extra_fields:
+            epsg = v.extra_fields["proj:epsg"]
+            bbox = gpd.GeoSeries(box(*v.extra_fields["proj:bbox"]), crs=epsg).to_crs(4326)
+            epsg_list.append(epsg)
+            bbox_list.append(bbox)
+    
+    if len(set(epsg_list)) == 1 and epsg_list[0] is not None:
         properties.update({
             "proj:epsg" : int(epsg_list[0])
         })
-
         if update_assets:
-        # remove epsg from extra_fields
-            df_assets["asset"].apply(lambda x: x.extra_fields.pop("proj:epsg"))
+            # remove epsg from extra_fields
+            for k, v in assets.items():
+                if "proj:epsg" in v.extra_fields:
+                    v.extra_fields.pop("proj:epsg")
 
-    g = unary_union([gpd.GeoSeries(bbox, crs=epsg).to_crs(4326).geometry for bbox, epsg in zip(bbox_list, epsg_list)])
+    g = unary_union(bbox_list)
     bbox_wgs = list(g.bounds)
     geometry = json.loads(to_geojson(g))
     return bbox_wgs, geometry, properties
diff --git a/simplestac/utils.py b/simplestac/utils.py
index 2708ccc..561cec1 100644
--- a/simplestac/utils.py
+++ b/simplestac/utils.py
@@ -17,6 +17,7 @@ import stackstac
 import xarray as xr
 import rioxarray # necessary to activate rio plugin in xarray
 from tempfile import TemporaryDirectory, NamedTemporaryFile
+import time
 from tqdm import tqdm
 from typing import Union, Iterable
 import warnings
@@ -654,7 +655,7 @@ def write_assets(x: Union[ItemCollection, pystac.Item],
                  output_dir: str,
                  bbox=None,
                  geometry=None,
-                 update=True,
+                 keep_asset_attrs=True,
                  xy_coords='center', 
                  remove_item_props=DEFAULT_REMOVE_PROPS,
                  overwrite=False,
@@ -759,33 +760,52 @@ def write_assets(x: Union[ItemCollection, pystac.Item],
                 wa = writer_args[b]
             else:
                 wa = kwargs
-            
             try:
                 if file.exists() and not overwrite:
-                    logger.debug(f"File already exists, skipping asset: {file}")
+                    logger.info(f"File already exists, skipping asset: {file}")
                 else:
-                    write_raster(arr.sel(band=b), file, **wa)
-                
+                    done = False
+                    max_retry = 10
+                    retry = 0
+                    wait = 2
+                    while not done and retry != max_retry:
+                        try:
+                            write_raster(arr.sel(band=b), file, **wa)
+                            done=True
+                        except RuntimeError as e:
+                            logger.info(e)
+                            if 'HTTP response code: 403' in str(e):
+                                retry += 1
+                                logger.info(
+                                    f"Failed to read the asset '{b}' of item '{item.id}', "
+                                    f"retrying in {wait*retry} minutes ({retry}/{max_retry}).")
+                                time.sleep(wait*60)
+                            else:
+                                raise e
+
+                    if not file.exists():
+                        raise Exception(f"File was not written: {file}")
+                    
                 # update stac asset info            
                 stac_info = stac_asset_info_from_raster(file)
-                if update:
+                if keep_asset_attrs:
                     asset_info = item.assets[b].to_dict()
                     asset_info.update(stac_info)
                     stac_info = asset_info
                 asset = pystac.Asset.from_dict(stac_info)
                 item.add_asset(key=b, asset=asset)
             except RuntimeError as e:
-                logger.debug(e)
-                logger.debug(f'Skipping asset "{b}" for "{item.id}".')
+                logger.info(e)
+                logger.info(f"There was an error writing the asset '{b}' of item '{item.id}', skipping it.")
                 file.remove_p()
                 item.assets.pop(b, None)
-        try:
-            update_item_properties(item, remove_item_props=remove_item_props)
-            items.append(item)
-        except RuntimeError as e:
-            logger.debug(e)
-            logger.info(f'Item "{item.id}" is empty, skipping it.')
-            item_dir.rmtree_p()
+        # try:
+        update_item_properties(item, remove_item_props=remove_item_props)
+        items.append(item)
+        # except RuntimeError as e:
+        #     logger.info(e)
+        #     logger.info(f'Item "{item.id}" is empty, skipping it.')
+        #     item_dir.rmtree_p()
     
     if not inplace:
         return x
-- 
GitLab


From c541d2cb5b790fd3e5fb5047c0bdd888941add12 Mon Sep 17 00:00:00 2001
From: Florian de Boissieu <fdeboiss@gmail.com>
Date: Mon, 17 Feb 2025 11:51:15 +0100
Subject: [PATCH 4/6] fix documentation for previous commit

---
 simplestac/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/simplestac/utils.py b/simplestac/utils.py
index 561cec1..9daf5ca 100644
--- a/simplestac/utils.py
+++ b/simplestac/utils.py
@@ -683,11 +683,11 @@ def write_assets(x: Union[ItemCollection, pystac.Item],
         Argument forwarded to ItemCollection.to_xarray to rioxarray.clip the assets to.
         Usually a GeoDataFrame or GeoSeries.
         See notes.
-    update : bool, optional
-        Whether to update the item properties with the new asset paths.
+    keep_asset_attrs : bool, optional
+        Whether to keep the original asset attributes (updated with the written raster info) instead of replacing them.
         Defaults to True.
     xy_coords : str, optional
-        The coordinate system to use for the x and y coordinates of the
+        Argument forwarded to ItemCollection.to_xarray.
     remove_item_props : list of str
         List of regex patterns to remove from item properties.
         If None, no properties are removed.
-- 
GitLab


From 45db68a36c036dd8e8f04d0ff51bf82744e90fc1 Mon Sep 17 00:00:00 2001
From: Florian de Boissieu <fdeboiss@gmail.com>
Date: Mon, 17 Feb 2025 14:53:28 +0100
Subject: [PATCH 5/6] fix pystac < 1.12 in dependencies

---
 environment.yml | 3 ++-
 pyproject.toml  | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index 2db3769..42c4d7e 100644
--- a/environment.yml
+++ b/environment.yml
@@ -20,7 +20,8 @@ dependencies:
   - git
   - geopandas
   - pygeofilter
-  - pystac
+  # pin pystac below 1.12 because of a stackstac issue: https://github.com/gjoseph92/stackstac/issues/262
+  - pystac < 1.12
   - pyarrow
   - pip
   - pip:
diff --git a/pyproject.toml b/pyproject.toml
index a7b5ab0..e545d8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ classifiers = [
 dependencies = [
   "tqdm",
   "path",
-  "pystac",
+  "pystac < 1.12",
   "rioxarray",
   "stac_static@git+https://github.com/jsignell/stac-static",
   "stackstac",
-- 
GitLab


From ba761417ec14f23a77bffb984eca6f7fddefba3b Mon Sep 17 00:00:00 2001
From: Florian de Boissieu <fdeboiss@gmail.com>
Date: Mon, 17 Feb 2025 18:13:30 +0100
Subject: [PATCH 6/6] update changelog

---
 CHANGELOG.md        | 9 ++++++++-
 simplestac/utils.py | 6 +-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 703d9e5..5f43ed4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
+# v1.2.2
+## Add
+- `ItemCollection.filter_assets`: filter assets (keep or drop)
+
+## Fix
+- `write_assets`: log at info level when writing an asset fails (issue #2)
+
 # v1.2.1
-## add
+## Add
 - add xarray.Dataset support to apply_formula
 - make write_raster ready for delayed write
 
diff --git a/simplestac/utils.py b/simplestac/utils.py
index 9daf5ca..5e59877 100644
--- a/simplestac/utils.py
+++ b/simplestac/utils.py
@@ -799,13 +799,9 @@ def write_assets(x: Union[ItemCollection, pystac.Item],
                 logger.info(f"There was an error writing the asset '{b}' of item '{item.id}', skipping it.")
                 file.remove_p()
                 item.assets.pop(b, None)
-        # try:
+        
         update_item_properties(item, remove_item_props=remove_item_props)
         items.append(item)
-        # except RuntimeError as e:
-        #     logger.info(e)
-        #     logger.info(f'Item "{item.id}" is empty, skipping it.')
-        #     item_dir.rmtree_p()
     
     if not inplace:
         return x
-- 
GitLab