Add S3 storage support via s3fs; make storage field required

- New filesystem.py: make_fs() factory (returns s3fs.S3FileSystem or None), plus fsjoin/fsstem/fsname path helpers
- config.py: storage field is now required ('local' or 's3'); load_config raises a clear ValueError when it is missing
- video_loader, clip_selector, annotator: thread fs through all file I/O; local paths unchanged, S3 paths use fs.open/fs.exists/fs.pipe
- annotation_script: load .env via python-dotenv at startup, create fs from config and pass to Annotator
- Add .env.example with SwitchEngines endpoint and AWS checksum env vars
- pyproject.toml: add s3fs and python-dotenv dependencies
- Reduce default mask alpha from 40% to 15%
- Update example clip names to colon-separated timestamps

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 16:15:38 +02:00
parent 8579bad2e2
commit dc59b8affb
15 changed files with 1539 additions and 106 deletions
--- a/src/river_annotation_tool/annotation_script.py
+++ b/src/river_annotation_tool/annotation_script.py
@@ -11,6 +11,7 @@ from PySide6.QtWidgets import QApplication, QMessageBox

 from .annotator import Annotator
 from .config import load_config
+from .filesystem import make_fs


 def parse_args():
@@ -40,6 +41,13 @@ def parse_args():


 if __name__ == "__main__":
+    try:
+        from dotenv import load_dotenv
+
+        load_dotenv()
+    except ImportError:
+        pass
+
    args = parse_args()

    cfg = load_config(Path(args.config))
@@ -50,6 +58,8 @@ if __name__ == "__main__":
    if args.clips:
        cfg.clips_file = args.clips

+    fs = make_fs(cfg.storage)
+
    app = QApplication([])
    try:
        win = Annotator(
@@ -57,6 +67,7 @@ if __name__ == "__main__":
            clip=args.clip,
            extras=args.extras,
            skip_annotated=not args.no_skip,
+            fs=fs,
        )
    except RuntimeError as e:
        QMessageBox.information(None, "No clips", str(e))
--- a/src/river_annotation_tool/annotator.py
+++ b/src/river_annotation_tool/annotator.py
@@ -1,3 +1,4 @@
+import io
 import json
 from pathlib import Path

@@ -22,6 +23,7 @@ from PySide6.QtWidgets import (
 from .clip_selector import ClipSelector
 from .compute_optical_flow import compute_optical_flow_mask
 from .config import AppConfig, load_optical_flow_config
+from .filesystem import fsjoin, fsname, fsstem
 from .mask_canvas import MaskCanvas
 from .video_loader import load_frames

@@ -33,11 +35,13 @@ class Annotator(QMainWindow):
        clip: str = None,
        extras: bool = False,
        skip_annotated: bool = True,
+        fs=None,
    ):
        super().__init__()

        self.cfg = config
-        self.out_dir = Path(config.out_dir)
+        self.fs = fs
+        self.out_dir = config.out_dir
        self.extras = extras
        self.of_cfg = (
            load_optical_flow_config(Path(config.optical_flow_config_file))
@@ -46,15 +50,16 @@ class Annotator(QMainWindow):
        )

        self.selector = ClipSelector(
-            data_dir=Path(config.data_dir),
+            data_dir=config.data_dir,
            out_dir=self.out_dir,
            clips_file=Path(config.clips_file),
            mask_filename=config.filenames.mask,
            zip_extension=config.filenames.zip_extension,
            skip_annotated=skip_annotated,
+            fs=fs,
        )

-        self.history: list[Path] = []
+        self.history: list[str] = []
        self.history_pos: int = -1

        self.setWindowTitle("River Annotator")
@@ -63,8 +68,53 @@ class Annotator(QMainWindow):
        self._init_ui()
        self._init_timer()

+    # ── filesystem helpers ─────────────────────────────────────────
+    def _out_path(self, *parts: str) -> str:
+        return fsjoin(self.out_dir, fsstem(self.filename), *parts)
+
+    def _fs_exists(self, path: str) -> bool:
+        if self.fs is None:
+            return Path(path).exists()
+        return self.fs.exists(path)
+
+    def _fs_makedirs(self, path: str):
+        if self.fs is None:
+            Path(path).mkdir(parents=True, exist_ok=True)
+        else:
+            self.fs.makedirs(path, exist_ok=True)
+
+    def _pil_open(self, path: str) -> Image.Image:
+        if self.fs is None:
+            return Image.open(path)
+        with self.fs.open(path, "rb") as f:
+            return Image.open(io.BytesIO(f.read()))
+
+    def _pil_save(self, img: Image.Image, path: str):
+        if self.fs is None:
+            img.save(path)
+        else:
+            ext = str(path).rsplit(".", 1)[-1].lower()
+            fmt = "JPEG" if ext in ("jpg", "jpeg") else ext.upper()
+            buf = io.BytesIO()
+            img.save(buf, format=fmt)
+            self.fs.pipe(path, buf.getvalue())
+
+    def _json_read(self, path: str):
+        if self.fs is None:
+            with open(path) as f:
+                return json.load(f)
+        with self.fs.open(path, "r") as f:
+            return json.load(f)
+
+    def _json_write(self, data, path: str):
+        if self.fs is None:
+            with open(path, "w") as f:
+                json.dump(data, f, indent=2)
+        else:
+            self.fs.pipe(path, json.dumps(data, indent=2).encode())
+
    # ── clip loading ───────────────────────────────────────────────
-    def _load_clip(self, specific: str = None, path: Path = None):
+    def _load_clip(self, specific: str = None, path: str = None):
        if path is not None:
            self.filename = path
        else:
@@ -76,6 +126,7 @@ class Annotator(QMainWindow):
            self.cfg.fps_fallback,
            self.cfg.filenames.video_in_zip,
            self.cfg.filenames.video_tmp_suffix,
+            fs=self.fs,
        )
        self._pending_answers = self._read_saved_answers()

@@ -85,10 +136,10 @@ class Annotator(QMainWindow):
        self.history_pos = len(self.history) - 1

    def _read_saved_mask(self):
-        mask_path = self.out_dir / self.filename.stem / self.cfg.filenames.mask
-        if not mask_path.exists():
+        mask_path = self._out_path(self.cfg.filenames.mask)
+        if not self._fs_exists(mask_path):
            return None
-        mask_full = np.array(Image.open(mask_path).convert("L"))
+        mask_full = np.array(self._pil_open(mask_path).convert("L"))
        return cv2.resize(
            (mask_full > 127).astype(np.uint8),
            (self.dw, self.dh),
@@ -96,16 +147,15 @@ class Annotator(QMainWindow):
        )

    def _read_saved_answers(self):
-        meta_path = self.out_dir / self.filename.stem / self.cfg.filenames.metadata
-        if not meta_path.exists():
+        meta_path = self._out_path(self.cfg.filenames.metadata)
+        if not self._fs_exists(meta_path):
            return None
-        with open(meta_path) as f:
-            return json.load(f)
+        return self._json_read(meta_path)

    # ── UI setup ───────────────────────────────────────────────────
    def _init_ui(self):
        self.mc = MaskCanvas(self.frames, self.dh, self.dw)
-        self.mc.set_title(self.filename.name)
+        self.mc.set_title(fsname(self.filename))
        self.mc.reset(self._read_saved_mask())

        self.q_widgets = {}
@@ -279,22 +329,34 @@ class Annotator(QMainWindow):
        overlay[m] = (1 - alpha) * overlay[m] + alpha * green[m]
        return overlay.astype(np.uint8)

-    def _save_gif(self, frames, out_path, scale=1.0):
+    def _save_gif(self, frames, out_path: str, scale=1.0):
        h, w = frames[0].shape[:2]
        nh, nw = max(1, int(h * scale)), max(1, int(w * scale))
        pil_frames = [Image.fromarray(cv2.resize(f, (nw, nh))) for f in frames]
-        pil_frames[0].save(
-            out_path,
-            save_all=True,
-            append_images=pil_frames[1:],
-            duration=int(1000 / self.fps),
-            loop=0,
-        )
+        if self.fs is None:
+            pil_frames[0].save(
+                out_path,
+                save_all=True,
+                append_images=pil_frames[1:],
+                duration=int(1000 / self.fps),
+                loop=0,
+            )
+        else:
+            buf = io.BytesIO()
+            pil_frames[0].save(
+                buf,
+                format="GIF",
+                save_all=True,
+                append_images=pil_frames[1:],
+                duration=int(1000 / self.fps),
+                loop=0,
+            )
+            self.fs.pipe(out_path, buf.getvalue())

    # ── actions ────────────────────────────────────────────────────
    def save(self):
-        out = self.out_dir / self.filename.stem
-        out.mkdir(parents=True, exist_ok=True)
+        out = fsjoin(self.out_dir, fsstem(self.filename))
+        self._fs_makedirs(out)

        mask_full = cv2.resize(
            self.mc.mask.astype(np.uint8),
@@ -302,25 +364,28 @@ class Annotator(QMainWindow):
            interpolation=cv2.INTER_NEAREST,
        )
        fn = self.cfg.filenames
-        Image.fromarray(mask_full * 255).save(out / fn.mask)
-
-        with open(out / fn.metadata, "w") as f:
-            json.dump(self.get_answers(), f, indent=2)
+        self._pil_save(Image.fromarray(mask_full * 255), fsjoin(out, fn.mask))
+        self._json_write(self.get_answers(), fsjoin(out, fn.metadata))

        mid = len(self.frames) // 2
        frame = self.frames[mid]
-        Image.fromarray(frame).save(out / fn.frame)
-        Image.fromarray(self._make_overlay(frame)).save(out / fn.overlay)
+        self._pil_save(Image.fromarray(frame), fsjoin(out, fn.frame))
+        self._pil_save(
+            Image.fromarray(self._make_overlay(frame)), fsjoin(out, fn.overlay)
+        )

        if self.extras:
-            Image.fromarray((self.mc.mask * 255).astype(np.uint8)).save(
-                out / fn.mask_vis
+            self._pil_save(
+                Image.fromarray((self.mc.mask * 255).astype(np.uint8)),
+                fsjoin(out, fn.mask_vis),
            )
            overlay_frames = [self._make_overlay(f) for f in self.frames]
-            self._save_gif(self.frames, out / fn.gif_original_hires, scale=1.0)
-            self._save_gif(self.frames, out / fn.gif_original_lowres, scale=0.5)
-            self._save_gif(overlay_frames, out / fn.gif_overlay_hires, scale=1.0)
-            self._save_gif(overlay_frames, out / fn.gif_overlay_lowres, scale=0.5)
+            self._save_gif(self.frames, fsjoin(out, fn.gif_original_hires), scale=1.0)
+            self._save_gif(self.frames, fsjoin(out, fn.gif_original_lowres), scale=0.5)
+            self._save_gif(overlay_frames, fsjoin(out, fn.gif_overlay_hires), scale=1.0)
+            self._save_gif(
+                overlay_frames, fsjoin(out, fn.gif_overlay_lowres), scale=0.5
+            )

        print("Saved:", out)

@@ -331,7 +396,7 @@ class Annotator(QMainWindow):
            self.dh,
            self.dw,
            mask=self._read_saved_mask(),
-            title=self.filename.name,
+            title=fsname(self.filename),
        )
        if self._pending_answers:
            self._set_answers(self._pending_answers)
@@ -366,12 +431,12 @@ class Annotator(QMainWindow):
        self._switch_ui_to_clip()

    def next_clip(self):
-        mask_path = self.out_dir / self.filename.stem / self.cfg.filenames.mask
-        if mask_path.exists():
+        mask_path = self._out_path(self.cfg.filenames.mask)
+        if self._fs_exists(mask_path):
            msg = QMessageBox(self)
            msg.setWindowTitle("Existing annotation found")
            msg.setText(
-                f"'{self.filename.stem}' already has a saved annotation.\n"
+                f"'{fsstem(self.filename)}' already has a saved annotation.\n"
                "Replace it with your current work, or keep the existing save?"
            )
            btn_replace = msg.addButton(
@@ -408,13 +473,15 @@ class Annotator(QMainWindow):
            )
            return
        prev_clip = self.selector.clips[idx - 1]
-        mask_path = self.out_dir / prev_clip.stem / self.cfg.filenames.mask
-        if not mask_path.exists():
+        mask_path = fsjoin(self.out_dir, fsstem(prev_clip), self.cfg.filenames.mask)
+        if not self._fs_exists(mask_path):
            QMessageBox.information(
-                self, "No mask found", f"No saved mask found for '{prev_clip.stem}'."
+                self,
+                "No mask found",
+                f"No saved mask found for '{fsstem(prev_clip)}'.",
            )
            return
-        mask_full = np.array(Image.open(mask_path).convert("L"))
+        mask_full = np.array(self._pil_open(mask_path).convert("L"))
        mask = cv2.resize(
            (mask_full > 127).astype(np.uint8),
            (self.dw, self.dh),
--- a/src/river_annotation_tool/clip_selector.py
+++ b/src/river_annotation_tool/clip_selector.py
@@ -1,50 +1,72 @@
 from pathlib import Path

+from .filesystem import fsjoin, fsstem
+

 class ClipSelector:
    def __init__(
        self,
-        data_dir: Path,
-        out_dir: Path,
+        data_dir,
+        out_dir,
        clips_file: Path,
        mask_filename: str = "mask.png",
        zip_extension: str = ".zip",
        skip_annotated: bool = True,
+        fs=None,
    ):
-        self.data_dir = data_dir
-        self.out_dir = out_dir
+        self.data_dir = str(data_dir)
+        self.out_dir = str(out_dir)
        self.mask_filename = mask_filename
        self.zip_extension = zip_extension
        self.skip_annotated = skip_annotated
+        self.fs = fs
        self.clips = self._load_clips(clips_file)
        self.index = 0

-    def _load_clips(self, clips_file: Path) -> list[Path]:
+    def _load_clips(self, clips_file: Path) -> list:
        lines = clips_file.read_text().splitlines()
        return [
-            self.data_dir / name.strip()
+            fsjoin(self.data_dir, name.strip())
            for name in lines
            if name.strip() and not name.strip().startswith("#")
        ]

-    def is_annotated(self, path: Path) -> bool:
-        return (self.out_dir / path.stem / self.mask_filename).exists()
+    def is_annotated(self, path) -> bool:
+        mask_path = fsjoin(self.out_dir, fsstem(path), self.mask_filename)
+        if self.fs is None:
+            return Path(mask_path).exists()
+        return self.fs.exists(mask_path)

-    def next(self, specific: str = None) -> Path:
+    def next(self, specific: str = None) -> str:
        if specific:
            return self._resolve_specific(specific)
        return self._pick_next()

-    def _resolve_specific(self, specific: str) -> Path:
-        matches = list(self.data_dir.glob(f"{specific}{self.zip_extension}"))
-        if not matches:
-            p = self.data_dir / specific
-            matches = [p] if p.exists() else []
-        if not matches:
-            raise FileNotFoundError(f"Clip '{specific}' not found in {self.data_dir}")
-        return matches[0]
+    def _resolve_specific(self, specific: str) -> str:
+        if self.fs is None:
+            data_dir = Path(self.data_dir)
+            matches = list(data_dir.glob(f"{specific}{self.zip_extension}"))
+            if not matches:
+                p = data_dir / specific
+                matches = [p] if p.exists() else []
+            if not matches:
+                raise FileNotFoundError(
+                    f"Clip '{specific}' not found in {self.data_dir}"
+                )
+            return str(matches[0])
+        else:
+            pattern = fsjoin(self.data_dir, f"{specific}{self.zip_extension}")
+            matches = self.fs.glob(pattern)
+            if not matches:
+                p = fsjoin(self.data_dir, specific)
+                matches = [p] if self.fs.exists(p) else []
+            if not matches:
+                raise FileNotFoundError(
+                    f"Clip '{specific}' not found in {self.data_dir}"
+                )
+            return matches[0]

-    def _pick_next(self) -> Path:
+    def _pick_next(self) -> str:
        while self.index < len(self.clips):
            clip = self.clips[self.index]
            self.index += 1
--- a/src/river_annotation_tool/config.py
+++ b/src/river_annotation_tool/config.py
@@ -22,6 +22,7 @@ class FilenameConfig:

@dataclass
 class AppConfig:
+    storage: str  # required: 'local' or 's3'
    display_max: int = 480
    fps_fallback: int = 25
    max_frames: int = 100
@@ -71,6 +72,10 @@ def load_optical_flow_config(path: Path) -> OpticalFlowConfig:
 def load_config(path: Path) -> AppConfig:
    with open(path) as f:
        data = yaml.safe_load(f)
+    if "storage" not in data:
+        raise ValueError(
+            f"{path}: missing required field 'storage'. Set it to 'local' or 's3'."
+        )
    fn_data = data.pop("filenames", {})
    cfg = AppConfig(**data)
    cfg.filenames = FilenameConfig(**fn_data)
--- a/src/river_annotation_tool/filesystem.py
+++ b/src/river_annotation_tool/filesystem.py
@@ -0,0 +1,35 @@
+import os
+
+
+_DEFAULT_ENDPOINT = "https://os.zhdk.cloud.switch.ch"
+
+
+def make_fs(storage: str):
+    """Return an S3FileSystem for storage='s3', or None for local."""
+    if storage != "s3":
+        return None
+    import s3fs
+
+    return s3fs.S3FileSystem(
+        key=os.environ["S3_ACCESS_KEY"],
+        secret=os.environ["S3_SECRET_ACCESS_KEY"],
+        client_kwargs={
+            "endpoint_url": os.environ.get("S3_ENDPOINT_URL", _DEFAULT_ENDPOINT)
+        },
+    )
+
+
+def fsjoin(base, *parts: str) -> str:
+    """Join path segments with forward slashes (works for both local and S3)."""
+    return "/".join([str(base).rstrip("/"), *[str(p).strip("/") for p in parts if p]])
+
+
+def fsstem(path) -> str:
+    """Filename stem (no extension) for local Path or S3 string."""
+    name = str(path).replace("\\", "/").split("/")[-1]
+    return name.rsplit(".", 1)[0] if "." in name else name
+
+
+def fsname(path) -> str:
+    """Filename component (with extension) for local Path or S3 string."""
+    return str(path).replace("\\", "/").split("/")[-1]
--- a/src/river_annotation_tool/mask_canvas.py
+++ b/src/river_annotation_tool/mask_canvas.py
@@ -11,7 +11,7 @@ class MaskCanvas:
    """Matplotlib canvas with brush/polygon mask drawing, undo/redo, and erase."""

    _BRUSH_DEFAULT = 5
-    _ALPHA_DEFAULT = 40
+    _ALPHA_DEFAULT = 15
    _BRIGHTNESS_DEFAULT = 0
    _CONTRAST_DEFAULT = 0
    _GAMMA_DEFAULT = 100
--- a/src/river_annotation_tool/video_loader.py
+++ b/src/river_annotation_tool/video_loader.py
@@ -1,20 +1,25 @@
+import io
 import os
 import tempfile
 import zipfile
-from pathlib import Path

 import cv2


 def load_frames(
-    zip_path: Path,
+    zip_path,
    max_frames: int,
    display_max: int,
    fps_fallback: int,
    video_in_zip: str = "left.mp4",
    video_tmp_suffix: str = ".mp4",
+    fs=None,
 ):
-    video_bytes = zipfile.ZipFile(zip_path).read(video_in_zip)
+    if fs is None:
+        video_bytes = zipfile.ZipFile(zip_path).read(video_in_zip)
+    else:
+        with fs.open(str(zip_path), "rb") as f:
+            video_bytes = zipfile.ZipFile(io.BytesIO(f.read())).read(video_in_zip)

    with tempfile.NamedTemporaryFile(suffix=video_tmp_suffix, delete=False) as f:
        f.write(video_bytes)