diff --git a/README.md b/README.md index 6386027..266daf2 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,7 @@ max_frames: 100 # max frames to extract per clip data_dir: data/clips # directory containing ZIP archives out_dir: data/annotation_results clips_file: config/clips.txt +# optical_flow_config_file: config/optical_flow_config.yaml # optional, enables Auto Segment questions: - section: River @@ -108,6 +109,23 @@ questions: Add, remove, or reorder questions directly in the YAML — the UI rebuilds automatically. `key` is what gets saved in `metadata.json`; `default` selects the pre-checked option (omit or set to `null` to leave unselected). +### Optical flow segmentation (optional) + +Set `optical_flow_config_file` in `config.yaml` to point to a YAML file that enables the **Auto Segment** button. When pressed, the tool computes a river mask from the loaded frames and replaces the current mask (undoable). The segmentation combines two criteria: + +- **Optical flow magnitude** — pixels where the temporal median of frame-to-frame flow (scaled by FPS) exceeds a fraction of the maximum are considered moving water. +- **Brightness** — pixels outside a brightness window are excluded (removes sky, saturated glare, etc.). + +```yaml +# config/optical_flow_config.yaml +enabled: true +norm_squared_threshold: 0.06 # fraction of the max flow magnitude that counts as moving (squared internally and compared against flow²) +gaussian_kernel: [5, 5] # blur kernel applied to the reference frame before brightness check +brightness_range: [2, 253] # (min, max) greyscale brightness to keep; both bounds are exclusive +``` + +`enabled: false` disables the button without removing the config file. + ## Clip list file `config/clips.txt` lists the clip filenames to annotate, one per line. Lines starting with `#` are ignored. Clips are processed in order; already-annotated clips (those with an existing `mask.png`) are skipped automatically. Pass `--no-skip` to include them. When the last clip is reached, a dialog appears and the app exits. 
@@ -176,6 +194,7 @@ Polygons are drawn as overlays and do not affect the mask until you use **Fill** | Toggle mask overlay | **Hide Mask / Show Mask** — button turns red when hidden; does not affect mask data | | Mask transparency | **Mask Alpha** slider; click **↺** to reset | | Load mask from previous clip | **Load Prev Mask** — copies the saved mask of the previous clip; undoable with **Undo** | +| Optical flow first guess | **Auto Segment** — runs automatic river segmentation and replaces the current mask; undoable with **Undo**. Only enabled when `optical_flow_config_file` is set in `config.yaml`. | ### Image display adjustments @@ -263,18 +282,20 @@ When a clip is loaded that already has a saved `mask.png` and `metadata.json`, t ``` config/ - config.yaml # Your local config (git-ignored, copy from example) - config.example.yaml # Example config to copy and edit - clips.txt # Your clip list (git-ignored, copy from example) - clips.example.txt # Example clip list + config.yaml # Your local config (git-ignored, copy from example) + config.example.yaml # Example config to copy and edit + clips.txt # Your clip list (git-ignored, copy from example) + clips.example.txt # Example clip list + optical_flow_config.yaml # Optional optical flow parameters (enable via config.yaml) src/river_annotation_tool/ - annotation_script.py # Entry point — argument parsing and app launch - annotator.py # Main QMainWindow — orchestrates all components - clip_selector.py # Reads the clip list and picks the next clip - mask_canvas.py # Drawing widget — brush, undo, erase, mouse events - video_loader.py # ZIP extraction and frame resizing - config.py # AppConfig dataclass and YAML loader - __init__.py # Package version + annotation_script.py # Entry point — argument parsing and app launch + annotator.py # Main QMainWindow — orchestrates all components + clip_selector.py # Reads the clip list and picks the next clip + mask_canvas.py # Drawing widget — brush, undo, erase, mouse events + 
video_loader.py # ZIP extraction and frame resizing + compute_optical_flow.py # Optical flow river segmentation (Auto Segment button) + config.py # AppConfig dataclass and YAML loader + __init__.py # Package version pyproject.toml # Project metadata and dependencies ``` diff --git a/config/config.example.yaml b/config/config.example.yaml index 18c3c4c..28abc59 100644 --- a/config/config.example.yaml +++ b/config/config.example.yaml @@ -19,6 +19,7 @@ max_frames: 100 data_dir: data/filtered_data out_dir: data/annotation_results clips_file: config/clips.txt +# optical_flow_config_file: config/optical_flow_config.yaml questions: - section: River diff --git a/config/optical_flow_config.yaml b/config/optical_flow_config.yaml new file mode 100644 index 0000000..3049af6 --- /dev/null +++ b/config/optical_flow_config.yaml @@ -0,0 +1,4 @@ +enabled: true +norm_squared_threshold: 0.06 +gaussian_kernel: [5, 5] +brightness_range: [2, 253] diff --git a/src/river_annotation_tool/annotator.py b/src/river_annotation_tool/annotator.py index f8c3753..8c01bfd 100644 --- a/src/river_annotation_tool/annotator.py +++ b/src/river_annotation_tool/annotator.py @@ -20,7 +20,8 @@ from PySide6.QtWidgets import ( ) from .clip_selector import ClipSelector -from .config import AppConfig +from .compute_optical_flow import compute_optical_flow_mask +from .config import AppConfig, load_optical_flow_config from .mask_canvas import MaskCanvas from .video_loader import load_frames @@ -38,6 +39,11 @@ class Annotator(QMainWindow): self.cfg = config self.out_dir = Path(config.out_dir) self.extras = extras + self.of_cfg = ( + load_optical_flow_config(Path(config.optical_flow_config_file)) + if config.optical_flow_config_file + else None + ) self.selector = ClipSelector( data_dir=Path(config.data_dir), @@ -114,9 +120,17 @@ class Annotator(QMainWindow): btn_undo10 = QPushButton("Undo×10") btn_redo = QPushButton("Redo") btn_load_prev_mask = QPushButton("Load Prev Mask") + btn_auto_segment = QPushButton("Auto 
def compute_optical_flow_mask(
    frames: list[np.ndarray],
    fps: float,
    norm_squared_threshold: float,
    gaussian_kernel: tuple[int, int],
    brightness_range: tuple[int, int],
) -> np.ndarray:
    """Return a binary mask (uint8, values 0/1) from optical flow + brightness.

    A pixel is set to 1 only when it passes both tests:

    * flow test: the squared norm of the per-pixel temporal median flow is at
      least ``max_squared_norm * norm_squared_threshold**2``;
    * brightness test: the blurred, grey-scaled middle frame is strictly
      between ``brightness_range[0]`` and ``brightness_range[1]``.

    Args:
        frames: Frames of one clip, all with the same shape (they are stacked
            into one array). They are converted with ``cv2.COLOR_RGB2GRAY``,
            i.e. treated as RGB images.
        fps: Factor applied to every frame-to-frame flow field.
        norm_squared_threshold: Relative flow threshold. It is squared again
            before being multiplied with the maximum squared norm, so it
            effectively acts as a fraction of the maximum flow magnitude.
        gaussian_kernel: Kernel size passed to ``cv2.GaussianBlur``.
        brightness_range: Exclusive ``(low, high)`` grey-level bounds.

    Returns:
        ``uint8`` array with the height and width of the frames. All zeros
        when only a single frame is given, because no flow can be computed.

    Raises:
        ValueError: If ``frames`` is empty (there is no frame to take the
            mask shape from).
    """
    if len(frames) == 0:
        raise ValueError(
            "compute_optical_flow_mask requires at least one frame"
        )
    if len(frames) < 2:
        return np.zeros(frames[0].shape[:2], dtype=np.uint8)

    # Remove the static background (per-pixel temporal mean), then rescale
    # the residual globally to the full 0..255 range.
    frames_arr = np.stack(frames).astype(np.float64)
    centered = frames_arr - np.mean(frames_arr, axis=0)
    lowest, highest = centered.min(), centered.max()
    if highest > lowest:
        standardized = (
            (centered - lowest) / (highest - lowest) * 255
        ).astype(np.uint8)
    else:
        # Every frame is identical: nothing moves, avoid dividing by zero.
        standardized = np.zeros_like(frames_arr, dtype=np.uint8)

    gray = np.stack(
        [cv2.cvtColor(f, cv2.COLOR_RGB2GRAY) for f in standardized]
    )

    # One flow field per consecutive frame pair, scaled by fps.
    flow_data = np.zeros((len(gray) - 1,) + gray.shape[1:] + (2,))
    for i, (previous, following) in enumerate(zip(gray[:-1], gray[1:])):
        flow_data[i] = fps * cv2.optflow.calcOpticalFlowSparseToDense(
            previous, following
        )

    # Component-wise median over time.
    optical_flow = np.median(flow_data, axis=0)

    flow_norm_sq = np.sum(optical_flow**2, axis=-1)
    max_norm_sq = np.max(flow_norm_sq)
    if max_norm_sq > 0:
        flow_mask = flow_norm_sq >= max_norm_sq * norm_squared_threshold**2
    else:
        flow_mask = np.zeros(flow_norm_sq.shape, dtype=bool)

    # The brightness test uses the original (not mean-subtracted) middle frame.
    reference_frame = frames[len(frames) // 2]
    smoothed = cv2.GaussianBlur(reference_frame, gaussian_kernel, 0)
    gray_ref = cv2.cvtColor(smoothed, cv2.COLOR_RGB2GRAY)
    low, high = brightness_range[0], brightness_range[1]
    brightness_mask = (gray_ref > low) & (gray_ref < high)

    return np.logical_and(brightness_mask, flow_mask).astype(np.uint8)