Files
uzh-fpv-sv-test/benchmark/config.py
2026-05-29 18:49:01 +08:00

99 lines
3.6 KiB
Python

"""
Benchmark configuration — evaluation-only scene splits and metric definitions.
This config is independent from src.velocity_prediction.config so that
evaluation scenarios can be changed without touching training config.
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Dict
# ──────────────────────────── Dataset root ────────────────────────────
# Absolute location of the "dataset" folder: resolve this file's path, take
# the parent of the directory that contains it, and append "dataset".
DATASET_ROOT = Path(__file__).resolve().parents[1].joinpath("dataset")
# ──────────────────────────── Scene splits ────────────────────────────
# Each scene group has a name, a list of scene dirs, and a difficulty label.
# The test scenes are the primary evaluation set; val scenes are for
# checkpoint selection reference.
@dataclass
class SceneGroup:
    """A named collection of scene directories sharing one difficulty label.

    Fields are declared in positional-constructor order:
    ``SceneGroup(name, scenes, difficulty)``.
    """

    # Identifier of the group.
    name: str
    # Scene directory names that belong to this group.
    scenes: List[str]
    # Difficulty label; the values used in this file are
    # "easy", "medium" and "hard".
    difficulty: str = "medium"
# ── Validation scenes (for checkpoint selection reference) ──
# NOTE(review): both active validation scenes are also listed in
# TEST_SCENE_GROUPS — confirm that this overlap is intended.
VAL_SCENE_GROUPS: List[SceneGroup] = [
    SceneGroup(name="indoor_forward_7", scenes=["indoor_forward_7"], difficulty="hard"),
    SceneGroup(name="outdoor_forward_1", scenes=["outdoor_forward_1"], difficulty="easy"),
    # Currently disabled candidates (uncomment to add to the validation set):
    # SceneGroup("indoor_forward_6", ["indoor_forward_6"], "medium"),
    # SceneGroup("indoor_forward_9", ["indoor_forward_9"], "easy"),
    # SceneGroup("indoor_forward_10", ["indoor_forward_10"], "easy"),
    # SceneGroup("indoor_forward_5", ["indoor_forward_5"], "medium"),
]
# ── Test scenes (primary evaluation) ──
# One single-scene group per entry; the group name equals the scene name.
TEST_SCENE_GROUPS: List[SceneGroup] = [
    SceneGroup(name="indoor_forward_7", scenes=["indoor_forward_7"], difficulty="hard"),
    SceneGroup(name="outdoor_forward_1", scenes=["outdoor_forward_1"], difficulty="easy"),
    SceneGroup(name="outdoor_forward_5", scenes=["outdoor_forward_5"], difficulty="hard"),
    SceneGroup(name="indoor_forward_6", scenes=["indoor_forward_6"], difficulty="medium"),
    SceneGroup(name="indoor_forward_9", scenes=["indoor_forward_9"], difficulty="easy"),
    SceneGroup(name="indoor_forward_10", scenes=["indoor_forward_10"], difficulty="easy"),
    SceneGroup(name="indoor_forward_5", scenes=["indoor_forward_5"], difficulty="medium"),
]
# Flat scene-name lists for convenience; group order and the order of scenes
# inside each group are preserved.
VAL_SCENES: List[str] = [
    scene_name
    for scene_group in VAL_SCENE_GROUPS
    for scene_name in scene_group.scenes
]
TEST_SCENES: List[str] = [
    scene_name
    for scene_group in TEST_SCENE_GROUPS
    for scene_name in scene_group.scenes
]
# Difficulty grouping: maps each difficulty label to the test scenes that
# carry it, filled in TEST_SCENE_GROUPS order.
DIFFICULTY_GROUPS: Dict[str, List[str]] = {}
for g in TEST_SCENE_GROUPS:
    # setdefault creates the bucket the first time a label is encountered,
    # then the group's scenes are appended to it.
    DIFFICULTY_GROUPS.setdefault(g.difficulty, []).extend(g.scenes)
# ──────────────────────────── Evaluation parameters ────────────────────────────
@dataclass
class EvalConfig:
    """Parameters used when running evaluation.

    Every field has a default, so ``EvalConfig()`` yields a complete
    configuration; individual values can be overridden by keyword.
    """

    # Sequence length (must match what the model was trained with)
    seq_len: int = 8

    # Batch size for evaluation (can be larger than training)
    batch_size: int = 64

    # Data loading
    num_workers: int = 2

    # Event simulation (must match training config)
    event_threshold: float = 0.1
    event_use_log: bool = True

    # Output directory.
    # NOTE(review): previously described as "relative to benchmark/results/"
    # while the default is itself "results" — confirm the intended base
    # directory against the code that consumes this value.
    output_dir: str = "results"

    # Whether to generate per-scene plots
    save_plots: bool = True

    # Device used for evaluation; defaults to "cuda".
    # NOTE(review): the earlier comment read "None = auto-detect", yet the
    # field is typed ``str`` and defaults to "cuda" — confirm whether the
    # consumer actually accepts None before relying on auto-detection.
    device: str = "cuda"
# ──────────────────────────── Metrics definition ────────────────────────────
# Names of the metrics computed per-axis and overall.
METRICS: List[str] = [
    "rmse",
    "mae",
    "r2",
]
# Shared module-level instance built entirely from EvalConfig's defaults.
# (Nothing here prevents callers from creating further EvalConfig objects.)
eval_cfg: EvalConfig = EvalConfig()