initial commit

2026-05-29 18:49:01 +08:00
commit 9f0321eff8
21 changed files with 3143 additions and 0 deletions
--- a/benchmark/config.py
+++ b/benchmark/config.py
@@ -0,0 +1,98 @@
+"""
+Benchmark configuration — evaluation-only scene splits and metric definitions.
+
+This config is independent from src.velocity_prediction.config so that
+evaluation scenarios can be changed without touching training config.
+"""
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict, List, Optional
+
+
+# ──────────────────────────── Dataset root ────────────────────────────
+
+DATASET_ROOT = Path(__file__).resolve().parents[1] / "dataset"  # dataset/ next to benchmark/
+
+# ──────────────────────────── Scene splits ────────────────────────────
+
+# The test scenes are the primary evaluation set; val scenes are for
+# checkpoint selection reference.
+
+
+@dataclass
+class SceneGroup:
+    """A scene group: a name, a list of scene dirs, and a difficulty label."""
+    name: str
+    scenes: List[str]
+    difficulty: str = "medium"  # easy / medium / hard
+
+
+# ── Validation scenes (for checkpoint selection reference) ──
+VAL_SCENE_GROUPS: List[SceneGroup] = [
+    # NOTE(review): both active scenes also appear in TEST_SCENE_GROUPS —
+    # confirm that the val/test overlap is intended.
+    SceneGroup(name="indoor_forward_7", scenes=["indoor_forward_7"], difficulty="hard"),
+    SceneGroup(name="outdoor_forward_1", scenes=["outdoor_forward_1"], difficulty="easy"),
+    # Currently disabled: indoor_forward_6 (medium), indoor_forward_9 (easy),
+    # indoor_forward_10 (easy), indoor_forward_5 (medium).
+]
+
+# ── Test scenes (primary evaluation) ──
+TEST_SCENE_GROUPS: List[SceneGroup] = [
+    SceneGroup(name="indoor_forward_7", scenes=["indoor_forward_7"], difficulty="hard"),
+    SceneGroup(name="outdoor_forward_1", scenes=["outdoor_forward_1"], difficulty="easy"),
+    SceneGroup(name="outdoor_forward_5", scenes=["outdoor_forward_5"], difficulty="hard"),
+    SceneGroup(name="indoor_forward_6", scenes=["indoor_forward_6"], difficulty="medium"),
+    SceneGroup(name="indoor_forward_9", scenes=["indoor_forward_9"], difficulty="easy"),
+    SceneGroup(name="indoor_forward_10", scenes=["indoor_forward_10"], difficulty="easy"),
+    SceneGroup(name="indoor_forward_5", scenes=["indoor_forward_5"], difficulty="medium"),
+]
+
+# Flattened scene names, in the order the groups list them
+VAL_SCENES: List[str] = [scene for group in VAL_SCENE_GROUPS for scene in group.scenes]
+TEST_SCENES: List[str] = [scene for group in TEST_SCENE_GROUPS for scene in group.scenes]
+
+# Difficulty label -> test scene names, in TEST_SCENE_GROUPS order
+DIFFICULTY_GROUPS: Dict[str, List[str]] = {}
+for _group in TEST_SCENE_GROUPS:  # underscore name: not exported by `import *`
+    DIFFICULTY_GROUPS.setdefault(_group.difficulty, []).extend(_group.scenes)
+
+
+# ──────────────────────────── Evaluation parameters ────────────────────────────
+
+
+@dataclass
+class EvalConfig:
+    """Parameters used when running evaluation."""
+
+    # Sequence length (must match what the model was trained with)
+    seq_len: int = 8
+    # Batch size for evaluation (can be larger than training)
+    batch_size: int = 64
+    # Data loading
+    num_workers: int = 2
+
+    # Event simulation (must match training config)
+    event_threshold: float = 0.1
+    event_use_log: bool = True
+
+    # Output directory. NOTE(review): described as relative to
+    # benchmark/results/, yet the default is "results" — confirm the base dir.
+    output_dir: str = "results"
+
+    # Whether to generate per-scene plots
+    save_plots: bool = True
+
+    # Device override (None = auto-detect). NOTE(review): nothing in this file
+    # resolves None — confirm the consumer does. Defaults to "cuda".
+    device: Optional[str] = "cuda"
+
+
+# ──────────────────────────── Metrics definition ────────────────────────────
+
+# Names of the metrics computed per-axis and overall
+METRICS: List[str] = ["rmse", "mae", "r2"]
+
+# Module-level instance built from the defaults (nothing enforces a single instance)
+eval_cfg = EvalConfig()