99 lines
3.6 KiB
Python
99 lines
3.6 KiB
Python
"""
|
|
Benchmark configuration — evaluation-only scene splits and metric definitions.
|
|
|
|
This config is independent from src.velocity_prediction.config so that
|
|
evaluation scenarios can be changed without touching training config.
|
|
"""
|
|
|
|
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional
|
|
|
|
|
|
# ──────────────────────────── Dataset root ────────────────────────────
|
|
|
|
# Absolute path of the "dataset" directory that sits next to this file's
# own directory. It is derived from this file's resolved location, so it
# does not depend on the current working directory.
DATASET_ROOT = Path(__file__).resolve().parents[1].joinpath("dataset")
|
|
|
|
# ──────────────────────────── Scene splits ────────────────────────────
|
|
|
|
# Each scene group has a name, a list of scene dirs, and a difficulty label.
|
|
# The test scenes are the primary evaluation set; val scenes are for
|
|
# checkpoint selection reference.
|
|
|
|
|
|
@dataclass
class SceneGroup:
    """A named group of scene directories tagged with a difficulty label."""

    # Identifier of the group.
    name: str

    # Scene directory names that belong to this group.
    scenes: List[str]

    # Difficulty label; the values used in this file are
    # "easy", "medium" and "hard". Not validated here.
    difficulty: str = "medium"
|
|
|
|
|
|
# ── Validation scenes (reference set for checkpoint selection) ──
# NOTE(review): both entries below are also listed in TEST_SCENE_GROUPS;
# confirm that this overlap between validation and test is intentional.
VAL_SCENE_GROUPS: List[SceneGroup] = [
    SceneGroup(
        name="indoor_forward_7",
        scenes=["indoor_forward_7"],
        difficulty="hard",
    ),
    SceneGroup(
        name="outdoor_forward_1",
        scenes=["outdoor_forward_1"],
        difficulty="easy",
    ),
    # Currently disabled candidates:
    # SceneGroup("indoor_forward_6",  ["indoor_forward_6"],  "medium"),
    # SceneGroup("indoor_forward_9",  ["indoor_forward_9"],  "easy"),
    # SceneGroup("indoor_forward_10", ["indoor_forward_10"], "easy"),
    # SceneGroup("indoor_forward_5",  ["indoor_forward_5"],  "medium"),
]
|
|
|
|
# ── Test scenes (primary evaluation set) ──
# Each group currently holds exactly one scene, named after the group.
TEST_SCENE_GROUPS: List[SceneGroup] = [
    SceneGroup(
        name="indoor_forward_7",
        scenes=["indoor_forward_7"],
        difficulty="hard",
    ),
    SceneGroup(
        name="outdoor_forward_1",
        scenes=["outdoor_forward_1"],
        difficulty="easy",
    ),
    SceneGroup(
        name="outdoor_forward_5",
        scenes=["outdoor_forward_5"],
        difficulty="hard",
    ),
    SceneGroup(
        name="indoor_forward_6",
        scenes=["indoor_forward_6"],
        difficulty="medium",
    ),
    SceneGroup(
        name="indoor_forward_9",
        scenes=["indoor_forward_9"],
        difficulty="easy",
    ),
    SceneGroup(
        name="indoor_forward_10",
        scenes=["indoor_forward_10"],
        difficulty="easy",
    ),
    SceneGroup(
        name="indoor_forward_5",
        scenes=["indoor_forward_5"],
        difficulty="medium",
    ),
]
|
|
|
|
# Flattened scene-name lists, in the same order as the groups above.
VAL_SCENES: List[str] = [
    scene for group in VAL_SCENE_GROUPS for scene in group.scenes
]
TEST_SCENES: List[str] = [
    scene for group in TEST_SCENE_GROUPS for scene in group.scenes
]

# Test scenes bucketed by difficulty label. Only TEST_SCENE_GROUPS is
# consulted; validation groups do not contribute to this mapping.
DIFFICULTY_GROUPS: Dict[str, List[str]] = {}
for g in TEST_SCENE_GROUPS:
    if g.difficulty not in DIFFICULTY_GROUPS:
        DIFFICULTY_GROUPS[g.difficulty] = []
    DIFFICULTY_GROUPS[g.difficulty].extend(g.scenes)
|
|
|
|
|
|
# ──────────────────────────── Evaluation parameters ────────────────────────────
|
|
|
|
|
|
@dataclass
class EvalConfig:
    """Parameters used when running evaluation.

    Every field has a default, so ``EvalConfig()`` yields the stock
    configuration. The dataclass is not frozen, so fields can be
    reassigned after construction.
    """

    # Sequence length (must match what the model was trained with).
    seq_len: int = 8

    # Batch size for evaluation (can be larger than training).
    batch_size: int = 64

    # Data loading: worker count.
    # NOTE(review): presumably forwarded to the data loader — confirm at the call site.
    num_workers: int = 2

    # Event simulation (must match training config).
    event_threshold: float = 0.1
    event_use_log: bool = True

    # Output directory.
    # NOTE(review): the original comment said "relative to benchmark/results/",
    # yet the default is itself "results" — confirm which base directory the
    # consumer joins this with.
    output_dir: str = "results"

    # Whether to generate per-scene plots.
    save_plots: bool = True

    # Device override (None = auto-detect). Annotated as Optional so that the
    # documented None value type-checks; the default remains "cuda".
    # NOTE(review): the auto-detect handling lives in the consumer, which is
    # not visible in this file.
    device: Optional[str] = "cuda"
|
|
|
|
|
|
# ──────────────────────────── Metrics definition ────────────────────────────
|
|
|
|
# Identifiers of the metrics computed per-axis and overall.
METRICS: List[str] = [
    "rmse",
    "mae",
    "r2",
]
|
|
|
|
# Singleton
|
|
# Module-level instance built with all default values. It is created once at
# import time, and the dataclass is mutable, so attribute changes made through
# this object are seen by every importer of this module.
eval_cfg: EvalConfig = EvalConfig()
|