fix: evaluate each scene independently to avoid plot mixing

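Multi-scene evaluation previously concatenated all scenes into one continuous trace, so the jumps at scene boundaries appeared as glitches in the plots. Each scene is now evaluated separately, and a NaN separator row is inserted between scenes when the results are concatenated for plotting. The overall score is now the unweighted mean of the per-scene RMSE xy values rather than the single value returned by one evaluation over all test scenes; per-axis (vx/vy) RMSE is printed per scene only.

Generated by Mistral Vibe (deepseek-v4-flash).
Co-Authored-By: Mistral Vibe <vibe@mistral.ai>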
2026-06-05 16:47:42 +08:00
parent cb9936542e
commit e7e773a48f
1 changed files with 32 additions and 31 deletions
@@ -170,42 +170,43 @@ def main():
    model.to(device)
    print(f"Loaded checkpoint from {args.checkpoint} (epoch={ckpt.get('epoch', '?')})")

-    # Validation loader (use test scenes for final eval)
+    # Evaluate each scene independently → NaN gaps prevent plot mixing
    from src.velocity_prediction.config import TEST_SCENES
-    loader = create_val_loader(
-        scene_names=TEST_SCENES,
-        seq_len=train_cfg.seq_len,
-        batch_size=train_cfg.batch_size,
-        num_workers=2,
-        event_threshold=train_cfg.event_threshold,
-        event_use_log=train_cfg.event_use_log,
-    )
+    all_preds, all_targets = [], []
+    scene_rmses = []

-    # # ── Quick event diagnostics: inspect one batch ───────────────
-    # print("\n========== Event Frame Diagnostics ==========")
-    # sample_batch = next(iter(loader))
-    # ev = sample_batch["events"]  # (B, S, 1, H, W)
-    # print(f"Events shape: {ev.shape}")
-    # print(f"Events dtype: {ev.dtype}")
-    # print(f"Events value counts:  -1: {(ev == -1).sum().item()}, "
-    #       f"0: {(ev == 0).sum().item()}, +1: {(ev == 1).sum().item()}")
-    # total_el = ev.numel()
-    # nonzero = (ev != 0).sum().item()
-    # print(f"Non-zero ratio: {nonzero / total_el:.6f} ({nonzero}/{total_el})")
-    # print(f"Per-sample non-zero: {[(ev[b] != 0).sum().item() for b in range(min(4, ev.shape[0]))]}")
-    # print("=============================================\n")
+    for scene in TEST_SCENES:
+        loader = create_val_loader(
+            scene_names=[scene],
+            seq_len=train_cfg.seq_len,
+            batch_size=train_cfg.batch_size,
+            num_workers=2,
+            event_threshold=train_cfg.event_threshold,
+            event_use_log=train_cfg.event_use_log,
+        )
+        results = evaluate(model, loader, device)
+        n = len(results["preds"])
+        print(f"  [{scene}] RMSE vx={results['rmse_x']:.4f}  vy={results['rmse_y']:.4f}  "
+              f"xy={results['rmse_xy']:.4f}  samples={n}")
+        scene_rmses.append(results["rmse_xy"])

-    # Evaluate
-    results = evaluate(model, loader, device)
-    print(f"\nEvaluation results on test scenes: {TEST_SCENES}")
-    print(f"  RMSE vx: {results['rmse_x']:.4f} m/s")
-    print(f"  RMSE vy: {results['rmse_y']:.4f} m/s")
-    print(f"  RMSE xy: {results['rmse_xy']:.4f} m/s")
+        all_preds.append(results["preds"])
+        all_targets.append(results["targets"])
+        # NaN separator → plot won't connect discontinuous scenes
+        sep = np.full((1, 2), np.nan, dtype=np.float32)
+        all_preds.append(sep)
+        all_targets.append(sep)

-    # Plots
+    # Overall RMSE = mean across scenes (unweighted, avoids scene size bias)
+    rmse_xy = np.mean(scene_rmses)
+    print(f"\nOverall ({len(TEST_SCENES)} scenes, mean across scenes): RMSE xy={rmse_xy:.4f} m/s")
+
+    # Plots (with NaN gaps between scenes)
    if args.plot:
-        plot_results(results["preds"], results["targets"], "eval_velocity.png")
-        plot_scatter(results["preds"], results["targets"], "eval_scatter.png")
+        preds_cat = np.concatenate(all_preds, axis=0)
+        targets_cat = np.concatenate(all_targets, axis=0)
+        plot_results(preds_cat, targets_cat, "eval_velocity.png")
+        plot_scatter(preds_cat, targets_cat, "eval_scatter.png")


 if __name__ == "__main__":