diff --git a/src/velocity_prediction/evaluate.py b/src/velocity_prediction/evaluate.py index d8b515d..a0d790f 100644 --- a/src/velocity_prediction/evaluate.py +++ b/src/velocity_prediction/evaluate.py @@ -170,42 +170,43 @@ def main(): model.to(device) print(f"Loaded checkpoint from {args.checkpoint} (epoch={ckpt.get('epoch', '?')})") - # Validation loader (use test scenes for final eval) + # Evaluate each scene independently → NaN gaps prevent plot mixing from src.velocity_prediction.config import TEST_SCENES - loader = create_val_loader( - scene_names=TEST_SCENES, - seq_len=train_cfg.seq_len, - batch_size=train_cfg.batch_size, - num_workers=2, - event_threshold=train_cfg.event_threshold, - event_use_log=train_cfg.event_use_log, - ) + all_preds, all_targets = [], [] + scene_rmses = [] - # # ── Quick event diagnostics: inspect one batch ─────────────── - # print("\n========== Event Frame Diagnostics ==========") - # sample_batch = next(iter(loader)) - # ev = sample_batch["events"] # (B, S, 1, H, W) - # print(f"Events shape: {ev.shape}") - # print(f"Events dtype: {ev.dtype}") - # print(f"Events value counts: -1: {(ev == -1).sum().item()}, " - # f"0: {(ev == 0).sum().item()}, +1: {(ev == 1).sum().item()}") - # total_el = ev.numel() - # nonzero = (ev != 0).sum().item() - # print(f"Non-zero ratio: {nonzero / total_el:.6f} ({nonzero}/{total_el})") - # print(f"Per-sample non-zero: {[(ev[b] != 0).sum().item() for b in range(min(4, ev.shape[0]))]}") - # print("=============================================\n") + for scene in TEST_SCENES: + loader = create_val_loader( + scene_names=[scene], + seq_len=train_cfg.seq_len, + batch_size=train_cfg.batch_size, + num_workers=2, + event_threshold=train_cfg.event_threshold, + event_use_log=train_cfg.event_use_log, + ) + results = evaluate(model, loader, device) + n = len(results["preds"]) + print(f" [{scene}] RMSE vx={results['rmse_x']:.4f} vy={results['rmse_y']:.4f} " + f"xy={results['rmse_xy']:.4f} samples={n}") + scene_rmses.append(results["rmse_xy"]) - # Evaluate - results = evaluate(model, loader, device) - print(f"\nEvaluation results on test scenes: {TEST_SCENES}") - print(f" RMSE vx: {results['rmse_x']:.4f} m/s") - print(f" RMSE vy: {results['rmse_y']:.4f} m/s") - print(f" RMSE xy: {results['rmse_xy']:.4f} m/s") + all_preds.append(results["preds"]) + all_targets.append(results["targets"]) + # NaN separator → plot won't connect discontinuous scenes + sep = np.full((1, 2), np.nan, dtype=np.float32) + all_preds.append(sep) + all_targets.append(sep) - # Plots + # Overall RMSE = mean across scenes (unweighted, avoids scene size bias) + rmse_xy = np.mean(scene_rmses) + print(f"\nOverall ({len(TEST_SCENES)} scenes, mean across scenes): RMSE xy={rmse_xy:.4f} m/s") + + # Plots (with NaN gaps between scenes) if args.plot: - plot_results(results["preds"], results["targets"], "eval_velocity.png") - plot_scatter(results["preds"], results["targets"], "eval_scatter.png") + preds_cat = np.concatenate(all_preds, axis=0) + targets_cat = np.concatenate(all_targets, axis=0) + plot_results(preds_cat, targets_cat, "eval_velocity.png") + plot_scatter(preds_cat, targets_cat, "eval_scatter.png") if __name__ == "__main__":