refactor: replace rotation vector with body up vector for tilt input

- Replace body_attitude() with body_up_vector(): rotate world-up [0,0,1]
  by corrected world→body quaternion to get body up vector (pitch/roll only,
  no yaw). Matches DiffPhysDrone's env.R[:, 2] approach.
- Update ComputeTilt transform to use body_up_vector_np
- Update visualize_dataset.py to display Euler angles and body up vector
- Update model.py comments and disable CNN (zero output)
- Sync AGENTS.md with new architecture description

Generated by Mistral Vibe (ds-v4-flash).
Co-Authored-By: Mistral Vibe <vibe@mistral.ai>
This commit is contained in:
2026-06-04 21:02:08 +08:00
parent 8e1a98207e
commit 0a504d648e
6 changed files with 218 additions and 36 deletions

View File

@@ -1,9 +1,9 @@
"""
VelocityPredictionModel: CNN + PoseMLP → concat → GRU → Head → [vx_body, vy_body].
VelocityPredictionModel: CNN + PoseMLP → concat → GRU → Head → [v_right, v_forward].
Architecture:
Event frame (1, H, W) ──► CNN ──┐
Tilt angles (3,) ──► MLP ──┤──► concat ──► GRU ──► Head ──► [vx, vy]
Tilt angles (3,) ──► MLP ──┤──► concat ──► GRU ──► Head ──► [v_right, v_forward]
"""
import torch
@@ -92,7 +92,7 @@ class VelocityPredictionModel(nn.Module):
events: (B, S, 1, H, W)
tilt: (B, S, 3)
Output:
v_body: (B, 2) — body-frame [vx, vy] for the last frame in the sequence
v_body: (B, 2) — body-frame [v_forward, v_lateral] for the last frame in the sequence
"""
def __init__(self, cnn_cfg=model_cfg.cnn, pose_cfg=model_cfg.pose_mlp,
@@ -129,10 +129,13 @@ class VelocityPredictionModel(nn.Module):
tilt: (B, S, 3)
Returns:
v_body: (B, 2) predicted body-frame [vx, vy] at the last timestep
v_body: (B, 2) predicted body-frame [v_forward, v_lateral] at the last timestep
"""
# Per-frame encoding
cnn_feat = self.cnn(events) # (B, S, 256)
# cnn_feat = self.cnn(events) # (B, S, 256)
B, S = events.shape[:2]
cnn_feat = events.new_zeros(B, S, self.cnn.out_dim) # 全零替代
pose_feat = self.pose_mlp(tilt) # (B, S, 64)
# Fuse per frame

View File

@@ -15,7 +15,7 @@ import numpy as np
import cv2
from src.event_utils import EventProcessor
from src.velocity_prediction.utils import body_attitude_np, world_vel_to_body_np
from src.velocity_prediction.utils import body_up_vector_np, world_vel_to_body_np
from src.velocity_prediction.config import VELOCITY_MEAN, VELOCITY_STD
@@ -75,18 +75,18 @@ class SimulateEvents:
class ComputeTilt:
"""Compute body attitude rotation vector from pose quaternion.
"""Compute body up vector from pose quaternion.
Applies the static calibration R_odom_to_body to obtain the true
world→body quaternion, then converts to a rotation vector.
Unlike the old approach, yaw is preserved — the model can decide
how to use it.
world→body quaternion, then rotates world-up [0,0,1] to get the
body up vector. This encodes only pitch and roll (no yaw),
matching the DiffPhysDrone approach (env.R[:, 2]).
"""
def __call__(self, sample: dict) -> dict:
q = sample["pose"][3:7] # [qx, qy, qz, qw] world→odom
att = body_attitude_np(q) # (3,) rotation vector of true body
sample["tilt"] = att.astype(np.float32)
up = body_up_vector_np(q) # (3,) body up vector, unit norm
sample["tilt"] = up.astype(np.float32)
return sample

View File

@@ -229,31 +229,39 @@ def decompose_tilt_from_odom(q_world_to_odom: torch.Tensor) -> torch.Tensor:
return decompose_tilt(q_world_to_body)
# ──────────────────────────── Body attitude (new approach) ────────────────────────────
# ──────────────────────────── Body up vector (DiffPhysDrone style) ────────────────────────────
#
# Instead of removing yaw from the body quaternion, we directly use the
# corrected world→body quaternion's rotation vector. This preserves yaw
# information and lets the model decide how to use it — analogous to how
# DiffPhysDrone uses the body-up vector as a tilt feature.
# Instead of using a rotation vector (which includes yaw), we extract the
# body-frame up vector by rotating world-up [0,0,1] by the corrected
# world→body quaternion. This is exactly what DiffPhysDrone does:
# tilt_feature = env.R[:, 2] (third column of rotation matrix = body up)
#
# The body up vector encodes pitch and roll only — yaw has no effect on it.
# It's a unit vector (norm=1), so no normalization is needed.
def body_attitude(q_world_to_odom: torch.Tensor) -> torch.Tensor:
def body_up_vector(q_world_to_odom: torch.Tensor) -> torch.Tensor:
"""
Compute the true body attitude rotation vector from GT odom quaternion.
Compute the body-frame up vector from GT odom quaternion.
Applies the static calibration R_odom_to_body, then converts the
resulting world→body quaternion directly to a rotation vector.
Unlike decompose_tilt, this preserves yaw information.
Applies the static calibration R_odom_to_body, then rotates the
world up vector [0, 0, 1] by the resulting world→body quaternion.
The result is the body's up direction expressed in world coordinates —
identical to the third column of the rotation matrix R[:, 2].
This encodes only pitch and roll; yaw has no effect.
Args:
q_world_to_odom: (..., 4) world→odom unit quaternion from GT
Returns:
attitude: (..., 3) rotation vector [rx, ry, rz] of the true body
up: (..., 3) body up vector [x, y, z] in world coordinates, unit norm
"""
q_R = quat_from_matrix(R_ODOM_TO_BODY.to(q_world_to_odom.device))
q_world_to_body = quat_mul(q_world_to_odom, q_R)
q_world_to_body = quat_normalize(q_world_to_body)
return quat_to_rotvec(q_world_to_body)
world_up = torch.zeros_like(q_world_to_body[..., :3])
world_up[..., 2] = 1.0 # [0, 0, 1]
return quat_rotate(q_world_to_body, world_up)
def quat_to_euler(q: torch.Tensor) -> torch.Tensor:
@@ -298,11 +306,11 @@ def decompose_tilt_np(q: np.ndarray) -> np.ndarray:
return tilt.numpy()
def body_attitude_np(q: np.ndarray) -> np.ndarray:
"""NumPy version of body_attitude."""
def body_up_vector_np(q: np.ndarray) -> np.ndarray:
"""NumPy version of body_up_vector."""
q_t = torch.from_numpy(q)
att = body_attitude(q_t)
return att.numpy()
up = body_up_vector(q_t)
return up.numpy()
def quat_to_euler_np(q: np.ndarray) -> np.ndarray: