update: 观测空间归一化
相比未归一化前略有涨点,角度跟随较为灵敏,现在怀疑是marker的方向画错了
This commit is contained in:
@@ -39,6 +39,7 @@ FLEXR_CONFIG = ArticulationCfg(
|
|||||||
"wheel_FL": ImplicitActuatorCfg(
|
"wheel_FL": ImplicitActuatorCfg(
|
||||||
joint_names_expr=["wheel_FL_[1-5]"],
|
joint_names_expr=["wheel_FL_[1-5]"],
|
||||||
effort_limit_sim=100.0, # 最大扭矩 (N·m)
|
effort_limit_sim=100.0, # 最大扭矩 (N·m)
|
||||||
|
velocity_limit_sim=20.0,
|
||||||
# velocity_limit_sim=100.0, # 可选:最大速度限制
|
# velocity_limit_sim=100.0, # 可选:最大速度限制
|
||||||
stiffness=0.0, # 设置为0表示纯速度/扭矩控制
|
stiffness=0.0, # 设置为0表示纯速度/扭矩控制
|
||||||
damping=10000.0 # 高阻尼确保轮子自由转动但稳定
|
damping=10000.0 # 高阻尼确保轮子自由转动但稳定
|
||||||
@@ -46,18 +47,21 @@ FLEXR_CONFIG = ArticulationCfg(
|
|||||||
"wheel_FR": ImplicitActuatorCfg(
|
"wheel_FR": ImplicitActuatorCfg(
|
||||||
joint_names_expr=["wheel_FR_[1-5]"],
|
joint_names_expr=["wheel_FR_[1-5]"],
|
||||||
effort_limit_sim=100.0,
|
effort_limit_sim=100.0,
|
||||||
|
velocity_limit_sim=20.0,
|
||||||
stiffness=0.0,
|
stiffness=0.0,
|
||||||
damping=10000.0
|
damping=10000.0
|
||||||
),
|
),
|
||||||
"wheel_BL": ImplicitActuatorCfg(
|
"wheel_BL": ImplicitActuatorCfg(
|
||||||
joint_names_expr=["wheel_BL_[1-5]"],
|
joint_names_expr=["wheel_BL_[1-5]"],
|
||||||
effort_limit_sim=100.0,
|
effort_limit_sim=100.0,
|
||||||
|
velocity_limit_sim=20.0,
|
||||||
stiffness=0.0,
|
stiffness=0.0,
|
||||||
damping=10000.0
|
damping=10000.0
|
||||||
),
|
),
|
||||||
"wheel_BR": ImplicitActuatorCfg(
|
"wheel_BR": ImplicitActuatorCfg(
|
||||||
joint_names_expr=["wheel_BR_[1-5]"],
|
joint_names_expr=["wheel_BR_[1-5]"],
|
||||||
effort_limit_sim=100.0,
|
effort_limit_sim=100.0,
|
||||||
|
velocity_limit_sim=20.0,
|
||||||
stiffness=0.0,
|
stiffness=0.0,
|
||||||
damping=10000.0
|
damping=10000.0
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -234,28 +234,40 @@ class FlexrV0Env(DirectRLEnv):
|
|||||||
|
|
||||||
def _get_observations(self) -> dict:
|
def _get_observations(self) -> dict:
|
||||||
# 计算自身坐标系重力的矢量值
|
# 计算自身坐标系重力的矢量值
|
||||||
# 世界坐标系下的重力向量 (0, 0, -1)
|
|
||||||
gravity_world = torch.tensor([0.0, 0.0, -1.0], device=self.device).repeat(self.num_envs, 1)
|
gravity_world = torch.tensor([0.0, 0.0, -1.0], device=self.device).repeat(self.num_envs, 1)
|
||||||
# 转换到机体坐标系
|
|
||||||
gravity_body = quat_apply_inverse(self.orientations, gravity_world)
|
gravity_body = quat_apply_inverse(self.orientations, gravity_world)
|
||||||
|
|
||||||
# arm 关节的位置和速度
|
# --- 归一化处理 ---
|
||||||
arm_pos = self.joint_pos[:, self._arm_joint_indices] # [num_envs, 4]
|
|
||||||
arm_vel = self.joint_vel[:, self._arm_joint_indices] # [num_envs, 4]
|
|
||||||
|
|
||||||
# 轮组速度
|
# arm 关节位置归一化 (-π/2, π/2) -> (-1, 1)
|
||||||
wheel_FL_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_FL"]].mean(dim=1, keepdim=True)
|
arm_pos = self.joint_pos[:, self._arm_joint_indices] / (math.pi/2)
|
||||||
wheel_FR_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_FR"]].mean(dim=1, keepdim=True)
|
|
||||||
wheel_BL_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_BL"]].mean(dim=1, keepdim=True)
|
|
||||||
wheel_BR_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_BR"]].mean(dim=1, keepdim=True)
|
|
||||||
wheel_vel = torch.cat([wheel_FL_vel, wheel_FR_vel, wheel_BL_vel, wheel_BR_vel], dim=1) # [num_envs, 4]
|
|
||||||
|
|
||||||
# 先前动作
|
# arm 关节速度归一化 (假设最大3.0 rad/s)
|
||||||
|
arm_vel = self.joint_vel[:, self._arm_joint_indices] / 3.0
|
||||||
|
|
||||||
|
# 轮组速度归一化 (假设最大10.0 rad/s)
|
||||||
|
wheel_FL_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_FL"]].mean(dim=1, keepdim=True) / 10.0
|
||||||
|
wheel_FR_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_FR"]].mean(dim=1, keepdim=True) / 10.0
|
||||||
|
wheel_BL_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_BL"]].mean(dim=1, keepdim=True) / 10.0
|
||||||
|
wheel_BR_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_BR"]].mean(dim=1, keepdim=True) / 10.0
|
||||||
|
wheel_vel = torch.cat([wheel_FL_vel, wheel_FR_vel, wheel_BL_vel, wheel_BR_vel], dim=1)
|
||||||
|
|
||||||
|
# 线速度归一化 (假设最大2.0 m/s)
|
||||||
|
base_lin_vel = self.base_lin_vel / 2.0
|
||||||
|
|
||||||
|
# 角速度归一化 (假设最大2.0 rad/s)
|
||||||
|
base_ang_vel = self.base_ang_vel / 2.0
|
||||||
|
|
||||||
|
# 先前动作归一化 (根据动作空间范围)
|
||||||
last_actions = self.last_actions if hasattr(self, 'last_actions') else torch.zeros_like(self.actions)
|
last_actions = self.last_actions if hasattr(self, 'last_actions') else torch.zeros_like(self.actions)
|
||||||
|
last_actions = last_actions / torch.tensor([
|
||||||
|
1.0, 1.0, 1.0, 1.0, # arm joints: ±1.0 (已经归一化)
|
||||||
|
20.0, 20.0, 20.0, 20.0 # wheel joints: ±10.0 rad/s
|
||||||
|
], device=self.device)
|
||||||
|
|
||||||
# 指令xy线速度和指令角速度
|
# 指令归一化 (与观测值使用相同范围)
|
||||||
cmd_lin_vel_xy = self.cmd_lin_vel[:, :2] # [num_envs, 2]
|
cmd_lin_vel_xy = self.cmd_lin_vel[:, :2] / 2.0 # [num_envs, 2]
|
||||||
cmd_ang_vel_z = self.cmd_ang_vel[:, 2:3] # [num_envs, 1]
|
cmd_ang_vel_z = self.cmd_ang_vel[:, 2:3] / 2.0 # [num_envs, 1]
|
||||||
|
|
||||||
# # 高程图 (需要从height_sensor获取)
|
# # 高程图 (需要从height_sensor获取)
|
||||||
# # 确保高度传感器数据是最新的
|
# # 确保高度传感器数据是最新的
|
||||||
@@ -268,8 +280,8 @@ class FlexrV0Env(DirectRLEnv):
|
|||||||
# 组合所有观测
|
# 组合所有观测
|
||||||
obs = torch.cat(
|
obs = torch.cat(
|
||||||
(
|
(
|
||||||
self.base_lin_vel, # 机体线速度 [num_envs, 3]
|
base_lin_vel, # 机体线速度 [num_envs, 3]
|
||||||
self.base_ang_vel, # 机体角速度 [num_envs, 3]
|
base_ang_vel, # 机体角速度 [num_envs, 3]
|
||||||
gravity_body, # 重力矢量 [num_envs, 3]
|
gravity_body, # 重力矢量 [num_envs, 3]
|
||||||
arm_pos, # 摆臂位置 [num_envs, 4]
|
arm_pos, # 摆臂位置 [num_envs, 4]
|
||||||
arm_vel, # 摆臂速度 [num_envs, 4]
|
arm_vel, # 摆臂速度 [num_envs, 4]
|
||||||
@@ -282,6 +294,9 @@ class FlexrV0Env(DirectRLEnv):
|
|||||||
dim=-1,
|
dim=-1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 添加裁剪确保数值在合理范围内
|
||||||
|
obs = torch.clamp(obs, -5.0, 5.0) # 适当放宽范围以允许少量超限
|
||||||
|
|
||||||
observations = {"policy": obs}
|
observations = {"policy": obs}
|
||||||
return observations
|
return observations
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user