From 6630dda9b1eb7737486ef906f34aa9bdb803ab2a Mon Sep 17 00:00:00 2001
From: CaoWangrenbo
Date: Thu, 19 Jun 2025 02:13:50 +0800
Subject: [PATCH] =?UTF-8?q?update:=20=E5=A2=9E=E5=8A=A0=E9=9A=8F=E6=9C=BA?=
 =?UTF-8?q?=E6=8C=87=E4=BB=A4=E5=B9=B6=E6=B7=BB=E5=8A=A0=E5=8F=AF=E8=A7=86?=
 =?UTF-8?q?=E5=8C=96=E7=AE=AD=E5=A4=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

当前可实现对于x轴线速度和z轴角速度的跟踪,但是转向跟踪总体仍较差,反应不灵敏,可能与模型有一定关系(此前测试在环境中差速转向比较困难)
---
Reviewer notes (this section is discarded by `git am`):
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. If a context line does not match the target file,
  apply with `git apply --ignore-whitespace`.
* Added lines were revised during review, so the post-image blob id on the
  `index` line no longer matches the resulting file.
* Partial command resampling now keeps the y linear velocity at zero, the
  same as a full resample, and both paths share `_generate_commands`.
* Arrow orientations are computed as a pure yaw, which stays a unit
  quaternion for zero-length and -x directions.

 .../tasks/direct/flexr_v0/flexr_v0_env.py     | 143 ++++++++++++++++--
 .../tasks/direct/flexr_v0/flexr_v0_env_cfg.py |   2 +-
 2 files changed, 125 insertions(+), 20 deletions(-)

diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
index 6f6296a..07023fa 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
@@ -119,6 +119,12 @@ class FlexrV0Env(DirectRLEnv):
         self.joint_pos = self.robot.data.joint_pos
         self.joint_vel = self.robot.data.joint_vel
 
+        # Command generation parameters
+        self._command_interval = 2.0  # command resampling interval (s)
+        self._command_time = torch.zeros(self.num_envs, device=self.device)  # per-env command timer
+        self._max_lin_vel = 1.0  # max linear velocity (m/s)
+        self._max_ang_vel = 1.0  # max angular velocity (rad/s)
+
     def _get_wheel_joint_indices(self, prefix: str, joint_names: list[str]) -> list[int]:
         """获取指定腿部的所有轮子关节索引(返回整数列表)"""
         indices = []
@@ -140,7 +146,7 @@ class FlexrV0Env(DirectRLEnv):
         light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
         light_cfg.func("/World/Light", light_cfg)
 
-        # self.visualization_markers = define_markers()
+        self.visualization_markers = define_markers()
 
         # add height_scaner
         self.height_sensor = define_height_sensor()
@@ -148,8 +154,20 @@ class FlexrV0Env(DirectRLEnv):
 
 
     def _pre_physics_step(self, actions: torch.Tensor) -> None:
+        # Advance the per-environment command timers.
+        self._command_time += self.dt
+
+        # Resample commands only for environments whose interval has elapsed.
+        need_new_command = self._command_time >= self._command_interval
+        if torch.any(need_new_command):
+            self._generate_commands(torch.where(need_new_command)[0])
+
         self.actions = actions.clone()
 
+        # Refresh the debug arrows once the markers exist.
+        if hasattr(self, 'visualization_markers'):
+            self._update_markers()
+
     def _apply_action(self) -> None:
         # self._debug_print_idx([0])
 
@@ -283,16 +301,6 @@ class FlexrV0Env(DirectRLEnv):
 
         return total_reward.reshape(-1)
 
-    # def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
-    #     self.joint_pos = self.robot.data.joint_pos
-    #     self.joint_vel = self.robot.data.joint_vel
-
-    #     # time_out = self.episode_length_buf >= self.max_episode_length - 1
-    #     # out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
-    #     # out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
-    #     # return out_of_bounds, time_out
-    #     return torch.zeros_like(self.reset_terminated), torch.zeros_like(self.reset_terminated)
-
     def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
 
         # 初始化终止标志
@@ -325,6 +333,12 @@ class FlexrV0Env(DirectRLEnv):
             env_ids = self.robot._ALL_INDICES  # type: ignore
         super()._reset_idx(env_ids)  # type: ignore
 
+        # Draw fresh commands for the environments being reset.
+        if len(env_ids) == self.num_envs:  # type: ignore
+            self._generate_commands()
+        else:
+            self._generate_commands(env_ids)  # type: ignore
+
         joint_pos = self.robot.data.default_joint_pos[env_ids]
         joint_vel = self.robot.data.default_joint_vel[env_ids]
 
@@ -374,6 +388,98 @@ class FlexrV0Env(DirectRLEnv):
         # logging.debug(f"orientations: {self.orientations[env_ids]}")
         logging.debug(f"euler_angles: {euler_xyz_from_quat(self.orientations[env_ids])}")
 
+    def _generate_commands(self, env_ids=None):
+        """Sample random velocity commands and restart the command timers.
+
+        The x linear velocity and z angular velocity are drawn uniformly from
+        [-max, max]; the y linear velocity is always zero.
+
+        Args:
+            env_ids: indices of the environments to resample. ``None`` (the
+                default) re-creates the command buffers for every environment.
+        """
+        if env_ids is None:
+            # Full resample: (re)allocate the [num_envs, 3] command buffers.
+            self.cmd_lin_vel = torch.zeros((self.num_envs, 3), device=self.device)
+            self.cmd_ang_vel = torch.zeros((self.num_envs, 3), device=self.device)
+            env_ids = slice(None)
+            count = self.num_envs
+        else:
+            count = len(env_ids)
+
+        # Restart the timers of the resampled environments.
+        self._command_time[env_ids] = 0
+
+        # Only x (linear) and z (angular) are commanded; y stays zero.
+        self.cmd_lin_vel[env_ids, 0] = torch.rand(count, device=self.device) * 2 * self._max_lin_vel - self._max_lin_vel
+        self.cmd_lin_vel[env_ids, 1] = 0.0
+        self.cmd_ang_vel[env_ids, 2] = torch.rand(count, device=self.device) * 2 * self._max_ang_vel - self._max_ang_vel
+
+    def _update_markers(self):
+        """Update the arrows showing the commanded and the actual velocity."""
+        # Direction of the commanded motion
+        command_vx = self.cmd_lin_vel[:, 0:1]
+        command_wz = self.cmd_ang_vel[:, 2:3]
+        command_directions = self._compute_direction_vector(command_vx, command_wz)
+
+        # Direction of the measured motion
+        actual_vx = self.base_lin_vel[:, 0:1]
+        actual_wz = self.base_ang_vel[:, 2:3]
+        actual_directions = self._compute_direction_vector(actual_vx, actual_wz)
+
+        # Arrow positions: root position offset by 0.5 / 0.3 along z
+        command_positions = self.robot.data.root_pos_w + torch.tensor([0, 0, 0.5], device=self.device)
+        actual_positions = self.robot.data.root_pos_w + torch.tensor([0, 0, 0.3], device=self.device)
+
+        # Arrow orientations as quaternions
+        command_orientations = self._compute_arrow_orientation(command_directions)
+        actual_orientations = self._compute_arrow_orientation(actual_directions)
+
+        # Stack command arrows first, then actual arrows, for one visualize call
+        translations = torch.cat([command_positions, actual_positions], dim=0)
+        orientations = torch.cat([command_orientations, actual_orientations], dim=0)
+        marker_indices = torch.cat([
+            torch.zeros(self.num_envs, dtype=torch.long, device=self.device),  # command
+            torch.ones(self.num_envs, dtype=torch.long, device=self.device)  # actual
+        ], dim=0)
+
+        self.visualization_markers.visualize(
+            translations=translations,
+            orientations=orientations,
+            marker_indices=marker_indices
+        )
+
+    def _compute_arrow_orientation(self, directions: torch.Tensor) -> torch.Tensor:
+        """Return (w, x, y, z) quaternions that yaw the +x axis onto each direction.
+
+        Args:
+            directions (Tensor): planar direction vectors, shape [N, 2]
+
+        Returns:
+            Tensor: unit quaternions, shape [N, 4]
+        """
+        # The directions lie in the xy-plane, so the rotation is a pure yaw.
+        # atan2 stays well defined for -x and zero-length directions, where a
+        # cross-product axis would vanish and give a non-unit quaternion.
+        half_yaw = 0.5 * torch.atan2(directions[:, 1], directions[:, 0])
+        zeros = torch.zeros_like(half_yaw)
+        return torch.stack([torch.cos(half_yaw), zeros, zeros, torch.sin(half_yaw)], dim=1)
+
+    def _compute_direction_vector(self, lin_vel_x: torch.Tensor, ang_vel_z: torch.Tensor) -> torch.Tensor:
+        """
+        Build a planar (xy) direction vector from the velocities vx and wz.
+
+        Args:
+            lin_vel_x (Tensor): x linear velocity, shape [num_envs, 1]
+            ang_vel_z (Tensor): z angular velocity, shape [num_envs, 1]
+
+        Returns:
+            Tensor: direction vector, shape [num_envs, 2]
+        """
+        # vx is the x component; wz, limited to [-1, 1], stands in for the
+        # y component so that a turn command tilts the arrow sideways.
+        return torch.cat([lin_vel_x, ang_vel_z.clamp(-1.0, 1.0)], dim=1)
+
 @torch.jit.script
 def compute_rewards(
     # 输入参数
@@ -398,14 +504,13 @@ def compute_rewards(
     # 线速度/角速度跟踪(计算两个向量之间的欧几里得距离)
     sigma_squared = 0.25
     # 线速度部分
-    # 提取 xy 方向的速度
-    v_target_xy = cmd_lin_vel[:, :2]  # [num_envs, 2]
-    v_actual_xy = base_lin_vel[:, :2]  # [num_envs, 2]
-    # 计算偏差的范数平方
-    v_diff_xy = v_target_xy - v_actual_xy
-    v_diff_norm_squared = torch.sum(v_diff_xy ** 2, dim=1, keepdim=True)
-    # 计算线速度跟踪的奖励
-    linear_error = torch.exp(-v_diff_norm_squared / sigma_squared)  # [num_envs, 1]
+    # Track only the x component of the linear velocity
+    v_target_x = cmd_lin_vel[:, 0:1]  # x component only
+    v_actual_x = base_lin_vel[:, 0:1]
+    v_diff_x = v_target_x - v_actual_x
+    v_diff_squared = v_diff_x.pow(2)
+    linear_error = torch.exp(-v_diff_squared / sigma_squared)
+
     # 角速度部分
     omega_target_z = cmd_ang_vel[:, 2].unsqueeze(1)  # [num_envs, 1]
     omega_actual_z = base_ang_vel[:, 2].unsqueeze(1)  # [num_envs, 1]
diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
index 09658e0..813082f 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
@@ -63,7 +63,7 @@ class FlexrV0EnvCfg(DirectRLEnvCfg):
     # 奖励权重参数
     # TODO 写入外部配置
     rew_scale_lin_vel = 1.0  # 线速度跟踪
-    rew_scale_ang_vel = 0.5  # 角速度跟踪
+    rew_scale_ang_vel = 0.8  # angular velocity tracking
     rew_scale_z = 0.1  # z 轴稳定性
     rew_scale_orientation = 0.2  # 姿态稳定性
     rew_scale_joint_motion = 0.001  # 关节运动