update: 增加关节能量负奖励(关节扭矩平方项与关节角速度平方项;暂未加入关节角加速度项)

与此前改动相比变化不大:仅在 compute_rewards 的总奖励中新增关节扭矩惩罚和关节角速度惩罚两项,其余奖励项保持不变。
This commit is contained in:
2025-06-20 01:09:43 +08:00
parent fc2e66fa26
commit f85f987e0b

View File

@@ -595,7 +595,20 @@ def compute_rewards(
# action_rate = torch.norm(actions - last_actions, dim=1, keepdim=True) # shape: [num_envs, 1] # action_rate = torch.norm(actions - last_actions, dim=1, keepdim=True) # shape: [num_envs, 1]
# action_rate_reward = -1.0 * action_rate * rew_scale_action_rate # action_rate_reward = -1.0 * action_rate * rew_scale_action_rate
total_reward = tracking_reward + z_reward + omega_xy_reward # 能量奖励
# # 关节扭矩 - 扭矩范数的平方
joint_torque_norm_squared = torch.sum(joint_torque.pow(2), dim=1, keepdim=True) # shape: [num_envs, 1]
joint_torque_reward = -1.0 * joint_torque_norm_squared * rew_scale_joint_torque * 0.00002 * dt
# # 关节速度 - 角速度范数平方和角加速度范数平方(目前先使用角速度)
joint_vel_norm_squared = torch.sum(joint_vel.pow(2), dim=1, keepdim=True) # shape: [num_envs, 1]
joint_vel_reward = -1.0 * joint_vel_norm_squared * rew_scale_joint_motion * 0.001 * dt
# # 关节运动
# joint_vel_norm = torch.norm(joint_vel, dim=1, keepdim=True) # shape: [num_envs, 1]
# joint_vel_reward = -1.0 * joint_vel_norm * rew_scale_joint_motion
total_reward = tracking_reward + z_reward + omega_xy_reward + joint_torque_reward + joint_vel_reward
# # 调试打印张量大小 # # 调试打印张量大小
# print(f"tracking_reward: {tracking_reward.shape}, z_reward: {z_reward.shape}, omega_xy_reward: {omega_xy_reward.shape}, total_reward: {total_reward.shape}") # print(f"tracking_reward: {tracking_reward.shape}, z_reward: {z_reward.shape}, omega_xy_reward: {omega_xy_reward.shape}, total_reward: {total_reward.shape}")