From b5078e9c6d359da1d34e0cd45055440d30d52f9b Mon Sep 17 00:00:00 2001
From: CaoWangrenbo <--global>
Date: Mon, 16 Jun 2025 12:16:07 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E6=AD=A5=E6=B7=BB=E5=8A=A0=E6=9C=BA?=
 =?UTF-8?q?=E5=99=A8=E4=BA=BA=E5=B9=B6=E8=AE=BE=E5=AE=9A=E5=8A=A8=E4=BD=9C?=
 =?UTF-8?q?=E7=A9=BA=E9=97=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .vscode/extensions.json                       |  12 +-
 .../__pycache__/cli_args.cpython-310.pyc      | Bin 2819 -> 2791 bytes
 source/FLEXR_v0/FLEXR_v0/robots/FLEXR_v0.py   |  70 +++++++
 source/FLEXR_v0/FLEXR_v0/robots/__init__.py   |   0
 .../tasks/direct/flexr_v0/flexr_v0_env.py     | 171 ++++++++++++++----
 .../tasks/direct/flexr_v0/flexr_v0_env_cfg.py |  43 +++--
 6 files changed, 237 insertions(+), 59 deletions(-)
 create mode 100644 source/FLEXR_v0/FLEXR_v0/robots/FLEXR_v0.py
 create mode 100644 source/FLEXR_v0/FLEXR_v0/robots/__init__.py

diff --git a/.vscode/extensions.json b/.vscode/extensions.json
index 6306e43..2348bec 100644
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -2,11 +2,11 @@
 	// See http://go.microsoft.com/fwlink/?LinkId=827846
 	// for the documentation about the extensions.json format
 	"recommendations": [
-		"ms-python.python",
-		"ms-python.vscode-pylance",
-		"ban.spellright",
-		"ms-iot.vscode-ros",
-		"ms-python.black-formatter",
-		"ms-python.flake8",
+		// "ms-python.python",
+		// "ms-python.vscode-pylance",
+		// "ban.spellright",
+		// "ms-iot.vscode-ros",
+		// "ms-python.black-formatter",
+		// "ms-python.flake8",
 	]
 }
diff --git a/scripts/rsl_rl/__pycache__/cli_args.cpython-310.pyc b/scripts/rsl_rl/__pycache__/cli_args.cpython-310.pyc
index 73ebf20186e1b35a77e90290fcb5e2bf659b28ec..4e9d8c1a7efe0778740af774630b4f91579268f2 100644
GIT binary patch
delta 377
zcmZn`doIeI&&$ij00b<hz8kscF*8O@Ud4RUL=-6U%TYfgKQ~oBBefzwFI7K0zbLyP
zCowryzq~j;t)xib&Brw&D89@<zc{%lv!JAS@?jP)L2;nQTbw0{#o6(BiMgpo!kgt-
z7cv4(J;0{GC^PvXo204)P)U&_h>!siQJh7^IYl|~$!Y0DAX_1X*kpNjbw>Hk?(70g
zjEa*ZIMskg)N|_jM+xNQr^go*<!7ZPmw>H^;w?|iOG$!>Me*mQ7L=6crNRU>nTixZ
z=7McRvH>I}H~9~zp|c#22~@=j5@7?e*^0n!6b1=^wMl_k(jbBbB+mjd5yXbdPcGn6
eWK@|viAzf!6aq!^AOd7AlApkKi%q`Gr2znoVp$~s

delta 425
zcmYjNKTE?v95rp)l9XZ#we>H?Dy>!M;Vw8hDS{4ylZ5N_+Que#;V!lAZmzcrf*`m_
zKY(+;LMAuAgOm45ta`)a@!pU39{0I)Un}xb$zIU!Vb*yoK4y(ry=G?@HLZMIqahC{
z8qzz?D7xZe6#AG@G?A_+B_`4f(22yDoZ&8#M0jB&k)R&yb3tf8S>*Z@3+AzbdNEHc
z%Q`f+A}mKP!vPi5X(sn%=*H)~bF=|wWgPZ!L|q|$SNJaJ589?!(T-RJNNy}`rk;S6
zDb!WA&@vj+{X#io?5IPlsbh)NIgiUeAGo35H<Uz4ZCsjQ*6Yr-c#+XC8Z$Z<+8MF^
z+xV{mG)H|{2gi=4`YH!X9(X?8SOp;&x&~|=zy#k+6M_Hu>eAjaTI$x`+k{^c4FCt=
MuQ6eD^=|Ke0}eEAZU6uP

diff --git a/source/FLEXR_v0/FLEXR_v0/robots/FLEXR_v0.py b/source/FLEXR_v0/FLEXR_v0/robots/FLEXR_v0.py
new file mode 100644
index 0000000..6ba3478
--- /dev/null
+++ b/source/FLEXR_v0/FLEXR_v0/robots/FLEXR_v0.py
@@ -0,0 +1,70 @@
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg
+from isaaclab.actuators import ImplicitActuatorCfg
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
+
+import math
+
+FLEXR_CONFIG = ArticulationCfg(
+    spawn=sim_utils.UsdFileCfg(
+        usd_path="/home/hexone/Documents/ftr.usd",
+    ),
+    init_state=ArticulationCfg.InitialStateCfg(
+        joint_pos={
+            "arm_FL": math.radians(45.0),
+            "arm_FR": math.radians(45.0),
+            "arm_BL": math.radians(45.0),
+            "arm_BR": math.radians(45.0),
+        },
+    ),
+    actuators={
+        # 摆臂执行器组
+        "arm_acts": ImplicitActuatorCfg(
+            joint_names_expr=["arm_(FL|FR|BL|BR)"],
+            effort_limit_sim=100.0,  # 最大扭矩 (N·m)
+            velocity_limit_sim=100.0,  # 最大速度 (rad/s)
+            stiffness=800.0,  # 位置控制的刚度
+            damping=5.0,  # 位置控制的阻尼
+            friction=0.1  # 可选：关节摩擦
+        ),
+        
+        # 轮组执行器 - 每个腿部一个执行器
+        "wheel_FL": ImplicitActuatorCfg(
+            joint_names_expr=["wheel_FL_[1-5]"],
+            effort_limit_sim=100.0,  # 最大扭矩 (N·m)
+            # velocity_limit_sim=100.0,  # 可选：最大速度限制
+            stiffness=0.0,  # 设置为0表示纯速度/扭矩控制
+            damping=10000.0  # 高阻尼确保轮子自由转动但稳定
+        ),
+        "wheel_FR": ImplicitActuatorCfg(
+            joint_names_expr=["wheel_FR_[1-5]"],
+            effort_limit_sim=100.0,
+            stiffness=0.0,
+            damping=10000.0
+        ),
+        "wheel_BL": ImplicitActuatorCfg(
+            joint_names_expr=["wheel_BL_[1-5]"],
+            effort_limit_sim=100.0,
+            stiffness=0.0,
+            damping=10000.0
+        ),
+        "wheel_BR": ImplicitActuatorCfg(
+            joint_names_expr=["wheel_BR_[1-5]"],
+            effort_limit_sim=100.0,
+            stiffness=0.0,
+            damping=10000.0
+        )
+    },
+    # 可选：物理材质覆盖
+    # physics_material=sim_utils.RigidBodyMaterialCfg(
+    #     static_friction=1.0,
+    #     dynamic_friction=0.5,
+    #     restitution=0.1
+    # ),
+    # 可选：碰撞过滤器设置
+    # collision_props=sim_utils.CollisionPropertiesCfg(
+    #     collision_enabled=True,
+    #     contact_offset=0.02,
+    #     rest_offset=0.002
+    # )
+)
diff --git a/source/FLEXR_v0/FLEXR_v0/robots/__init__.py b/source/FLEXR_v0/FLEXR_v0/robots/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
index e93ded4..4149192 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env.py
@@ -17,18 +17,62 @@ from isaaclab.utils.math import sample_uniform
 
 from .flexr_v0_env_cfg import FlexrV0EnvCfg
 
+import logging
+# Set the default log level to WARNING
+logging.basicConfig(level=logging.WARNING)
+# Replace logging.debug statements with debug-level log logging.debugs
+logging.debug("Your log message here")
 
 class FlexrV0Env(DirectRLEnv):
     cfg: FlexrV0EnvCfg
 
     def __init__(self, cfg: FlexrV0EnvCfg, render_mode: str | None = None, **kwargs):
         super().__init__(cfg, render_mode, **kwargs)
+        
+        # 获取所有关节名称
+        joint_names = self.robot.joint_names
+        
+        logging.debug(joint_names)
 
-        self._cart_dof_idx, _ = self.robot.find_joints(self.cfg.cart_dof_name)
-        self._pole_dof_idx, _ = self.robot.find_joints(self.cfg.pole_dof_name)
+        # 创建执行器组到关节索引列表的映射
+        self._actuator_joint_indices = {}
 
+        # 摆臂执行器组关节索引
+        self._arm_joint_indices = []
+        for name in ["arm_FL", "arm_FR", "arm_BL", "arm_BR"]:
+            idx, _ = self.robot.find_joints(name)
+            self._arm_joint_indices.extend(list(idx))
+        self._actuator_joint_indices["arm_acts"] = self._arm_joint_indices
+        
+        # 轮组执行器关节索引
+        self._actuator_joint_indices["wheel_FL"] = self._get_wheel_joint_indices("wheel_FL", joint_names)
+        self._actuator_joint_indices["wheel_FR"] = self._get_wheel_joint_indices("wheel_FR", joint_names)
+        self._actuator_joint_indices["wheel_BL"] = self._get_wheel_joint_indices("wheel_BL", joint_names)
+        self._actuator_joint_indices["wheel_BR"] = self._get_wheel_joint_indices("wheel_BR", joint_names)
+        
+        # 合并所有轮子关节索引（存储为列表）
+        self._all_wheel_joint_indices = (
+            self._actuator_joint_indices["wheel_FL"] +
+            self._actuator_joint_indices["wheel_FR"] +
+            self._actuator_joint_indices["wheel_BL"] +
+            self._actuator_joint_indices["wheel_BR"]
+        )
+
+        # 调试打印所有映射
+        logging.debug("Actuator Group to Joint Indices:", self._actuator_joint_indices)
+        
+        # 初始化状态变量
         self.joint_pos = self.robot.data.joint_pos
         self.joint_vel = self.robot.data.joint_vel
+    
+    def _get_wheel_joint_indices(self, prefix: str, joint_names: list[str]) -> list[int]:
+        """获取指定腿部的所有轮子关节索引（返回整数列表）"""
+        indices = []
+        for i, name in enumerate(joint_names):
+            if name.startswith(prefix):
+                indices.append(i)
+        return indices
+
 
     def _setup_scene(self):
         self.robot = Articulation(self.cfg.robot_cfg)
@@ -46,57 +90,118 @@ class FlexrV0Env(DirectRLEnv):
         self.actions = actions.clone()
 
     def _apply_action(self) -> None:
-        self.robot.set_joint_effort_target(self.actions * self.cfg.action_scale, joint_ids=self._cart_dof_idx)
+        # 确保动作有正确形状 [num_envs, 8]
+        if self.actions.dim() == 1:
+            self.actions = self.actions.unsqueeze(0)
+        
+        # 分离动作分量
+        arm_actions = self.actions[:, :4]   # 前4个分量控制摆臂 (FL, FR, BL, BR)
+        wheel_actions = self.actions[:, 4:8]  # 后4个分量控制轮组 (FL, FR, BL, BR)
+        
+        # --- 摆臂控制部分 ---
+        arm_pos_target = torch.zeros_like(self.robot.data.joint_pos[:, self._arm_joint_indices])
+        arm_pos_target.copy_(arm_actions)
+        
+        logging.debug(f"Total joints: {len(self.robot.joint_names)}")
+        logging.debug(f"Arm joint indices: {self._arm_joint_indices}")
+        logging.debug(f"Wheel joint counts: { {k:len(v) for k,v in self._actuator_joint_indices.items() if k.startswith('wheel')} }")
+        logging.debug(f"Initial arm positions: {self.robot.data.joint_pos[0, self._arm_joint_indices]}")
+        logging.debug(f"Arm actions: {arm_actions[0]}")
+
+        # 设置目标位置（只针对arm关节）
+        self.robot.set_joint_position_target(
+            arm_pos_target, 
+            joint_ids=self._arm_joint_indices
+        )
+        
+        # --- 轮组控制部分 ---
+        wheel_vel_target = torch.zeros_like(self.robot.data.joint_vel)  # [num_envs, 24]
+        
+        wheel_groups = ["wheel_FL", "wheel_FR", "wheel_BL", "wheel_BR"]
+        for i, group in enumerate(wheel_groups):
+            joint_indices = self._actuator_joint_indices[group]  # 全局索引
+            wheel_vel_target[:, joint_indices] = wheel_actions[:, i].unsqueeze(-1)
+        
+        self.robot.set_joint_velocity_target(
+            wheel_vel_target[:, self._all_wheel_joint_indices],  # 只选择轮组部分
+            joint_ids=self._all_wheel_joint_indices
+        )
+
 
     def _get_observations(self) -> dict:
+        # 获取摆臂关节的位置和速度
+        arm_pos = self.joint_pos[:, self._arm_joint_indices]  # [num_envs, 4]
+        arm_vel = self.joint_vel[:, self._arm_joint_indices]  # [num_envs, 4]
+        
+        # 计算每个轮组的平均速度
+        wheel_FL_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_FL"]].mean(dim=1, keepdim=True)
+        wheel_FR_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_FR"]].mean(dim=1, keepdim=True)
+        wheel_BL_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_BL"]].mean(dim=1, keepdim=True)
+        wheel_BR_vel = self.joint_vel[:, self._actuator_joint_indices["wheel_BR"]].mean(dim=1, keepdim=True)
+        
+        # 组合轮组速度
+        wheel_vel = torch.cat([wheel_FL_vel, wheel_FR_vel, wheel_BL_vel, wheel_BR_vel], dim=1)  # [num_envs, 4]
+
+        # 特权信息 - 暂不加入
+        base_pos = self.robot.data.root_pos_w
+        base_quat = self.robot.data.root_quat_w
+        base_lin_vel = self.robot.data.root_lin_vel_w
+        base_ang_vel = self.robot.data.root_ang_vel_w
+
+        # 
+
+        # 组合所有观测
         obs = torch.cat(
             (
-                self.joint_pos[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
-                self.joint_vel[:, self._pole_dof_idx[0]].unsqueeze(dim=1),
-                self.joint_pos[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
-                self.joint_vel[:, self._cart_dof_idx[0]].unsqueeze(dim=1),
+                arm_pos,        # 摆臂位置 [num_envs, 4]
+                arm_vel,        # 摆臂速度 [num_envs, 4]
+                wheel_vel,      # 轮组平均速度 [num_envs, 4]
             ),
             dim=-1,
         )
+        
         observations = {"policy": obs}
         return observations
-
+    
     def _get_rewards(self) -> torch.Tensor:
-        total_reward = compute_rewards(
-            self.cfg.rew_scale_alive,
-            self.cfg.rew_scale_terminated,
-            self.cfg.rew_scale_pole_pos,
-            self.cfg.rew_scale_cart_vel,
-            self.cfg.rew_scale_pole_vel,
-            self.joint_pos[:, self._pole_dof_idx[0]],
-            self.joint_vel[:, self._pole_dof_idx[0]],
-            self.joint_pos[:, self._cart_dof_idx[0]],
-            self.joint_vel[:, self._cart_dof_idx[0]],
-            self.reset_terminated,
-        )
-        return total_reward
+        # total_reward = compute_rewards(
+        #     self.cfg.rew_scale_alive,
+        #     self.cfg.rew_scale_terminated,
+        #     self.cfg.rew_scale_pole_pos,
+        #     self.cfg.rew_scale_cart_vel,
+        #     self.cfg.rew_scale_pole_vel,
+        #     self.joint_pos[:, self._pole_dof_idx[0]],
+        #     self.joint_vel[:, self._pole_dof_idx[0]],
+        #     self.joint_pos[:, self._cart_dof_idx[0]],
+        #     self.joint_vel[:, self._cart_dof_idx[0]],
+        #     self.reset_terminated,
+        # )
+        # return total_reward
+        # 返回一个全为0的float张量
+        return torch.zeros_like(self.reset_terminated).float()
 
     def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
         self.joint_pos = self.robot.data.joint_pos
         self.joint_vel = self.robot.data.joint_vel
 
-        time_out = self.episode_length_buf >= self.max_episode_length - 1
-        out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
-        out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
-        return out_of_bounds, time_out
+        # time_out = self.episode_length_buf >= self.max_episode_length - 1
+        # out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self._cart_dof_idx]) > self.cfg.max_cart_pos, dim=1)
+        # out_of_bounds = out_of_bounds | torch.any(torch.abs(self.joint_pos[:, self._pole_dof_idx]) > math.pi / 2, dim=1)
+        # return out_of_bounds, time_out
+        return torch.zeros_like(self.reset_terminated), torch.zeros_like(self.reset_terminated)
 
     def _reset_idx(self, env_ids: Sequence[int] | None):
         if env_ids is None:
-            env_ids = self.robot._ALL_INDICES
-        super()._reset_idx(env_ids)
+            env_ids = self.robot._ALL_INDICES # type: ignore
+        super()._reset_idx(env_ids) # type: ignore
 
         joint_pos = self.robot.data.default_joint_pos[env_ids]
-        joint_pos[:, self._pole_dof_idx] += sample_uniform(
-            self.cfg.initial_pole_angle_range[0] * math.pi,
-            self.cfg.initial_pole_angle_range[1] * math.pi,
-            joint_pos[:, self._pole_dof_idx].shape,
-            joint_pos.device,
-        )
+        # joint_pos[:, self._pole_dof_idx] += sample_uniform(
+        #     self.cfg.initial_pole_angle_range[0] * math.pi,
+        #     self.cfg.initial_pole_angle_range[1] * math.pi,
+        #     joint_pos[:, self._pole_dof_idx].shape,
+        #     joint_pos.device,
+        # )
         joint_vel = self.robot.data.default_joint_vel[env_ids]
 
         default_root_state = self.robot.data.default_root_state[env_ids]
diff --git a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
index a208ef8..3c54c20 100644
--- a/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
+++ b/source/FLEXR_v0/FLEXR_v0/tasks/direct/flexr_v0/flexr_v0_env_cfg.py
@@ -3,7 +3,9 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
-from isaaclab_assets.robots.cartpole import CARTPOLE_CFG
+# from isaaclab_assets.robots.cartpole import CARTPOLE_CFG
+from FLEXR_v0.robots.FLEXR_v0 import FLEXR_CONFIG
+
 
 from isaaclab.assets import ArticulationCfg
 from isaaclab.envs import DirectRLEnvCfg
@@ -18,31 +20,32 @@ class FlexrV0EnvCfg(DirectRLEnvCfg):
     decimation = 2
     episode_length_s = 5.0
     # - spaces definition
-    action_space = 1
-    observation_space = 4
+    action_space = 8
+    observation_space = 12
     state_space = 0
 
     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
 
     # robot(s)
-    robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot")
+    robot_cfg: ArticulationCfg = FLEXR_CONFIG.replace(prim_path="/World/envs/env_.*/Robot") # type: ignore
 
     # scene
-    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=128, env_spacing=4.0, replicate_physics=True)
 
-    # custom parameters/scales
-    # - controllable joint
-    cart_dof_name = "slider_to_cart"
-    pole_dof_name = "cart_to_pole"
-    # - action scale
-    action_scale = 100.0  # [N]
-    # - reward scales
-    rew_scale_alive = 1.0
-    rew_scale_terminated = -2.0
-    rew_scale_pole_pos = -1.0
-    rew_scale_cart_vel = -0.01
-    rew_scale_pole_vel = -0.005
-    # - reset states/conditions
-    initial_pole_angle_range = [-0.25, 0.25]  # pole angle sample range on reset [rad]
-    max_cart_pos = 3.0  # reset if cart exceeds this position [m]
\ No newline at end of file
+    # 待增加的自定义参数
+    # # custom parameters/scales
+    # # - controllable joint
+    # cart_dof_name = "slider_to_cart"
+    # pole_dof_name = "cart_to_pole"
+    # # - action scale
+    # action_scale = 100.0  # [N]
+    # # - reward scales
+    # rew_scale_alive = 1.0
+    # rew_scale_terminated = -2.0
+    # rew_scale_pole_pos = -1.0
+    # rew_scale_cart_vel = -0.01
+    # rew_scale_pole_vel = -0.005
+    # # - reset states/conditions
+    # initial_pole_angle_range = [-0.25, 0.25]  # pole angle sample range on reset [rad]
+    # max_cart_pos = 3.0  # reset if cart exceeds this position [m]
\ No newline at end of file