From 79c2d737b9d4e83ed2bd4b4c9eff723512234570 Mon Sep 17 00:00:00 2001
From: binichallein <132759743+binichallein@users.noreply.github.com>
Date: Fri, 15 May 2026 13:37:04 +0800
Subject: [PATCH] fix: fail malformed model actions

---
 phone_agent/agent.py             | 10 +++-
 phone_agent/agent_ios.py         | 12 +++--
 tests/test_agent_parse_errors.py | 85 ++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_agent_parse_errors.py

diff --git a/phone_agent/agent.py b/phone_agent/agent.py
index 364279174..347fd680d 100644
--- a/phone_agent/agent.py
+++ b/phone_agent/agent.py
@@ -189,10 +189,16 @@ def _execute_step(
         # Parse action from response
         try:
             action = parse_action(response.action)
-        except ValueError:
+        except ValueError as e:
             if self.agent_config.verbose:
                 traceback.print_exc()
-            action = finish(message=response.action)
+            return StepResult(
+                success=False,
+                finished=True,
+                action=None,
+                thinking=response.thinking,
+                message=f"Failed to parse action: {response.action}. Error: {e}",
+            )
 
         if self.agent_config.verbose:
             # Print thinking process
diff --git a/phone_agent/agent_ios.py b/phone_agent/agent_ios.py
index a3b20d9fb..3ec0d2eb8 100644
--- a/phone_agent/agent_ios.py
+++ b/phone_agent/agent_ios.py
@@ -20,7 +20,7 @@ class IOSAgentConfig:
     max_steps: int = 100
     wda_url: str = "http://localhost:8100"
     session_id: str | None = None
-    device_id: str | None = None  # iOS device UDID
+    device_id: str | None = None  # iOS device identifier
     lang: str = "cn"
     system_prompt: str | None = None
     verbose: bool = True
@@ -208,10 +208,16 @@ def _execute_step(
         # Parse action from response
         try:
             action = parse_action(response.action)
-        except ValueError:
+        except ValueError as e:
             if self.agent_config.verbose:
                 traceback.print_exc()
-            action = finish(message=response.action)
+            return StepResult(
+                success=False,
+                finished=True,
+                action=None,
+                thinking=response.thinking,
+                message=f"Failed to parse action: {response.action}. Error: {e}",
+            )
 
         if self.agent_config.verbose:
             # Print thinking process
diff --git a/tests/test_agent_parse_errors.py b/tests/test_agent_parse_errors.py
new file mode 100644
index 000000000..5be150a7b
--- /dev/null
+++ b/tests/test_agent_parse_errors.py
@@ -0,0 +1,85 @@
+from dataclasses import dataclass
+
+import phone_agent.agent as agent_module
+import phone_agent.agent_ios as agent_ios_module
+from phone_agent.agent import AgentConfig, PhoneAgent
+from phone_agent.agent_ios import IOSAgentConfig, IOSPhoneAgent
+from phone_agent.model.client import ModelResponse
+
+
+@dataclass
+class FakeScreenshot:
+    base64_data: str = ""
+    width: int = 1080
+    height: int = 2400
+
+
+class FakeDeviceFactory:
+    def get_screenshot(self, device_id=None):
+        return FakeScreenshot()
+
+    def get_current_app(self, device_id=None):
+        return "test.app"
+
+
+class FakeModelClient:
+    def request(self, messages):
+        return ModelResponse(
+            thinking="bad action",
+            action="[{'Type': 'Type'}]",
+            raw_content="[{'Type': 'Type'}]",
+        )
+
+
+def test_parse_error_returns_failure_without_executing_action(monkeypatch) -> None:
+    monkeypatch.setattr(agent_module, "get_device_factory", lambda: FakeDeviceFactory())
+
+    agent = PhoneAgent(agent_config=AgentConfig(verbose=False))
+    agent.model_client = FakeModelClient()
+
+    def fail_if_called(*args, **kwargs):
+        raise AssertionError("malformed action should not be executed")
+
+    monkeypatch.setattr(agent.action_handler, "execute", fail_if_called)
+
+    result = agent.step("do something")
+
+    assert result.success is False
+    assert result.finished is True
+    assert result.action is None
+    assert "Failed to parse action" in result.message
+
+
+class FakeWDAConnection:
+    def __init__(self, wda_url):
+        self.wda_url = wda_url
+
+    def start_wda_session(self):
+        return False, None
+
+
+def test_ios_parse_error_returns_failure_without_executing_action(
+    monkeypatch,
+) -> None:
+    monkeypatch.setattr(agent_ios_module, "XCTestConnection", FakeWDAConnection)
+    monkeypatch.setattr(
+        agent_ios_module, "get_screenshot", lambda **kwargs: FakeScreenshot()
+    )
+    monkeypatch.setattr(
+        agent_ios_module, "get_current_app", lambda **kwargs: "test.app"
+    )
+
+    agent = IOSPhoneAgent(agent_config=IOSAgentConfig(verbose=False))
+    agent.model_client = FakeModelClient()
+
+    def fail_if_called(*args, **kwargs):
+        raise AssertionError("malformed action should not be executed")
+
+    monkeypatch.setattr(agent.action_handler, "execute", fail_if_called)
+
+    result = agent.step("do something")
+
+    assert result.success is False
+    assert result.finished is True
+    assert result.action is None
+    assert "Failed to parse action" in result.message