Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ea3a77a
feat(local): capability policy + driver registry for local control
abonneth Jun 29, 2026
3f6c600
feat(local): in-SDK sidecar + desktop/browser drivers for local control
abonneth Jun 29, 2026
f6d3585
fix(local): browser destroy stops chromedriver, leaves attached Chrom…
abonneth Jun 29, 2026
0c176a2
refactor(local): derive command allowlist from driver, drop registry …
abonneth Jun 29, 2026
7e00149
fix(local): self-review fixes + per-driver package layout
abonneth Jun 29, 2026
a26e295
refactor(local): drop leading underscore on new module-level helpers/…
abonneth Jun 29, 2026
890d7db
fix(local/browser): defuddle for extract_markdown, h.js viewport mark…
abonneth Jun 29, 2026
e343d15
feat(local): auto-derive session_id for source:local envs + `hai loca…
abonneth Jun 29, 2026
11f4bd1
fix(local): address Bugbot review (focus secret field, tab title/clos…
abonneth Jun 29, 2026
0d1c561
refactor(local): source values user_device/cloud (was local/remote)
abonneth Jun 29, 2026
8a1810f
refactor(local): source->host in autowiring
abonneth Jun 29, 2026
80a969d
fix(local): forward positional agent_name in localize wrappers; surfa…
abonneth Jun 30, 2026
0460ef2
refactor(local): idempotent redelivery, pyautogui keys, policy MRO, s…
abonneth Jun 30, 2026
fded9ce
refactor(local): drop capability policy; harden dispatch, lease, and …
abonneth Jun 30, 2026
7345734
fix(local): correct driver observation, input, and autowiring bugs
abonneth Jun 30, 2026
32ce8a5
refactor(local/browser): consolidate hjs/defuddle/markdown into suppo…
abonneth Jun 30, 2026
aeca68a
refactor(local): drop leading underscore on module-level constants
abonneth Jun 30, 2026
db90a85
refactor(local): rename driver packages browser->selenium_browser, de…
abonneth Jun 30, 2026
b3006fd
fix(local): wire subagent environments; keep redelivered command UIDs…
abonneth Jun 30, 2026
2a055f1
refactor(local/browser): drop unused disable_html flag
abonneth Jul 1, 2026
1c267dc
fix(local/desktop): expose screenshot_b64 as the screenshot primitive
abonneth Jul 1, 2026
d25d885
fix(local): dispatch property commands, tolerate existing channel, fl…
abonneth Jul 1, 2026
abf44a5
chore(local): lock desktop/browser extras and bump ruff to pinned 0.1…
abonneth Jul 1, 2026
73d1fc9
Log sidecar session id and enable INFO logging in hai local
abonneth Jul 1, 2026
576e89a
Auto-start local sidecars when a session uses a user_device environment
abonneth Jul 1, 2026
4e8401e
Simplify sidecar runtime to module-level functions
abonneth Jul 1, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,16 @@ cli = [
"rich>=13,<16",
"typer>=0.12,<1",
]
desktop = [
"pyautogui>=0.9.54",
"pillow>=10",
]
browser = [
"selenium>=4.20",
"markdownify>=0.11",
]
all = [
"python-dotenv>=1.2.2,<2",
"rich>=13,<16",
"typer>=0.12,<1",
"hai-agents[browser,cli,desktop]",
]

[project.scripts]
Expand All @@ -55,6 +61,9 @@ Documentation = "https://hub.hcompany.ai/computer-use-agents"
[tool.hatch.build.targets.wheel]
packages = ["src/hai_agents", "src/hai_agents_cli", "src/hai_agents_common"]

[tool.hatch.build.targets.wheel.force-include]
"src/hai_agents/local/selenium_browser/js" = "hai_agents/local/selenium_browser/js"

[dependency-groups]
dev = [
"pytest>=9.0.3,<10",
Expand Down
98 changes: 98 additions & 0 deletions src/hai_agents/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@

from __future__ import annotations

import asyncio
import functools
import typing

import typing_extensions

from .agents.client import AgentsClient, AsyncAgentsClient
from .base_client import AsyncBaseClient, BaseClient
from .local.runtime import auto_sidecars_enabled, ensure_sidecar
from .local.wiring import collect_sidecar_configs, localize_agent, localize_environments, localize_subagents
from .polling import (
AnswerT,
AsyncSessionHandle,
Expand All @@ -24,9 +29,78 @@
)
from .polling import async_run_session as _async_run_session
from .polling import run_session as _run_session
from .sessions.client import AsyncSessionsClient, SessionsClient
from .tools import ToolInput, as_tools


def _wire_agent_fields(kwargs: typing.Dict[str, typing.Any], get_api_key: typing.Callable[[], str]) -> None:
if kwargs.get("environments"):
kwargs["environments"] = localize_environments(kwargs["environments"], get_api_key)
if kwargs.get("subagents"):
kwargs["subagents"] = localize_subagents(kwargs["subagents"], get_api_key)


def _ensure_local_sidecars(agent: typing.Any, client_wrapper: typing.Any) -> None:
    """Auto-start sidecars for the agent's user_device environments before the session is created.

    Does nothing when ``auto_sidecars_enabled()`` is false. Otherwise every
    config yielded by ``collect_sidecar_configs`` for *agent* (using the
    wrapper's API key and base URL) is handed to ``ensure_sidecar``.
    """
    if auto_sidecars_enabled():
        api_key = client_wrapper._get_api_key()
        base_url = client_wrapper.get_base_url()
        for sidecar_config in collect_sidecar_configs(agent, api_key, base_url):
            ensure_sidecar(sidecar_config)


class _LocalAgentsClient(AgentsClient):
    """AgentsClient variant that runs ``_wire_agent_fields`` over the keyword arguments of
    create/update/patch calls before delegating to the parent implementation."""

    @functools.wraps(AgentsClient.create_agent)
    def create_agent(self, **kwargs: typing.Any) -> typing.Any:
        api_key_getter = self._raw_client._client_wrapper._get_api_key
        _wire_agent_fields(kwargs, api_key_getter)
        return super().create_agent(**kwargs)

    @functools.wraps(AgentsClient.update_agent)
    def update_agent(self, *args: typing.Any, **kwargs: typing.Any) -> typing.Any:
        # Positional arguments are forwarded untouched; only keyword fields are wired.
        api_key_getter = self._raw_client._client_wrapper._get_api_key
        _wire_agent_fields(kwargs, api_key_getter)
        return super().update_agent(*args, **kwargs)

    @functools.wraps(AgentsClient.patch_agent)
    def patch_agent(self, *args: typing.Any, **kwargs: typing.Any) -> typing.Any:
        # Positional arguments are forwarded untouched; only keyword fields are wired.
        api_key_getter = self._raw_client._client_wrapper._get_api_key
        _wire_agent_fields(kwargs, api_key_getter)
        return super().patch_agent(*args, **kwargs)


class _LocalSessionsClient(SessionsClient):
@functools.wraps(SessionsClient.create_session)
def create_session(self, **kwargs: typing.Any) -> typing.Any:
if "agent" in kwargs:
kwargs["agent"] = localize_agent(kwargs["agent"], self._raw_client._client_wrapper._get_api_key)
_ensure_local_sidecars(kwargs["agent"], self._raw_client._client_wrapper)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Named agents skip sidecar startup

High Severity

With auto sidecars enabled, create_session only starts local sidecars from configs collected off the inline agent payload. A registered agent passed as a string (the usual agent="my-agent" flow) is returned unchanged by localize_agent, and catalog environment entries that are plain id strings never match user_device in _local_target, so collect_sidecar_configs is empty and no sidecar is started.

Additional Locations (2)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 4e8401e. Configure here.

return super().create_session(**kwargs)


class _LocalAsyncAgentsClient(AsyncAgentsClient):
    """AsyncAgentsClient variant that runs ``_wire_agent_fields`` over the keyword arguments of
    create/update/patch calls before awaiting the parent implementation."""

    @functools.wraps(AsyncAgentsClient.create_agent)
    async def create_agent(self, **kwargs: typing.Any) -> typing.Any:
        api_key_getter = self._raw_client._client_wrapper._get_api_key
        _wire_agent_fields(kwargs, api_key_getter)
        return await super().create_agent(**kwargs)

    @functools.wraps(AsyncAgentsClient.update_agent)
    async def update_agent(self, *args: typing.Any, **kwargs: typing.Any) -> typing.Any:
        # Positional arguments are forwarded untouched; only keyword fields are wired.
        api_key_getter = self._raw_client._client_wrapper._get_api_key
        _wire_agent_fields(kwargs, api_key_getter)
        return await super().update_agent(*args, **kwargs)

    @functools.wraps(AsyncAgentsClient.patch_agent)
    async def patch_agent(self, *args: typing.Any, **kwargs: typing.Any) -> typing.Any:
        # Positional arguments are forwarded untouched; only keyword fields are wired.
        api_key_getter = self._raw_client._client_wrapper._get_api_key
        _wire_agent_fields(kwargs, api_key_getter)
        return await super().patch_agent(*args, **kwargs)


class _LocalAsyncSessionsClient(AsyncSessionsClient):
    """AsyncSessionsClient variant that localizes the ``agent`` argument and starts local
    sidecars before the session is created."""

    @functools.wraps(AsyncSessionsClient.create_session)
    async def create_session(self, **kwargs: typing.Any) -> typing.Any:
        # NOTE(review): the sidecar step is treated as part of the "agent" branch because it
        # subscripts kwargs["agent"] — confirm against the original indentation.
        if "agent" not in kwargs:
            return await super().create_session(**kwargs)

        wrapper = self._raw_client._client_wrapper
        kwargs["agent"] = localize_agent(kwargs["agent"], wrapper._get_api_key)
        # Chrome launch + sidecar startup can take seconds; keep the event loop free.
        await asyncio.to_thread(_ensure_local_sidecars, kwargs["agent"], wrapper)
        return await super().create_session(**kwargs)


class Client(BaseClient):
def run_session(
self,
Expand Down Expand Up @@ -75,6 +149,18 @@ def session(self, id: str) -> SessionHandle:
"""Wrap an existing session id in a handle."""
return SessionHandle(self, id)

@property
def agents(self) -> _LocalAgentsClient:
    """Return the agents sub-client, building a ``_LocalAgentsClient`` on first access and reusing it afterwards."""
    cached = self._agents
    if cached is None:
        cached = self._agents = _LocalAgentsClient(client_wrapper=self._client_wrapper)
    return cached

@property
def sessions(self) -> _LocalSessionsClient:
    """Return the sessions sub-client, building a ``_LocalSessionsClient`` on first access and reusing it afterwards."""
    cached = self._sessions
    if cached is None:
        cached = self._sessions = _LocalSessionsClient(client_wrapper=self._client_wrapper)
    return cached


class AsyncClient(AsyncBaseClient):
async def run_session(
Expand Down Expand Up @@ -123,3 +209,15 @@ async def start_session(
def session(self, id: str) -> AsyncSessionHandle:
"""Wrap an existing session id in a handle."""
return AsyncSessionHandle(self, id)

@property
def agents(self) -> _LocalAsyncAgentsClient:
    """Return the agents sub-client, building a ``_LocalAsyncAgentsClient`` on first access and reusing it afterwards."""
    cached = self._agents
    if cached is None:
        cached = self._agents = _LocalAsyncAgentsClient(client_wrapper=self._client_wrapper)
    return cached

@property
def sessions(self) -> _LocalAsyncSessionsClient:
    """Return the sessions sub-client, building a ``_LocalAsyncSessionsClient`` on first access and reusing it afterwards."""
    cached = self._sessions
    if cached is None:
        cached = self._sessions = _LocalAsyncSessionsClient(client_wrapper=self._client_wrapper)
    return cached
16 changes: 16 additions & 0 deletions src/hai_agents/local/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from __future__ import annotations

from .chrome import ensure_local_chrome
from .config import SidecarConfig, session_id_from_environment_id
from .runtime import ensure_sidecar, stop_sidecars
from .sidecar import SidecarBusyError, SidecarClient

__all__ = [
"SidecarBusyError",
"SidecarClient",
"SidecarConfig",
"ensure_local_chrome",
"ensure_sidecar",
"session_id_from_environment_id",
"stop_sidecars",
]
79 changes: 79 additions & 0 deletions src/hai_agents/local/chrome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from __future__ import annotations

import logging
import platform
import shutil
import subprocess
import time
from pathlib import Path

import httpx

logger = logging.getLogger(__name__)

# Remote-debugging port used by ensure_local_chrome when the caller passes none.
DEFAULT_DEBUG_PORT = 9222
# Seconds ensure_local_chrome polls for the debugging port after launching Chrome.
CHROME_STARTUP_TIMEOUT_S = 20.0
# Directory passed to Chrome as --user-data-dir; lives under the user's home directory.
CHROME_PROFILE_DIR = Path.home() / ".hai" / "chrome-profile"

# Absolute paths probed by _find_chrome, keyed by the value of platform.system().
# Platforms without an entry here fall through to the PATH lookup below.
_CHROME_CANDIDATES = {
    "Darwin": (
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
    ),
    "Windows": (
        r"C:\Program Files\Google\Chrome\Application\chrome.exe",
        r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
    ),
}
# Executable names _find_chrome resolves with shutil.which, tried in this order.
_CHROME_COMMANDS = ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser", "chrome")


def ensure_local_chrome(port: int = DEFAULT_DEBUG_PORT) -> None:
"""Make sure a Chrome with an open remote-debugging port is running, launching one if needed."""
if _debugger_listening(port):
return
binary = _find_chrome()
if binary is None:
raise RuntimeError(
"Google Chrome was not found. Install Chrome, or start a browser yourself with "
f"--remote-debugging-port={port}."
)
CHROME_PROFILE_DIR.mkdir(parents=True, exist_ok=True)
logger.info("launching Chrome with remote debugging on port %d (profile: %s)", port, CHROME_PROFILE_DIR)
subprocess.Popen(
[
binary,
f"--remote-debugging-port={port}",
f"--user-data-dir={CHROME_PROFILE_DIR}",
"--no-first-run",
"--no-default-browser-check",
],
stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate Chrome launch race

Medium Severity

ensure_local_chrome uses a check-then-launch pattern with no lock. Parallel browser sidecar threads (e.g. parent and subagent user_device web envs) can both see the debug port closed and spawn separate Chrome processes against the same profile directory and port.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 576e89a. Configure here.

deadline = time.monotonic() + CHROME_STARTUP_TIMEOUT_S
while time.monotonic() < deadline:
if _debugger_listening(port):
return
time.sleep(0.25)
raise RuntimeError(f"Chrome did not open debugging port {port} within {CHROME_STARTUP_TIMEOUT_S:.0f}s")


def _debugger_listening(port: int) -> bool:
    """Return True when ``http://127.0.0.1:<port>/json/version`` answers with HTTP 200.

    Any ``httpx.HTTPError`` raised by the request is reported as False.
    """
    version_url = f"http://127.0.0.1:{port}/json/version"
    try:
        response = httpx.get(version_url, timeout=2.0)
    except httpx.HTTPError:
        return False
    return response.status_code == 200


def _find_chrome() -> str | None:
    """Locate a Chrome/Chromium executable.

    Known install paths for the current ``platform.system()`` are checked
    first; if none exists, each name in ``_CHROME_COMMANDS`` is resolved on
    PATH in order. Returns the first hit, or None when nothing is found.
    """
    known_paths = _CHROME_CANDIDATES.get(platform.system(), ())
    installed = next((candidate for candidate in known_paths if Path(candidate).exists()), None)
    if installed is not None:
        return installed

    on_path = (shutil.which(name) for name in _CHROME_COMMANDS)
    return next((resolved for resolved in on_path if resolved), None)
39 changes: 39 additions & 0 deletions src/hai_agents/local/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

import os
import uuid as _uuid
from typing import Any

from pydantic import BaseModel, Field, model_validator
from typing_extensions import Self

from ..environment import HaiAgentsEnvironment

# Base URL a SidecarConfig uses when none is supplied: the EU member of HaiAgentsEnvironment.
DEFAULT_BASE_URL = HaiAgentsEnvironment.EU.value
# Lookup from a kind string to a capability name; keys presumably are environment kinds — verify against the wiring code.
KIND_TO_CAPABILITY = {"web": "browser", "desktop": "desktop"}
# Every capability name SidecarConfig accepts (the values of the mapping above).
CAPABILITIES = frozenset(KIND_TO_CAPABILITY.values())


def session_id_from_environment_id(environment_id: str, api_key: str, capability: str) -> str:
    """Derive a deterministic session id from an environment id, API key and capability.

    The three values are joined as ``"<api_key>.<environment_id>.<capability>"``
    and hashed into a version-5 UUID under the DNS namespace; the UUID is
    returned in its canonical string form. Equal inputs always give the same id.
    """
    seed_name = f"{api_key}.{environment_id}.{capability}"
    derived = _uuid.uuid5(_uuid.NAMESPACE_DNS, seed_name)
    return str(derived)


class SidecarConfig(BaseModel):
    """Validated settings for one local sidecar.

    After construction ``_resolve_defaults`` checks the capability and fills in
    any of ``api_key``, ``base_url`` and ``session_id`` that were not supplied.

    Raises:
        ValueError: (surfaced by pydantic as a validation error) when the
            capability is unknown or no API key can be resolved.
    """

    # Must be one of CAPABILITIES.
    capability: str
    environment_id: str
    # Kept out of repr()/str() so the secret does not end up in logs or tracebacks.
    api_key: str = Field(default="", repr=False)
    base_url: str = DEFAULT_BASE_URL
    # Derived from environment_id, api_key and capability when left empty.
    session_id: str = ""
    driver_options: dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode="after")
    def _resolve_defaults(self) -> Self:
        """Validate the capability and resolve api_key, base_url and session_id defaults."""
        if self.capability not in CAPABILITIES:
            raise ValueError(f"unknown capability {self.capability!r}; expected one of {sorted(CAPABILITIES)}")
        if not self.api_key:
            self.api_key = os.getenv("HAI_API_KEY") or os.getenv("AGP_SERVICE_KEY") or os.getenv("AGP_API_KEY") or ""
        if not self.api_key:
            raise ValueError("api_key is required (or set HAI_API_KEY)")
        # An explicitly passed base_url always wins; only an omitted one falls back to the
        # environment, so a sidecar does not silently talk to the EU default host.
        # NOTE(review): HAI_API_BASE_URL is the variable named in review — confirm it is the
        # one the SDK client itself reads.
        if "base_url" not in self.model_fields_set:
            env_base_url = os.getenv("HAI_API_BASE_URL")
            if env_base_url:
                self.base_url = env_base_url
        if not self.session_id:
            self.session_id = session_id_from_environment_id(self.environment_id, self.api_key, self.capability)
        return self

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sidecar ignores API base URL

Medium Severity

SidecarConfig fills api_key from environment variables but always keeps base_url at the EU default unless passed explicitly. A sidecar started with SidecarConfig(...) while HAI_API_BASE_URL (or a non-EU SDK client) points elsewhere will poll and post results on the wrong host, so local control never attaches to the session the client created.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit abf44a5. Configure here.

5 changes: 5 additions & 0 deletions src/hai_agents/local/pyautogui_desktop/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations

from .driver import LocalDesktopDriver, RunCommandResponse

__all__ = ["LocalDesktopDriver", "RunCommandResponse"]
Loading