diff --git a/.env.example b/.env.example index a29fbd78b..445933a67 100644 --- a/.env.example +++ b/.env.example @@ -1,13 +1,17 @@ # This file is intentionally documentation-only. # # Integrated LexVoice runs should not configure frontend variables here. -# Use `../lex-voice/.env` as the single source of truth; `lex-voice/run.sh` -# injects LiveKit, room-input, input-source, role-device, agent, media, and -# debug settings into the frontend process when it starts `make start_ui`. +# Use the LexVoice repository `.env` as the single source of truth; its `run.sh` +# injects LiveKit, room-input, input-source, role-device, agent, media, and debug +# settings into the frontend process when it starts `make start_ui`. # # Only create `agent-starter-react/.env.local` for standalone frontend # development where this repository is launched directly with `pnpm dev`. # In that case, define only the variables needed for that standalone run. + +# For sandbox gateway deployment, see the LexVoice repository's +# `deploy/liveavatar_gateway/.env.example.gateway` and +# `deploy/liveavatar_gateway/.env.example.sandbox` reference files. # # `OBSERVABILITY_ENABLED=1` uses the same unified switch as the backend. When # enabled by the LexVoice runtime, browser-side probes publish LiveKit data diff --git a/.gitignore b/.gitignore index dcc586172..3fa917c01 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,12 @@ # testing /coverage +# Legacy sandbox gateway artifacts from pre-migration local runs +/.sandbox-gateway/ +/logs/direct-proxy.log +/logs/sandbox-gateway*.log +/logs/sandbox-gateway.pid + # next.js /.next/ /out/ diff --git a/README.md b/README.md index 3cadc5a13..54ebe2f90 100644 --- a/README.md +++ b/README.md @@ -57,10 +57,10 @@ Run the following command to automatically clone this template. lk app create --template agent-starter-react ``` -For integrated LexVoice runs, configure `../lex-voice/.env` and start the -frontend through the LexVoice runtime scripts. `../lex-voice/run.sh` injects -LiveKit, room-input, input-source, role-device, agent, media, and debug settings -into this Next.js process. +For integrated LexVoice runs, configure the LexVoice repository `.env` and start +the frontend through the LexVoice runtime scripts. The LexVoice repository's +`run.sh` injects LiveKit, room-input, input-source, role-device, agent, media, +and debug settings into this Next.js process. The session lifecycle API keeps start/stop state in memory, so integrated deployments should route `/api/session/*` to a single Next.js instance or sticky routing. @@ -68,6 +68,16 @@ If you replace the custom connection details endpoint, it must echo the requeste `sessionId` and derive the same room name so dispatch and stop calls coordinate with the connected room. +### LiveAvatar Gateway Deployments + +Sandbox-backed public deployments are owned by the LexVoice repository. Set +`LIVEAVATAR_USE_SANDBOX=1` in the LexVoice repository `.env` and configure +broker, template, warm pool, and `SANDBOX_ENV_*` values in the LexVoice repository's +`deploy/liveavatar_gateway/.env`. + +This frontend repository only runs the Next.js UI. It does not create, release, +or warm sandbox sessions. + For standalone frontend development, install dependencies and run the dev server directly: @@ -78,8 +88,8 @@ pnpm dev And open http://localhost:3000 in your browser. -You'll also need a LiveKit server and an agent worker. In this workspace, those -are normally provided by the sibling `../lex-voice` project. +You'll also need a LiveKit server and an agent worker. In integrated workspaces, +those are normally provided by the LexVoice project. ## Configuration @@ -118,7 +128,7 @@ You can update these values in [`app-config.ts`](./app-config.ts) to customize b #### Environment Variables -Integrated runs should keep runtime variables in `../lex-voice/.env`; this +Integrated runs should keep runtime variables in the LexVoice repository `.env`; this repository's `.env.example` is documentation-only. Only create `agent-starter-react/.env.local` for standalone frontend development launched directly with `pnpm dev`. @@ -130,7 +140,7 @@ LIVEKIT_URL=https://your-livekit-server-url ``` The frontend defaults to the browser camera/microphone input when no input -source is provided. Configure `INPUT_SOURCE` only in `../lex-voice/.env` for +source is provided. Configure `INPUT_SOURCE` only in the LexVoice repository `.env` for integrated backend runs. The LiveKit variables above are required for standalone voice agent functionality to work with your LiveKit project. diff --git a/app-config.ts b/app-config.ts index 60386bbc9..cc0a5db09 100644 --- a/app-config.ts +++ b/app-config.ts @@ -46,6 +46,7 @@ export interface AppConfig { // for LiveKit Cloud Sandbox sandboxId?: string; + voiceSessionId?: string; agentName?: string; inputSource?: string; audioInputDevice?: string; @@ -227,6 +228,7 @@ export const APP_CONFIG_DEFAULTS: AppConfig = { // for LiveKit Cloud Sandbox sandboxId: undefined, + voiceSessionId: undefined, agentName: undefined, inputSource: undefined, audioInputDevice: undefined, diff --git a/app/api/session/dispatch/route.ts b/app/api/session/dispatch/route.ts index a5b1ad562..42b36b39a 100644 --- a/app/api/session/dispatch/route.ts +++ b/app/api/session/dispatch/route.ts @@ -1,11 +1,16 @@ import { NextResponse } from 'next/server'; import { AgentDispatchClient, RoomServiceClient } from 'livekit-server-sdk'; -import { type ParticipantInfo, ParticipantInfo_Kind } from '@livekit/protocol'; +import { type ParticipantInfo } from '@livekit/protocol'; import { deriveLiveKitRoomName, deriveSessionIdFromLiveKitRoomName, isValidConnectionRoomId, } from '@/lib/connection-room-id'; +import { + type AgentParticipantMatchOptions, + findAgentParticipantInList, + findReusableAgentParticipant as findReusableAgentParticipantInList, +} from '@/lib/session-dispatch-readiness'; import { resolveLiveKitHttpUrl } from '@/lib/session-stop'; import { type RoomSessionToken, @@ -23,10 +28,6 @@ const AGENT_DISPATCH_POLL_MS = readPositiveIntEnv('AGENT_DISPATCH_POLL_MS', 200) export const runtime = 'nodejs'; export const revalidate = 0; -type AgentParticipantMatchOptions = { - allowAnonymousLiveKitAgentFallback?: boolean; -}; - class RoomSessionCancelledError extends Error { constructor(session: RoomSessionToken) { super( @@ -157,11 +158,15 @@ async function createAgentDispatchWithRetry( try { throwIfSessionCancelled(session); - const alreadyJoined = await roomHasAgentParticipant(roomClient, roomName, agentName); + const alreadyJoined = await findReusableAgentParticipant(roomClient, roomName, agentName); throwIfSessionCancelled(session); if (alreadyJoined) { markRoomSessionRunning(session); - return { attempts, alreadyJoined: true }; + return { + attempts, + alreadyJoined: true, + agentParticipant: summarizeAgentParticipant(alreadyJoined), + }; } const dispatch = await dispatchClient.createDispatch(roomName, agentName); @@ -174,21 +179,28 @@ async function createAgentDispatchWithRetry( throw new RoomSessionCancelledError(session); } - if ( - await waitForAgentParticipant( - roomClient, - roomName, - agentName, - remainingDispatchTime(startedAt), - session - ) - ) { + // A successful LiveKit dispatch often needs multiple seconds before the + // agent worker joins the room. Wait for the full remaining session-start + // budget here; the retry loop is for API/listParticipants failures, not + // for repeatedly recreating a healthy dispatch every retry interval. + const agentParticipant = await waitForAgentParticipant( + roomClient, + roomName, + agentName, + remainingDispatchTime(startedAt), + session + ); + if (agentParticipant) { if (isRoomSessionCancelled(session)) { await deleteLiveKitRoomQuietly(roomClient, roomName); throw new RoomSessionCancelledError(session); } markRoomSessionRunning(session); - return { attempts, dispatchId: dispatch.id }; + return { + attempts, + dispatchId: dispatch.id, + agentParticipant: summarizeAgentParticipant(agentParticipant), + }; } lastError = new Error('agent participant did not join before retry'); @@ -231,17 +243,16 @@ async function waitForAgentParticipant( maxWaitMs: number, session: RoomSessionToken ) { - const deadline = Date.now() + Math.min(maxWaitMs, AGENT_DISPATCH_RETRY_MS); + const deadline = Date.now() + maxWaitMs; do { throwIfSessionCancelled(session); - if ( - await roomHasAgentParticipant(roomClient, roomName, agentName, { - allowAnonymousLiveKitAgentFallback: true, - }) - ) { + const participant = await findAgentParticipant(roomClient, roomName, agentName, { + allowAnonymousLiveKitAgentFallback: true, + }); + if (participant) { throwIfSessionCancelled(session); - return true; + return participant; } const waitMs = Math.min(AGENT_DISPATCH_POLL_MS, deadline - Date.now()); @@ -251,43 +262,38 @@ async function waitForAgentParticipant( } while (Date.now() < deadline); throwIfSessionCancelled(session); - return roomHasAgentParticipant(roomClient, roomName, agentName, { + return findAgentParticipant(roomClient, roomName, agentName, { allowAnonymousLiveKitAgentFallback: true, }); } -async function roomHasAgentParticipant( +async function findAgentParticipant( roomClient: RoomServiceClient, roomName: string, agentName: string, options: AgentParticipantMatchOptions = {} ) { const participants = await roomClient.listParticipants(roomName); - if (participants.some((participant) => isExpectedAgentParticipant(participant, agentName))) { - return true; - } - if (!options.allowAnonymousLiveKitAgentFallback) { - return false; - } - - // Local LiveKit may omit agent attributes; fresh per-session rooms keep this fallback bounded. - const anonymousLiveKitAgents = participants.filter(isAnonymousLiveKitAgentParticipant); - return anonymousLiveKitAgents.length === 1; + return findAgentParticipantInList(participants, agentName, options); } -function isExpectedAgentParticipant(participant: ParticipantInfo, agentName: string) { - const attributes = participant.attributes ?? {}; - return attributes['lk.agent.name'] === agentName || attributes['lk.agent_name'] === agentName; +async function findReusableAgentParticipant( + roomClient: RoomServiceClient, + roomName: string, + agentName: string +) { + const participants = await roomClient.listParticipants(roomName); + return findReusableAgentParticipantInList(participants, agentName); } -function isAnonymousLiveKitAgentParticipant(participant: ParticipantInfo) { - const attributes = participant.attributes ?? {}; - return ( - participant.kind === ParticipantInfo_Kind.AGENT && - participant.identity.startsWith('agent-') && - !attributes['lk.agent.name'] && - !attributes['lk.agent_name'] - ); +function summarizeAgentParticipant(participant: ParticipantInfo | null) { + if (!participant) { + return null; + } + + return { + identity: participant.identity, + }; } async function deleteDispatchQuietly( diff --git a/components/app/session-provider.tsx b/components/app/session-provider.tsx index 46eae62c2..51a42b4bb 100644 --- a/components/app/session-provider.tsx +++ b/components/app/session-provider.tsx @@ -14,6 +14,7 @@ const DEFAULT_BROWSER_SOURCE_CLIENT: BrowserSourceClient = { videoTrack: null, audioPending: false, videoPending: false, + setAudioDeviceId: async () => {}, setAudioEnabled: async () => {}, setVideoEnabled: async () => {}, start: async () => {}, diff --git a/components/livekit/agent-control-bar/agent-control-bar.tsx b/components/livekit/agent-control-bar/agent-control-bar.tsx index 90743cf25..d1e011a1c 100644 --- a/components/livekit/agent-control-bar/agent-control-bar.tsx +++ b/components/livekit/agent-control-bar/agent-control-bar.tsx @@ -134,6 +134,20 @@ export function AgentControlBar({ [browserSourceClient, handleDeviceError] ); + const handleAudioDeviceSelect = useCallback( + (deviceId: string) => { + handleAudioDeviceChange(deviceId); + if (!usesBrowserRawAudioInput) { + return; + } + + void browserSourceClient.setAudioDeviceId(deviceId).catch((error) => { + handleDeviceError({ source: Track.Source.Microphone, error }); + }); + }, + [browserSourceClient, handleAudioDeviceChange, handleDeviceError, usesBrowserRawAudioInput] + ); + const handleRawVideoToggle = useCallback( async (enabled: boolean) => { try { @@ -195,7 +209,7 @@ export function AgentControlBar({ usesBrowserRawAudioInput ? handleRawMicrophoneToggle : microphoneToggle.toggle } onMediaDeviceError={handleMicrophoneDeviceSelectError} - onActiveDeviceChange={handleAudioDeviceChange} + onActiveDeviceChange={handleAudioDeviceSelect} /> )} diff --git a/components/livekit/agent-control-bar/track-device-select.tsx b/components/livekit/agent-control-bar/track-device-select.tsx index f0373f558..faf52b423 100644 --- a/components/livekit/agent-control-bar/track-device-select.tsx +++ b/components/livekit/agent-control-bar/track-device-select.tsx @@ -18,11 +18,14 @@ type DeviceSelectProps = React.ComponentProps & { variant?: 'default' | 'small'; track?: LocalAudioTrack | LocalVideoTrack | undefined; requestPermissions?: boolean; + alwaysVisible?: boolean; onMediaDeviceError?: (error: Error) => void; onDeviceListChange?: (devices: MediaDeviceInfo[]) => void; onActiveDeviceChange?: (deviceId: string) => void; }; +const PERMISSION_PROMPT_DEVICE_VALUE = '__request_media_device_permission__'; + const selectVariants = cva( 'w-full rounded-full px-3 py-2 text-sm cursor-pointer disabled:not-allowed', { @@ -43,6 +46,7 @@ export function TrackDeviceSelect({ track, size = 'default', requestPermissions = false, + alwaysVisible = false, onMediaDeviceError, onDeviceListChange, onActiveDeviceChange, @@ -72,20 +76,28 @@ export function TrackDeviceSelect({ }, [open]); const handleActiveDeviceChange = (deviceId: string) => { + if (deviceId === PERMISSION_PROMPT_DEVICE_VALUE) { + setRequestPermissionsState(true); + return; + } + setActiveMediaDevice(deviceId); onActiveDeviceChange?.(deviceId); }; const filteredDevices = useMemo(() => devices.filter((d) => d.deviceId !== ''), [devices]); - if (filteredDevices.length < 2) { + if (!alwaysVisible && filteredDevices.length < 2) { return null; } + const selectedDeviceId = + activeDeviceId || filteredDevices[0]?.deviceId || PERMISSION_PROMPT_DEVICE_VALUE; + return ( ); } + +function getFallbackDeviceLabel(kind: MediaDeviceKind, index: number) { + const label = kind === 'audioinput' ? 'Microphone' : kind === 'videoinput' ? 'Camera' : 'Device'; + + return index === 0 ? `Default ${label.toLowerCase()}` : `${label} ${index + 1}`; +} diff --git a/components/livekit/agent-control-bar/track-selector.tsx b/components/livekit/agent-control-bar/track-selector.tsx index fdd0a3f70..5b0888df6 100644 --- a/components/livekit/agent-control-bar/track-selector.tsx +++ b/components/livekit/agent-control-bar/track-selector.tsx @@ -68,6 +68,7 @@ export function TrackSelector({ size="sm" kind={kind} requestPermissions={false} + alwaysVisible={kind === 'audioinput'} onMediaDeviceError={onMediaDeviceError} onActiveDeviceChange={onActiveDeviceChange} className={cn([ diff --git a/hooks/useBrowserSourceClient.ts b/hooks/useBrowserSourceClient.ts index d55f1c869..778715168 100644 --- a/hooks/useBrowserSourceClient.ts +++ b/hooks/useBrowserSourceClient.ts @@ -57,6 +57,7 @@ export interface BrowserSourceClient { videoTrack: LocalVideoTrack | null; audioPending: boolean; videoPending: boolean; + setAudioDeviceId: (deviceId: string) => Promise; setAudioEnabled: (enabled: boolean) => Promise; setVideoEnabled: (enabled: boolean) => Promise; start: () => Promise; @@ -86,6 +87,7 @@ export function useBrowserSourceClient( const browserVideoHeight = appConfig.browserVideoHeight ?? 480; const browserVideoStatsEnabled = appConfig.browserVideoStats || appConfig.debugVideo || false; const audioEnabledRef = useRef(audioConfigured); + const audioDeviceIdRef = useRef(null); const videoEnabledRef = useRef(videoConfigured ? BROWSER_VIDEO_DEFAULT_ENABLED : false); const [audioEnabled, setAudioEnabledState] = useState(audioConfigured); const [videoEnabled, setVideoEnabledState] = useState( @@ -126,7 +128,9 @@ export function useBrowserSourceClient( [OBSERVABILITY_ATTRS.TRACK_SID]: null, [OBSERVABILITY_ATTRS.TRACK_STREAM_NAME]: browserMediaStreamName, }; - const audioTrack = await createDirectBrowserAudioTrack(); + const audioTrack = await createLocalAudioTrack( + buildAudioCaptureOptions(audioDeviceIdRef.current) + ); const captureTrack = audioTrack.mediaStreamTrack; audioTrack.mediaStreamTrack.enabled = runtime.audioEnabled; @@ -402,6 +406,41 @@ export function useBrowserSourceClient( [audioConfigured, ensureAudioPublished, recordFrontendObservability, unpublishAudio] ); + const setAudioDeviceId = useCallback( + async (deviceId: string) => { + if (!audioConfigured) { + return; + } + + const nextDeviceId = normalizeAudioDeviceId(deviceId); + const previousDeviceId = audioDeviceIdRef.current; + if (nextDeviceId === previousDeviceId) { + return; + } + + setAudioPending(true); + audioDeviceIdRef.current = nextDeviceId; + const runtime = runtimeRef.current; + try { + if (runtime?.audioEnabled) { + await unpublishAudio(runtime); + await ensureAudioPublished(); + } + } catch (error) { + audioDeviceIdRef.current = previousDeviceId; + if (runtime?.audioEnabled && !runtime.audioTrack) { + await ensureAudioPublished().catch((restoreError) => { + console.warn('[browser-audio] failed to restore previous input device', restoreError); + }); + } + throw error; + } finally { + setAudioPending(false); + } + }, + [audioConfigured, ensureAudioPublished, unpublishAudio] + ); + const setVideoEnabled = useCallback( async (nextEnabled: boolean) => { if (!videoConfigured) { @@ -465,6 +504,7 @@ export function useBrowserSourceClient( videoTrack, audioPending, videoPending, + setAudioDeviceId, setAudioEnabled, setVideoEnabled, start, @@ -477,6 +517,7 @@ export function useBrowserSourceClient( videoTrack, audioPending, videoPending, + setAudioDeviceId, setAudioEnabled, setVideoEnabled, start, @@ -485,8 +526,19 @@ export function useBrowserSourceClient( ); } -async function createDirectBrowserAudioTrack(): Promise { - return createLocalAudioTrack(BROWSER_AUDIO_CONSTRAINTS); +function normalizeAudioDeviceId(deviceId: string | null | undefined) { + if (!deviceId || deviceId === 'default') { + return null; + } + + return deviceId; +} + +function buildAudioCaptureOptions(deviceId: string | null) { + return { + ...BROWSER_AUDIO_CONSTRAINTS, + ...(deviceId ? { deviceId: { exact: deviceId } } : {}), + }; } function syncTrackEnabled(track: LocalAudioTrack | LocalVideoTrack | null, enabled: boolean) { diff --git a/hooks/useRoom.ts b/hooks/useRoom.ts index d2fc9b066..3e96d82c9 100644 --- a/hooks/useRoom.ts +++ b/hooks/useRoom.ts @@ -5,6 +5,7 @@ import { toastAlert } from '@/components/livekit/alert-toast'; import { useBrowserSourceClient } from '@/hooks/useBrowserSourceClient'; import { getVoiceSessionId, resetVoiceSessionId } from '@/lib/browser-room-session'; import { readConnectionDetailsResponse } from '@/lib/connection-details-response'; +import { isValidConnectionRoomId } from '@/lib/connection-room-id'; import { FRONTEND_EVENTS, publishFrontendObservabilityEvent } from '@/lib/observability'; import { waitForRoomDisconnected } from '@/lib/room-disconnect'; import { @@ -38,6 +39,13 @@ export function useRoom(appConfig: AppConfig) { const browserSourceClient = useBrowserSourceClient(room, appConfig, { onVideoError: handleBrowserVideoError, }); + const resolveVoiceSessionId = useCallback(() => { + const configuredSessionId = appConfig.voiceSessionId?.trim(); + if (isValidConnectionRoomId(configuredSessionId)) { + return configuredSessionId; + } + return getVoiceSessionId(); + }, [appConfig.voiceSessionId]); const recordFrontendObservability = useCallback( (name: string, attributes?: Record) => { void publishFrontendObservabilityEvent({ @@ -92,7 +100,7 @@ export function useRoom(appConfig: AppConfig) { ); try { - const sessionId = sessionIdRef.current ?? getVoiceSessionId(); + const sessionId = sessionIdRef.current ?? resolveVoiceSessionId(); sessionIdRef.current = sessionId; const res = await fetch(url.toString(), { @@ -114,7 +122,7 @@ export function useRoom(appConfig: AppConfig) { throw new Error('Error fetching connection details!'); } }), - [appConfig] + [appConfig, resolveVoiceSessionId] ); const startSession = useCallback(async () => { @@ -127,7 +135,7 @@ export function useRoom(appConfig: AppConfig) { return; } - const sessionId = getVoiceSessionId(); + const sessionId = resolveVoiceSessionId(); sessionIdRef.current = sessionId; let dispatchSessionId: string | null = sessionId; let connectedRoomName: string | null = null; @@ -229,7 +237,14 @@ export function useRoom(appConfig: AppConfig) { } catch (error) { await handleStartError(error); } - }, [room, appConfig, tokenSource, browserSourceClient, recordFrontendObservability]); + }, [ + room, + appConfig, + tokenSource, + browserSourceClient, + resolveVoiceSessionId, + recordFrontendObservability, + ]); const endSession = useCallback(async () => { try { diff --git a/lib/session-dispatch-readiness.ts b/lib/session-dispatch-readiness.ts new file mode 100644 index 000000000..a3c6bb204 --- /dev/null +++ b/lib/session-dispatch-readiness.ts @@ -0,0 +1,92 @@ +import { + type ParticipantInfo, + ParticipantInfo_Kind, + ParticipantInfo_State, + TrackType, +} from '@livekit/protocol'; + +export type AgentParticipantMatchOptions = { + allowAnonymousLiveKitAgentFallback?: boolean; +}; + +const ROOM_VIDEO_INPUT_IDENTITY = 'room_video_input'; + +function readRoomInputVideoTrackName() { + return ( + process.env['NEXT_PUBLIC_ROOM_VISION_TRACK_NAME'] || + process.env['NEXT_PUBLIC_ROOM_VIDEO_TRACK_NAME'] || + 'room_video' + ); +} + +export function findReusableAgentParticipant( + participants: ParticipantInfo[], + agentName: string +): ParticipantInfo | null { + const expectedAgent = findAgentParticipantInList(participants, agentName); + if (!expectedAgent) { + return null; + } + + return hasReadyRoomVideoInput(participants) ? expectedAgent : null; +} + +export function findAgentParticipantInList( + participants: ParticipantInfo[], + agentName: string, + options: AgentParticipantMatchOptions = {} +): ParticipantInfo | null { + const expectedAgent = participants.find((participant) => + isExpectedAgentParticipant(participant, agentName) + ); + if (expectedAgent) { + return expectedAgent; + } + if (!options.allowAnonymousLiveKitAgentFallback) { + return null; + } + + // Local LiveKit may omit agent attributes; fresh per-session rooms keep this fallback bounded. + const anonymousLiveKitAgents = participants.filter(isAnonymousLiveKitAgentParticipant); + return anonymousLiveKitAgents.length === 1 ? anonymousLiveKitAgents[0] : null; +} + +function hasReadyRoomVideoInput(participants: ParticipantInfo[]) { + const roomVideoTrackName = readRoomInputVideoTrackName(); + return participants.some( + (participant) => + isParticipantActive(participant) && + participant.identity === ROOM_VIDEO_INPUT_IDENTITY && + (participant.tracks ?? []).some( + (track) => + track.name === roomVideoTrackName && + track.type === TrackType.VIDEO && + track.muted !== true + ) + ); +} + +function isExpectedAgentParticipant(participant: ParticipantInfo, agentName: string) { + return ( + isParticipantActive(participant) && + readAgentNameAttribute(participant.attributes ?? {}) === agentName + ); +} + +function isAnonymousLiveKitAgentParticipant(participant: ParticipantInfo) { + const attributes = participant.attributes ?? {}; + return ( + isParticipantActive(participant) && + participant.kind === ParticipantInfo_Kind.AGENT && + participant.identity.startsWith('agent-') && + !readAgentNameAttribute(attributes) + ); +} + +function isParticipantActive(participant: ParticipantInfo) { + return participant.state === ParticipantInfo_State.ACTIVE; +} + +function readAgentNameAttribute(attributes: Record) { + return attributes['lk.agent.name'] || attributes['lk.agent_name'] || attributes.lkAgentName || ''; +} diff --git a/lib/session-stop-client.ts b/lib/session-stop-client.ts index 7cdc9683a..fedbe595d 100644 --- a/lib/session-stop-client.ts +++ b/lib/session-stop-client.ts @@ -62,7 +62,7 @@ function endStopRequestPending() { async function sendAgentSessionStop( sessionId: string, - options: AgentSessionStopOptions = {} + options: AgentSessionStopOptions ): Promise { const response = await fetch('/api/session/stop', { method: 'POST', @@ -87,7 +87,7 @@ async function waitForAgentWorkerSettle(): Promise { async function sendAgentSessionStopAndSettle( sessionId: string, - options: AgentSessionStopOptions = {} + options: AgentSessionStopOptions ): Promise { try { await sendAgentSessionStop(sessionId, options); @@ -100,7 +100,7 @@ async function sendAgentSessionStopAndSettle( function sendAgentSessionStopInBackground( sessionId: string, - options: AgentSessionStopOptions = {} + options: AgentSessionStopOptions ): void { void sendAgentSessionStop(sessionId, options).catch((error: unknown) => { console.warn('Failed to stop remote agent session', error); diff --git a/next.config.ts b/next.config.ts index 597fcaa5a..8564a5b86 100644 --- a/next.config.ts +++ b/next.config.ts @@ -1,7 +1,7 @@ import type { NextConfig } from 'next'; const nextConfig: NextConfig = { - allowedDevOrigins: ['liveavatar.lexmount.cn'], + allowedDevOrigins: ['liveavatar.local.lexmount.net'], }; export default nextConfig; diff --git a/tests/project-config.test.mjs b/tests/project-config.test.mjs index 6b06b0448..d2194d0d3 100644 --- a/tests/project-config.test.mjs +++ b/tests/project-config.test.mjs @@ -16,7 +16,9 @@ test('README matches the documented LexVoice environment source', async () => { const envExample = await readFile('.env.example', 'utf8'); assert.match(envExample, /documentation-only/); - assert.match(readme, /\.\.\/lex-voice\/\.env/); + assert.match(readme, /LexVoice repository `\.env`/); + assert.doesNotMatch(readme, /\blex-voice\b/); + assert.doesNotMatch(envExample, /\blex-voice\b/); assert.match(readme, /single Next\.js instance or sticky routing/); assert.match(readme, /custom connection details endpoint/); assert.match(readme, /sessionId/); @@ -28,7 +30,6 @@ test('avatar filtering excludes the current room video input identity', async () const source = await readFile('hooks/useSmartVoiceAssistant.ts', 'utf8'); assert.match(source, /room_video_input/); - assert.doesNotMatch(source, /room_vision_input/); }); test('client config reads frontend observability from OBSERVABILITY_ENABLED only', () => { diff --git a/tests/session-dispatch-readiness.test.mjs b/tests/session-dispatch-readiness.test.mjs new file mode 100644 index 000000000..d70564274 --- /dev/null +++ b/tests/session-dispatch-readiness.test.mjs @@ -0,0 +1,75 @@ +import assert from 'node:assert/strict'; +import { test } from 'node:test'; + +const { ParticipantInfo_Kind, ParticipantInfo_State, TrackType } = await import( + '@livekit/protocol' +); +const { findReusableAgentParticipant } = await import('../lib/session-dispatch-readiness.ts'); + +function participant({ + identity, + kind = ParticipantInfo_Kind.STANDARD, + state = ParticipantInfo_State.ACTIVE, + attributes = {}, + tracks = [], +}) { + return { + identity, + kind, + state, + attributes, + tracks, + }; +} + +test('dispatch does not reuse an agent before room video input is ready', () => { + const participants = [ + participant({ + identity: 'agent-AJ_stale', + kind: ParticipantInfo_Kind.AGENT, + attributes: { 'lk.agent.name': 'frontdesk-browser-agent' }, + }), + participant({ + identity: 'voice_assistant_user_session', + tracks: [{ name: 'browser_video_track', type: TrackType.VIDEO, muted: false }], + }), + ]; + + assert.equal(findReusableAgentParticipant(participants, 'frontdesk-browser-agent'), null); +}); + +test('dispatch can reuse an active agent once room video input is publishing', () => { + const agent = participant({ + identity: 'agent-AJ_running', + kind: ParticipantInfo_Kind.AGENT, + attributes: { 'lk.agent.name': 'frontdesk-browser-agent' }, + }); + const participants = [ + agent, + participant({ + identity: 'room_video_input', + kind: ParticipantInfo_Kind.AGENT, + tracks: [{ name: 'room_video', type: TrackType.VIDEO, muted: false }], + }), + ]; + + assert.equal(findReusableAgentParticipant(participants, 'frontdesk-browser-agent'), agent); +}); + +test('dispatch does not reuse disconnected agents', () => { + const participants = [ + participant({ + identity: 'agent-AJ_disconnected', + kind: ParticipantInfo_Kind.AGENT, + state: ParticipantInfo_State.DISCONNECTED, + attributes: { 'lk.agent.name': 'frontdesk-browser-agent' }, + }), + participant({ + identity: 'room_video_input', + kind: ParticipantInfo_Kind.AGENT, + tracks: [{ name: 'room_video', type: TrackType.VIDEO, muted: false }], + }), + ]; + + assert.equal(findReusableAgentParticipant(participants, 'frontdesk-browser-agent'), null); +}); diff --git a/tests/session-start-dispatch.test.mjs b/tests/session-start-dispatch.test.mjs index 597abf38e..1af6d4ffc 100644 --- a/tests/session-start-dispatch.test.mjs +++ b/tests/session-start-dispatch.test.mjs @@ -40,7 +40,8 @@ test('session dispatch route retries explicit agent dispatch after the browser j assert.match(routeSource, /AGENT_DISPATCH_TIMEOUT_MS/); assert.match(routeSource, /AGENT_DISPATCH_RETRY_MS/); assert.match(routeSource, /calculateDispatchRetryDelay/); - assert.match(routeSource, /roomHasAgentParticipant/); + assert.match(routeSource, /findAgentParticipant/); + assert.match(routeSource, /summarizeAgentParticipant/); assert.match(routeSource, /deleteDispatchQuietly/); assert.match(routeSource, /dispatchClient\.createDispatch/); assert.match(routeSource, /roomName is required/); @@ -111,17 +112,37 @@ test('session dispatch route logs successful dispatch with canonical session ide assert.match(routeSource, /roomName/); }); -test('session dispatch route keeps pre-dispatch agent matching tied to the configured agent name', async () => { +test('session dispatch response does not expose raw agent attributes', async () => { const routeSource = await readFile( new URL('../app/api/session/dispatch/route.ts', import.meta.url), 'utf8' ); - const participantMatcher = routeSource.match(/function isExpectedAgentParticipant[\s\S]*?\n}/); + const summarySource = + routeSource.match( + /function summarizeAgentParticipant[\s\S]*?\n}\n\nasync function deleteDispatchQuietly/ + )?.[0] ?? ''; + + assert.match(summarySource, /identity: participant\.identity/); + assert.doesNotMatch(summarySource, /participant\.kind/); + assert.doesNotMatch(summarySource, /participant\.attributes/); + assert.doesNotMatch(summarySource, /attributes:/); +}); + +test('session dispatch readiness keeps agent matching tied to the configured agent name', async () => { + const readinessSource = await readFile( + new URL('../lib/session-dispatch-readiness.ts', import.meta.url), + 'utf8' + ); + const participantMatcher = readinessSource.match( + /function isExpectedAgentParticipant[\s\S]*?\n}/ + ); assert.ok(participantMatcher, 'isExpectedAgentParticipant should be defined'); const participantMatcherSource = participantMatcher[0]; - assert.match(participantMatcherSource, /attributes\['lk\.agent\.name'\] === agentName/); - assert.match(participantMatcherSource, /attributes\['lk\.agent_name'\] === agentName/); + assert.match(participantMatcherSource, /readAgentNameAttribute/); + assert.match(readinessSource, /attributes\['lk\.agent\.name'\]/); + assert.match(readinessSource, /attributes\['lk\.agent_name'\]/); + assert.match(readinessSource, /attributes\.lkAgentName/); assert.doesNotMatch(participantMatcherSource, /identity\.startsWith\(['"]agent-['"]\)/); }); @@ -130,24 +151,28 @@ test('session dispatch route only accepts anonymous LiveKit agent fallback after new URL('../app/api/session/dispatch/route.ts', import.meta.url), 'utf8' ); + const readinessSource = await readFile( + new URL('../lib/session-dispatch-readiness.ts', import.meta.url), + 'utf8' + ); + assert.match(routeSource, /findReusableAgentParticipant/); assert.match( routeSource, - /const alreadyJoined = await roomHasAgentParticipant\(roomClient, roomName, agentName\);/ + /const alreadyJoined = await findReusableAgentParticipant\(roomClient, roomName, agentName\);/ ); - assert.match(routeSource, /type AgentParticipantMatchOptions/); - assert.match(routeSource, /allowAnonymousLiveKitAgentFallback/); + assert.match(readinessSource, /type AgentParticipantMatchOptions/); + assert.match(readinessSource, /allowAnonymousLiveKitAgentFallback/); assert.match( routeSource, - /roomHasAgentParticipant\(\s*roomClient,\s*roomName,\s*agentName,\s*\{\s*allowAnonymousLiveKitAgentFallback: true,?\s*\}\s*\)/ - ); - assert.match(routeSource, /function isAnonymousLiveKitAgentParticipant/); - assert.match(routeSource, /ParticipantInfo_Kind\.AGENT/); - assert.match(routeSource, /identity\.startsWith\(['"]agent-['"]\)/); - assert.match(routeSource, /!attributes\['lk\.agent\.name'\]/); - assert.match(routeSource, /!attributes\['lk\.agent_name'\]/); - assert.match(routeSource, /fresh per-session rooms/); - assert.match(routeSource, /anonymousLiveKitAgents\.length === 1/); + /findAgentParticipant\(\s*roomClient,\s*roomName,\s*agentName,\s*\{\s*allowAnonymousLiveKitAgentFallback: true,?\s*\}\s*\)/ + ); + assert.match(readinessSource, /function isAnonymousLiveKitAgentParticipant/); + assert.match(readinessSource, /ParticipantInfo_Kind\.AGENT/); + assert.match(readinessSource, /identity\.startsWith\(['"]agent-['"]\)/); + assert.match(readinessSource, /!readAgentNameAttribute\(attributes\)/); + assert.match(readinessSource, /fresh per-session rooms/); + assert.match(readinessSource, /anonymousLiveKitAgents\.length === 1/); }); test('start call dispatches the agent with a cancellable room session id', async () => { @@ -157,7 +182,9 @@ test('start call dispatches the agent with a cancellable room session id', async ''; assert.match(useRoomSource, /const startSession = useCallback\(async \(\) =>/); - assert.match(useRoomSource, /const sessionId = getVoiceSessionId\(\)/); + assert.match(useRoomSource, /const sessionId = resolveVoiceSessionId\(\)/); + assert.match(useRoomSource, /appConfig\.voiceSessionId/); + assert.match(useRoomSource, /isValidConnectionRoomId\(configuredSessionId\)/); assert.doesNotMatch(dispatchAgentSessionSource, /crypto\.randomUUID\(\)/); assert.match(useRoomSource, /beginAgentSessionStart/); assert.match(useRoomSource, /registerAgentSessionDispatch/); @@ -284,6 +311,46 @@ test('browser video input shows the camera control as enabled by default', async ); }); +test('microphone device selector remains visible before media permission is granted', async () => { + const trackSelectorSource = await readFile( + new URL('../components/livekit/agent-control-bar/track-selector.tsx', import.meta.url), + 'utf8' + ); + const deviceSelectSource = await readFile( + new URL('../components/livekit/agent-control-bar/track-device-select.tsx', import.meta.url), + 'utf8' + ); + + assert.match(trackSelectorSource, /alwaysVisible=\{kind === 'audioinput'\}/); + assert.match(deviceSelectSource, /alwaysVisible = false/); + assert.match(deviceSelectSource, /!alwaysVisible && filteredDevices\.length < 2/); + assert.match(deviceSelectSource, /setRequestPermissionsState\(true\)/); +}); + +test('raw browser audio applies the selected microphone device to capture', async () => { + const browserSourceSource = await readFile( + new URL('../hooks/useBrowserSourceClient.ts', import.meta.url), + 'utf8' + ); + const sessionProviderSource = await readFile( + new URL('../components/app/session-provider.tsx', import.meta.url), + 'utf8' + ); + const controlBarSource = await readFile( + new URL('../components/livekit/agent-control-bar/agent-control-bar.tsx', import.meta.url), + 'utf8' + ); + + assert.match(browserSourceSource, /setAudioDeviceId: \(deviceId: string\) => Promise/); + assert.match(browserSourceSource, /const audioDeviceIdRef = useRef\(null\)/); + assert.match(browserSourceSource, /buildAudioCaptureOptions\(audioDeviceIdRef\.current\)/); + assert.match(browserSourceSource, /deviceId: \{ exact: deviceId \}/); + assert.match(sessionProviderSource, /setAudioDeviceId: async \(\) => \{\}/); + assert.match(controlBarSource, /const handleAudioDeviceSelect = useCallback/); + assert.match(controlBarSource, /browserSourceClient\.setAudioDeviceId\(deviceId\)/); + assert.match(controlBarSource, /onActiveDeviceChange=\{handleAudioDeviceSelect\}/); +}); + test('configurable video selector only changes externally controlled media from user toggle', async () => { const selectorSource = await readFile( new URL( diff --git a/tests/session-stop-client.test.mjs b/tests/session-stop-client.test.mjs index 3ad712439..61b62ab25 100644 --- a/tests/session-stop-client.test.mjs +++ b/tests/session-stop-client.test.mjs @@ -71,6 +71,137 @@ test('agent session stop sends only canonical session id to Next API', async () } }); +test('agent session stop does not release gateway sandbox sessions by default on public paths', async () => { + const originalFetch = globalThis.fetch; + const originalWindow = globalThis.window; + const calls = []; + globalThis.window = { + location: { + pathname: '/s/abc123/live', + }, + }; + globalThis.fetch = async (url, init = {}) => { + calls.push({ url, method: init.method }); + return { ok: true, status: 200 }; + }; + + try { + const { requestAgentSessionStop } = await loadSessionStopClientModule(); + + await requestAgentSessionStop('11111111-2222-4333-8444-555555555555'); + + assert.deepEqual(calls, [{ url: '/api/session/stop', method: 'POST' }]); + } finally { + globalThis.fetch = originalFetch; + if (originalWindow === undefined) { + delete globalThis.window; + } else { + globalThis.window = originalWindow; + } + } +}); + +test('agent session stop ignores public sandbox paths during local cleanup', async () => { + const originalFetch = globalThis.fetch; + const originalWindow = globalThis.window; + const calls = []; + globalThis.window = { + location: { + pathname: '/', + }, + }; + globalThis.fetch = async (url, init = {}) => { + calls.push({ url, method: init.method }); + return { ok: true, status: 200 }; + }; + + try { + const { beginAgentSessionStart, requestAgentSessionStop } = await loadSessionStopClientModule(); + const sessionId = '11111111-2222-4333-8444-555555555555'; + + beginAgentSessionStart('room-a', sessionId); + globalThis.window.location.pathname = '/s/abc123/live'; + + await requestAgentSessionStop(sessionId); + + assert.deepEqual(calls, [{ url: '/api/session/stop', method: 'POST' }]); + } finally { + globalThis.fetch = originalFetch; + if (originalWindow === undefined) { + delete globalThis.window; + } else { + globalThis.window = originalWindow; + } + } +}); + +test('background agent session stop does not release gateway sandbox sessions', async () => { + const originalFetch = globalThis.fetch; + const originalWindow = globalThis.window; + const calls = []; + globalThis.window = { + location: { + pathname: '/s/abc123/live', + }, + }; + globalThis.fetch = async (url, init = {}) => { + calls.push({ url, method: init.method }); + return { ok: true, status: 200 }; + }; + + try { + const { beginAgentSessionStart, requestAgentSessionStop } = await loadSessionStopClientModule(); + const sessionId = '11111111-2222-4333-8444-555555555555'; + + beginAgentSessionStart('room-a', sessionId); + await requestAgentSessionStop(sessionId, { + waitForRemote: false, + }); + for (let i = 0; i < 8 && calls.length < 1; i++) { + await Promise.resolve(); + } + + assert.deepEqual(calls, [{ url: '/api/session/stop', method: 'POST' }]); + } finally { + globalThis.fetch = originalFetch; + if (originalWindow === undefined) { + delete globalThis.window; + } else { + globalThis.window = originalWindow; + } + } +}); + +test('agent session stop skips gateway release outside public sandbox paths', async () => { + const originalFetch = globalThis.fetch; + const originalWindow = globalThis.window; + const calls = []; + globalThis.window = { + location: { + pathname: '/', + }, + }; + globalThis.fetch = async (url, init = {}) => { + calls.push({ url, method: init.method }); + return { ok: true, status: 200 }; + }; + + try { + const { requestAgentSessionStop } = await loadSessionStopClientModule(); + + await requestAgentSessionStop('11111111-2222-4333-8444-555555555555'); + + assert.deepEqual(calls, [{ url: '/api/session/stop', method: 'POST' }]); + } finally { + globalThis.fetch = originalFetch; + if (originalWindow === undefined) { + delete globalThis.window; + } else { + globalThis.window = originalWindow; + } + } +}); + test('clears visible stop pending while keeping start gated during worker settle', async () => { const originalFetch = globalThis.fetch; const originalSetTimeout = globalThis.setTimeout; @@ -199,7 +330,7 @@ test('view controller disables start while a session is active', async () => { assert.match(source, /startDisabled=\{isStartDisabled\}/); }); -test('session lifecycle cancels in-flight dispatch before stop releases next start', async () => { +test('session lifecycle cancels in-flight dispatch before allowing next start', async () => { const source = await readFile(new URL('../lib/session-stop-client.ts', import.meta.url), 'utf8'); assert.match(source, /beginAgentSessionStart/); @@ -228,6 +359,7 @@ test('disconnect control exits the local session before remote stop finishes', a assert.match(controlBarSource, /getCurrentSessionId/); assert.match(controlBarSource, /registerAgentSessionLocalCleanup/); assert.match(controlBarSource, /requestAgentSessionStop\(sessionId\)/); + assert.doesNotMatch(controlBarSource, /releaseGatewaySession/); assert.doesNotMatch(controlBarSource, /usesFastBrowserStop/); assert.doesNotMatch(controlBarSource, /waitForRemote:\s*!/); assert.doesNotMatch(controlBarSource, /await requestAgentSessionStop\(room\.name\)/);