Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ebfd9ef
Record replay audio
rgarcia May 26, 2026
191d61d
Add replay audio sample
rgarcia May 26, 2026
215891c
Make replay audio sample audible
rgarcia May 26, 2026
cb9135d
Add Zombocom replay recording sample
rgarcia May 26, 2026
fc58831
Keep replay audio aligned
rgarcia May 26, 2026
f02ebe6
Add replay pipeline verifier
rgarcia May 29, 2026
96d30e2
Fix replay audio startup stutter
rgarcia May 30, 2026
bfb3453
Add fake mic source and route neko live-view audio
rgarcia Jun 1, 2026
eb150c3
Assert browser sees a non-monitor mic in audio recording e2e
rgarcia Jun 1, 2026
114f47b
Fix KernelInput null-source load and enumerateDevices check
rgarcia Jun 1, 2026
3af1e94
Serve audio e2e fixture from inside the container
rgarcia Jun 1, 2026
99c7cba
Always record audio and centralize PulseAudio config
rgarcia Jun 1, 2026
08a61ed
Remove replay pipeline verifier script
rgarcia Jun 1, 2026
d4d6eb4
Route PulseAudio for root-user chromium launch
rgarcia Jun 1, 2026
ba9714f
Harden PulseAudio startup and recording cap
rgarcia Jun 1, 2026
bce1063
Fix live-view audio: give neko PULSE_SERVER and auto-unmute
rgarcia Jun 1, 2026
7937907
tweaks
rgarcia Jun 1, 2026
94ab524
Tighten audio config validation and extract ffmpeg audio args
rgarcia Jun 1, 2026
bb1484d
Guard live-view auto-unmute on mute state instead of a one-shot flag
rgarcia Jun 1, 2026
b8f1f8e
Make audio recording opt-in via recordAudio request flag
rgarcia Jun 1, 2026
b7df0ee
Gate thread_queue_size and x264 preset/tune on audio recording
rgarcia Jun 1, 2026
b9538d6
Return 400 on invalid audio config and clear stale pulse socket
rgarcia Jun 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions images/chromium-headful/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,8 @@ COPY --from=xorg-deps /usr/local/lib/xorg/modules/drivers/dummy_drv.so /usr/lib/
COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xorg/modules/input/neko_drv.so

COPY images/chromium-headful/image-chromium/ /
COPY images/chromium-headful/start-pulseaudio.sh /images/chromium-headful/start-pulseaudio.sh
RUN chmod +x /images/chromium-headful/start-pulseaudio.sh
COPY shared/start-pulseaudio.sh /usr/local/bin/start-pulseaudio.sh
RUN chmod +x /usr/local/bin/start-pulseaudio.sh
COPY images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf
COPY images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/
COPY shared/envoy/supervisor-envoy.conf /etc/supervisor/conf.d/services/envoy.conf
Expand Down
16 changes: 16 additions & 0 deletions images/chromium-headful/client/src/components/video.vue
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,8 @@

/* Initialize Guacamole Keyboard */
this.keyboard.onkeydown = (key: number) => {
this.unmuteOnInteraction()

if (!this.hosting || this.locked) {
return true
}
Expand Down Expand Up @@ -670,6 +672,18 @@
this.$accessor.video.setMuted(false)
}

// The autoplay policy only lets us unmute inside a real user gesture, and
// this client hides the unmute overlay. Piggyback on the existing input
// handlers so the first interaction with the live view unmutes. Guarded on
// the current state so it is a cheap no-op once unmuted, yet re-applies if a
// reconnect re-mutes the element. (mousemove is intentionally not used: it
// is not a user-activation event, so it cannot unlock audio.)
unmuteOnInteraction() {
  // Already audible: return early so the input handlers that call this on
  // every keydown / mousedown pay almost nothing.
  if (!this.muted) {
    return
  }

  // Still (or again) muted: we are inside a user gesture here, so the
  // browser's autoplay policy allows the unmute to take effect.
  this.unmute()
}
Comment thread
cursor[bot] marked this conversation as resolved.

toggleControl() {
if (!this.playable) {
return
Expand Down Expand Up @@ -789,6 +803,8 @@
}

onMouseDown(e: MouseEvent) {
this.unmuteOnInteraction()
Comment thread
cursor[bot] marked this conversation as resolved.

if (!this.hosting) {
this.$emit('control-attempt', e)
}
Expand Down
8 changes: 8 additions & 0 deletions images/chromium-headful/neko.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ session:
# needed for legacy API
enabled: false

capture:
audio:
# Capture from the monitor of the recorder's playback sink so live view
# streams the same browser audio that gets recorded. Neko defaults to
# "audio_output.monitor", which does not exist here (our sink is
# KernelOutput), so without this override live view has no audio.
device: "KernelOutput.monitor"
Comment thread
cursor[bot] marked this conversation as resolved.

plugins:
enabled: false

Expand Down
13 changes: 0 additions & 13 deletions images/chromium-headful/start-pulseaudio.sh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[program:kernel-images-api]
command=/bin/bash -lc 'mkdir -p "${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" S2_BASIN="${S2_BASIN:-}" S2_ACCESS_TOKEN="${S2_ACCESS_TOKEN:-}" S2_STREAM="${S2_STREAM:-}" exec /usr/local/bin/kernel-images-api'
; AUDIO_SOURCE and PULSE_SERVER defaults must match shared/start-pulseaudio.sh.
command=/bin/bash -lc 'mkdir -p "${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" AUDIO_SOURCE="${KERNEL_IMAGES_API_AUDIO_SOURCE:-${AUDIO_SOURCE:-KernelOutput.monitor}}" PULSE_SERVER="${PULSE_SERVER:-unix:/tmp/pulse/native}" LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" S2_BASIN="${S2_BASIN:-}" S2_ACCESS_TOKEN="${S2_ACCESS_TOKEN:-}" S2_STREAM="${S2_STREAM:-}" exec /usr/local/bin/kernel-images-api'
autostart=false
autorestart=true
startsecs=0
Expand Down
3 changes: 3 additions & 0 deletions images/chromium-headful/supervisor/services/neko.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@ command=/usr/bin/neko serve --server.static /var/www --server.bind 0.0.0.0:8080
autostart=false
autorestart=true
startsecs=0
# neko's gstreamer pulsesrc needs the shared PulseAudio socket to capture
# KernelOutput.monitor for live-view audio
environment=PULSE_SERVER="unix:/tmp/pulse/native"
stdout_logfile=/var/log/supervisord/neko
redirect_stderr=true
4 changes: 2 additions & 2 deletions images/chromium-headful/supervisor/services/pulseaudio.conf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[program:pulseaudio]
command=/bin/bash -lc '/images/chromium-headful/start-pulseaudio.sh'
command=/bin/bash -lc '/usr/local/bin/start-pulseaudio.sh'
autostart=false
autorestart=false
autorestart=true
startsecs=0
exitcodes=0
stdout_logfile=/var/log/supervisord/pulseaudio
Expand Down
3 changes: 3 additions & 0 deletions images/chromium-headless/image/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=$CACHEIDPREFIX-ap
gpg-agent \
dbus \
dbus-x11 \
pulseaudio \
xvfb \
x11-utils \
x11-xserver-utils \
Expand Down Expand Up @@ -235,6 +236,8 @@ RUN useradd -m -s /bin/bash kernel
# supervisor start scripts
COPY images/chromium-headless/image/start-xvfb.sh /images/chromium-headless/image/start-xvfb.sh
RUN chmod +x /images/chromium-headless/image/start-xvfb.sh
COPY shared/start-pulseaudio.sh /usr/local/bin/start-pulseaudio.sh
RUN chmod +x /usr/local/bin/start-pulseaudio.sh

# Container entrypoint wrapper (Go binary, replaces wrapper.sh)
COPY --from=server-builder /out/wrapper /wrapper
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[program:kernel-images-api]
command=/bin/bash -lc 'mkdir -p "${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" S2_BASIN="${S2_BASIN:-}" S2_ACCESS_TOKEN="${S2_ACCESS_TOKEN:-}" S2_STREAM="${S2_STREAM:-}" exec /usr/local/bin/kernel-images-api'
; AUDIO_SOURCE and PULSE_SERVER defaults must match shared/start-pulseaudio.sh.
command=/bin/bash -lc 'mkdir -p "${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" AUDIO_SOURCE="${KERNEL_IMAGES_API_AUDIO_SOURCE:-${AUDIO_SOURCE:-KernelOutput.monitor}}" PULSE_SERVER="${PULSE_SERVER:-unix:/tmp/pulse/native}" LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" S2_BASIN="${S2_BASIN:-}" S2_ACCESS_TOKEN="${S2_ACCESS_TOKEN:-}" S2_STREAM="${S2_STREAM:-}" exec /usr/local/bin/kernel-images-api'
autostart=false
autorestart=true
startsecs=0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; PulseAudio daemon for this image, launched via the shared start script that
; the Dockerfile installs at /usr/local/bin/start-pulseaudio.sh.
[program:pulseaudio]
command=/bin/bash -lc '/usr/local/bin/start-pulseaudio.sh'
; Not launched by supervisord at boot; it must be started explicitly
; (NOTE(review): presumably by the container wrapper's startAll — confirm).
autostart=false
; Restart unconditionally whenever the daemon exits, whatever its exit code.
autorestart=true
; The process counts as successfully started immediately; it does not have to
; stay up for any minimum time.
startsecs=0
; Only consulted when autorestart=unexpected, so with autorestart=true above
; this setting has no effect on restart behaviour.
exitcodes=0
; stdout and stderr are merged into a single log file.
stdout_logfile=/var/log/supervisord/pulseaudio
redirect_stderr=true
5 changes: 5 additions & 0 deletions server/cmd/api/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ func (s *ApiService) StartRecording(ctx context.Context, req oapi.StartRecording
params.FrameRate = req.Body.Framerate
params.MaxSizeInMB = req.Body.MaxFileSizeInMB
params.MaxDurationInSeconds = req.Body.MaxDurationInSeconds
params.RecordAudio = req.Body.RecordAudio
Comment thread
cursor[bot] marked this conversation as resolved.
}

// Determine recorder ID (use default if none provided)
Expand All @@ -161,6 +162,10 @@ func (s *ApiService) StartRecording(ctx context.Context, req oapi.StartRecording
// Create, register, and start a new recorder
rec, err := s.factory(recorderID, params)
if err != nil {
if errors.Is(err, recorder.ErrInvalidParams) {
log.Warn("invalid recording parameters", "err", err, "recorder_id", recorderID)
return oapi.StartRecording400JSONResponse{BadRequestErrorJSONResponse: oapi.BadRequestErrorJSONResponse{Message: err.Error()}}, nil
}
log.Error("failed to create recorder", "err", err, "recorder_id", recorderID)
return oapi.StartRecording500JSONResponse{InternalErrorJSONResponse: oapi.InternalErrorJSONResponse{Message: "failed to create recording"}}, nil
}
Expand Down
2 changes: 2 additions & 0 deletions server/cmd/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ func main() {
FrameRate: &config.FrameRate,
MaxSizeInMB: &config.MaxSizeInMB,
OutputDir: &config.OutputDir,
AudioSource: &config.AudioSource,
PulseServer: &config.PulseServer,
}
if err := defaultParams.Validate(); err != nil {
slogger.Error("invalid default recording parameters", "err", err)
Expand Down
26 changes: 25 additions & 1 deletion server/cmd/chromium-launcher/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ import (
"github.com/kernel/kernel-images/server/lib/x11"
)

// shared/start-pulseaudio.sh is the authority for the audio topology. These are
// the fixed contract values it creates, not overridable defaults: chromium must
// connect to the same socket and play into the same sink the daemon sets up.
// Keep them in sync with start-pulseaudio.sh.
const (
// pulseServer is the PulseAudio socket the recorder and chromium share.
// It is exported to chromium as PULSE_SERVER on both the root and the
// runuser launch paths. The same address is the PULSE_SERVER default in the
// kernel-images-api supervisor command and is set for neko, and the wrapper
// waits on the matching filesystem path (/tmp/pulse/native) before starting
// chromium, so all of these must change together.
pulseServer = "unix:/tmp/pulse/native"
// pulseSink is the null sink chromium plays into; the recorder captures
// its .monitor source.
// It is exported to chromium as PULSE_SINK. The recorder's default
// AUDIO_SOURCE and neko's capture device are both "KernelOutput.monitor",
// i.e. this name plus the ".monitor" suffix.
pulseSink = "KernelOutput"
)

func main() {
headless := flag.Bool("headless", false, "Run Chromium with headless flags")
chromiumPath := flag.String("chromium", "chromium", "Chromium binary path (default: chromium)")
Expand Down Expand Up @@ -89,11 +101,15 @@ func main() {

runAsRoot := strings.EqualFold(strings.TrimSpace(os.Getenv("RUN_AS_ROOT")), "true")

// Prepare environment
// Prepare environment. PULSE_SERVER/PULSE_SINK route chromium's audio into the
// recorder's sink; the root path below relies on this inherited env, while the
// non-root path re-asserts them in its runuser env allowlist.
env := os.Environ()
env = append(env,
"DISPLAY=:1",
"DBUS_SESSION_BUS_ADDRESS=unix:path=/run/dbus/system_bus_socket",
"PULSE_SERVER="+pulseServer,
"PULSE_SINK="+pulseSink,
)

if runAsRoot {
Expand All @@ -118,10 +134,18 @@ func main() {
}

// Build: runuser -u kernel -- env DISPLAY=... DBUS_... XDG_... HOME=... chromium <args>
// PULSE_SERVER tells libpulse which daemon socket to connect to; without it
Comment thread
cursor[bot] marked this conversation as resolved.
// chromium-as-kernel-user can't reach the recorder's PulseAudio instance and
// has no audio output at all. PULSE_SINK then selects which sink within that
// daemon playback lands on: Chromium's AudioManagerPulse honors it to redirect
// playback into KernelOutput (see media/audio/pulse/audio_manager_pulse.cc
// GetDefaultOutputDeviceID), which is the sink the recorder captures.
inner := []string{
"env",
"DISPLAY=:1",
"DBUS_SESSION_BUS_ADDRESS=unix:path=/run/dbus/system_bus_socket",
"PULSE_SERVER=" + pulseServer,
"PULSE_SINK=" + pulseSink,
"XDG_CONFIG_HOME=/home/kernel/.config",
"XDG_CACHE_HOME=/home/kernel/.cache",
"HOME=/home/kernel",
Expand Down
8 changes: 8 additions & 0 deletions server/cmd/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ type Config struct {
DisplayNum int `envconfig:"DISPLAY_NUM" default:"1"`
MaxSizeInMB int `envconfig:"MAX_SIZE_MB" default:"500"`
OutputDir string `envconfig:"OUTPUT_DIR" default:"."`
// AudioSource and PulseServer default to empty, i.e. video-only. Setting both
// makes audio capture available to the recorder (individual recordings still
// opt in per request via the recordAudio flag); their values must match the
// topology defined in shared/start-pulseaudio.sh (the authority for the
// sink/source/socket). The image's supervisor conf sets both.
AudioSource string `envconfig:"AUDIO_SOURCE" default:""`
PulseServer string `envconfig:"PULSE_SERVER" default:""`

// Absolute or relative path to the ffmpeg binary. If empty the code falls back to "ffmpeg" on $PATH.
PathToFFmpeg string `envconfig:"FFMPEG_PATH" default:"ffmpeg"`
Expand Down Expand Up @@ -55,6 +61,8 @@ func (c *Config) LogValue() slog.Value {
slog.Int("display_num", c.DisplayNum),
slog.Int("max_size_mb", c.MaxSizeInMB),
slog.String("output_dir", c.OutputDir),
slog.String("audio_source", c.AudioSource),
slog.String("pulse_server", c.PulseServer),
slog.String("ffmpeg_path", c.PathToFFmpeg),
slog.Int("devtools_proxy_port", c.DevToolsProxyPort),
slog.Bool("log_cdp_messages", c.LogCDPMessages),
Expand Down
8 changes: 8 additions & 0 deletions server/cmd/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ func TestLoad(t *testing.T) {
DisplayNum: 1,
MaxSizeInMB: 500,
OutputDir: ".",
AudioSource: "",
PulseServer: "",
PathToFFmpeg: "ffmpeg",
DevToolsProxyPort: 9222,
ScaleToZeroCooldown: time.Second,
Expand All @@ -39,6 +41,8 @@ func TestLoad(t *testing.T) {
"DISPLAY_NUM": "2",
"MAX_SIZE_MB": "250",
"OUTPUT_DIR": "/tmp",
"AUDIO_SOURCE": "CustomOutput.monitor",
"PULSE_SERVER": "unix:/tmp/pulse/native",
"FFMPEG_PATH": "/usr/local/bin/ffmpeg",
"DEVTOOLS_PROXY_PORT": "9876",
"SCALE_TO_ZERO_COOLDOWN": "5s",
Expand All @@ -51,6 +55,8 @@ func TestLoad(t *testing.T) {
DisplayNum: 2,
MaxSizeInMB: 250,
OutputDir: "/tmp",
AudioSource: "CustomOutput.monitor",
PulseServer: "unix:/tmp/pulse/native",
PathToFFmpeg: "/usr/local/bin/ffmpeg",
DevToolsProxyPort: 9876,
ScaleToZeroCooldown: 5 * time.Second,
Expand All @@ -71,6 +77,8 @@ func TestLoad(t *testing.T) {
DisplayNum: 1,
MaxSizeInMB: 500,
OutputDir: ".",
AudioSource: "",
PulseServer: "",
PathToFFmpeg: "ffmpeg",
DevToolsProxyPort: 7777,
ScaleToZeroCooldown: time.Second,
Expand Down
1 change: 0 additions & 1 deletion server/cmd/wrapper/chromium.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ func applyHeadlessDefaultFlags() {
"--hide-crash-restore-bubble",
"--hide-scrollbars",
"--metrics-recording-only",
"--mute-audio",
"--no-default-browser-check",
"--no-first-run",
"--no-sandbox",
Expand Down
13 changes: 6 additions & 7 deletions server/cmd/wrapper/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ const (
dbusSocket = "/run/dbus/system_bus_socket"
defaultDisplay = ":1"
defaultIntPort = "9223"
// pulseSocket must match the socket path created in shared/start-pulseaudio.sh
// (the authority for the audio topology); the wrapper only waits on it here.
pulseSocket = "/tmp/pulse/native"
)

type profile int
Expand Down Expand Up @@ -172,11 +175,13 @@ func main() {
_ = os.WriteFile(filepath.Join(supervisordLogD, "chromium"), nil, 0o644)

browserStart := time.Now()
startAll(xServer, "dbus", "chromedriver", "chromium")
startAll(xServer, "dbus", "chromedriver", "pulseaudio")
waitForX(defaultDisplay, 20*time.Second)
if prof == profileHeadful {
startAll("mutter")
}
waitForSocket(pulseSocket, 10*time.Second)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious how much this adds to the startup time — not that it matters with fork.

Copy link
Copy Markdown
Contributor Author

@rgarcia rgarcia Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Measured on a cold headful boot (v6 image): the pulse socket (/tmp/pulse/native) becomes ready ~60ms after the X display socket (/tmp/.X11-unix/X1) — 854.579 vs 854.519. Since the wrapper waits for X (and headful waits for mutter) before the waitForSocket(pulseSocket) gate, the marginal latency added before chromium starts is at most ~60ms, and it overlaps mutter startup — effectively negligible against the ~0.8–1s browser bring-up (chromedriver alone was ready at 811ms in the same boot). And as you said, snapshot/fork restore skips cold boot entirely.

startAll("chromium")
Comment thread
cursor[bot] marked this conversation as resolved.
waitForSocket(dbusSocket, 10*time.Second)
if prof == profileHeadful && webrtc {
startAll("neko")
Expand Down Expand Up @@ -223,12 +228,6 @@ func main() {
identityDone.Sub(identityStart).Truncate(time.Millisecond),
formatProbeDurations(probeDurations))

// Cosmetic + non-critical services come up off the hot path. Headless has
// no audio stack.
if prof == profileHeadful {
go startAll("pulseaudio")
}

// Re-enable scale-to-zero now that the hot path is up — unless the caller
// asked to keep it disabled via ENABLE_STZ=false/0.
if stzManaged {
Expand Down
Loading
Loading