diff --git a/src/commands/manifest/bazel/bazel-build-parser.mts b/src/commands/manifest/bazel/bazel-build-parser.mts index af30345b7..7090eafd6 100644 --- a/src/commands/manifest/bazel/bazel-build-parser.mts +++ b/src/commands/manifest/bazel/bazel-build-parser.mts @@ -9,8 +9,13 @@ * the input string. */ +// `ruleKind` is the rule class the artifact came from. Legacy text-format +// parsers only emit 'jvm_import' / 'aar_import' (the kinds rules_jvm_external +// historically generated); the metadata cquery in bazel-cquery.mts emits +// whatever `ruleClass` jsonproto reports — `java_library`, `kt_jvm_import`, +// any future rules_jvm_external rule — so the type is open. export type ExtractedArtifact = { - ruleKind: 'jvm_import' | 'aar_import' + ruleKind: string ruleName: string mavenCoordinates: string sourceRepo?: string | undefined diff --git a/src/commands/manifest/bazel/bazel-cquery.mts b/src/commands/manifest/bazel/bazel-cquery.mts new file mode 100644 index 000000000..819cce39c --- /dev/null +++ b/src/commands/manifest/bazel/bazel-cquery.mts @@ -0,0 +1,349 @@ +/** + * Per-repo metadata cquery + jsonproto parser for the Maven path. + * + * Pipeline: + * 1. Build a cquery argv targeting `attr("tags", "\bmaven_coordinates=", + * @//...)` plus union variants for direct `maven_coordinates` / + * `maven_url` attributes. `--output=jsonproto` + + * `--proto:output_rule_attrs=tags,maven_coordinates,maven_url` keeps the + * payload small. + * 2. Spawn under a caller-supplied `outputUserRoot` so the orchestrator can + * reap the server cleanly (`bazel --output_user_root= shutdown` + * followed by `rm -rf`). The runner itself never deletes anything — + * server lifecycle is the orchestrator's concern. + * 3. Parse the jsonproto stream defensively: dispatch on `attribute[].type` + * and accept both camelCase (`stringValue`, `stringListValue`) and + * snake_case (`string_value`, `string_list_value`) payload keys. + * 4. 
Extract the maven coordinate from the direct `maven_coordinates` attr + * when present, else scan `tags` for `maven_coordinates=`. + * 5. Tag every artifact with `workspace:` + `repo:` + * provenance via `sourceRepo`. + */ +import { spawn } from '@socketsecurity/registry/lib/spawn' + +import { splitBazelFlags } from './bazel-query-runner.mts' + +import type { ExtractedArtifact } from './bazel-build-parser.mts' +import type { BazelQueryOptions } from './bazel-query-runner.mts' + +export type CqueryStatus = 'ok' | 'partial' | 'timeout' | 'empty' | 'error' + +export type CqueryRepoResult = { + repoName: string + workspaceRelPath: string + status: CqueryStatus + artifacts: ExtractedArtifact[] + stderr: string + durationMs: number +} + +export type RunMetadataCqueryArgs = { + repoName: string + workspaceRoot: string + // Provenance label (e.g. "examples/dagger"). Empty string for the root + // workspace. Embedded in each artifact's `sourceRepo` as + // `workspace:+repo:`. + workspaceRelPath: string + // Per-repo timeout in milliseconds. 60s default for auto-manifest; + // 120s for explicit invocation. Orchestrator picks; runner just enforces. + timeoutMs: number + opts: BazelQueryOptions +} + +// Maven coordinate token: `g:a:v` (3 parts) or `g:a:v:classifier` / +// `g:a:packaging:v` (4-part rules_jvm_external shapes). Tolerant of dots, +// dashes, plus, underscores in any part. +const MAVEN_COORD_TAG_RE = /^maven_coordinates=(.+)$/ + +// Build the metadata cquery target expression for one repo. The union of +// three predicates picks up artifacts that: +// - encode the coordinate in the conventional `tags = ["maven_coordinates=..."]` +// list (rules_jvm_external's emission for `jvm_import` and friends), +// - declare the coordinate as a direct `maven_coordinates` attribute +// (Bazel-native java_library / kt_jvm_import shape), or +// - declare a `maven_url` (POM-only and source-jar shapes that omit the +// coordinates tag but still represent a Maven artefact). 
function buildMetadataCqueryExpr(repoName: string): string {
  const r = `@${repoName}//...`
  // The `\b` boundary in the tags predicate prevents matches on tag values
  // like `pre_maven_coordinates=fake`; see todo 2 acceptance test (10).
  return [
    `attr("tags", "\\bmaven_coordinates=", ${r})`,
    `attr("maven_coordinates", ".+", ${r})`,
    `attr("maven_url", ".+", ${r})`,
  ].join(' union ')
}

/**
 * Build the full argv for a per-repo metadata cquery. Exposed so the argv
 * shape can be unit-tested without touching `spawn`.
 *
 * Ordering: startup flags (`--bazelrc`, `--output_user_root`,
 * `--output_base`) come before the `cquery` subcommand; the caller's
 * `invocationFlags` sit between the fixed cquery flags and the target
 * expression; user-supplied `bazelFlags` are appended last.
 *
 * @param repoName Apparent repo name, interpolated as `@<repoName>//...`.
 * @param opts Shared Bazel invocation options.
 * @returns The argument vector to pass to the Bazel binary.
 */
export function buildMetadataCqueryArgv(
  repoName: string,
  opts: BazelQueryOptions,
): string[] {
  const startup: string[] = []
  if (opts.bazelRc) {
    startup.push(`--bazelrc=${opts.bazelRc}`)
  }
  if (opts.outputUserRoot) {
    startup.push(`--output_user_root=${opts.outputUserRoot}`)
  }
  if (opts.bazelOutputBase) {
    startup.push(`--output_base=${opts.bazelOutputBase}`)
  }
  const userFlags = splitBazelFlags(opts.bazelFlags)
  return [
    ...startup,
    'cquery',
    '--lockfile_mode=off',
    '--noshow_progress',
    ...opts.invocationFlags,
    buildMetadataCqueryExpr(repoName),
    '--output=jsonproto',
    '--proto:output_rule_attrs=tags,maven_coordinates,maven_url',
    '--keep_going',
    ...userFlags,
  ]
}

// One entry of a rule's `attribute` list. Payload keys are accepted in both
// camelCase and snake_case spellings.
type JsonprotoAttribute = {
  name?: string
  type?: string
  stringValue?: string
  string_value?: string
  stringListValue?: string[]
  string_list_value?: string[]
}

type JsonprotoRule = {
  name?: string
  ruleClass?: string
  rule_class?: string
  attribute?: JsonprotoAttribute[]
}

type JsonprotoTarget = {
  type?: string
  rule?: JsonprotoRule
}

type JsonprotoEnvelope = {
  // Bazel 5+ wraps the stream in `{ "results": [ { "target": {...} } ] }`;
  // older shapes streamed one target per line. Accept either.
  results?: Array<{ target?: JsonprotoTarget }>
}

// True for plain (non-null, non-array) objects. Everything that comes out of
// `JSON.parse` is untrusted shape-wise, so it is narrowed before property
// access instead of being cast.
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}

// Read a `STRING`-typed attribute payload; undefined for any other type or
// when neither spelling of the payload key holds a string.
function readStringAttr(attr: JsonprotoAttribute): string | undefined {
  if (attr.type !== 'STRING') {
    return undefined
  }
  if (typeof attr.stringValue === 'string') {
    return attr.stringValue
  }
  if (typeof attr.string_value === 'string') {
    return attr.string_value
  }
  return undefined
}

// Read a `STRING_LIST`-typed attribute payload; undefined for any other type
// or when neither spelling of the payload key holds an array.
function readStringListAttr(attr: JsonprotoAttribute): string[] | undefined {
  if (attr.type !== 'STRING_LIST') {
    return undefined
  }
  if (Array.isArray(attr.stringListValue)) {
    return attr.stringListValue
  }
  if (Array.isArray(attr.string_list_value)) {
    return attr.string_list_value
  }
  return undefined
}

// Extract the maven coordinate from a rule's attributes. Prefers the direct
// `maven_coordinates` attribute (Bazel-native shape); falls back to scanning
// `tags` for a `maven_coordinates=<coord>` entry (rules_jvm_external shape).
// Returns undefined if neither yields a non-empty value. A malformed
// `attribute` payload (not an array, or holding non-object entries) is
// skipped rather than allowed to throw.
function extractMavenCoordinate(
  rule: JsonprotoRule,
): { coord: string; url?: string | undefined } | undefined {
  const attributes: unknown = rule.attribute
  if (!Array.isArray(attributes)) {
    return undefined
  }
  let direct: string | undefined
  let tagged: string | undefined
  let url: string | undefined
  for (const raw of attributes) {
    if (!isRecord(raw)) {
      continue
    }
    const attr = raw as JsonprotoAttribute
    if (attr.name === 'maven_coordinates') {
      const value = readStringAttr(attr)
      if (value) {
        direct = value
      }
    } else if (attr.name === 'maven_url') {
      const value = readStringAttr(attr)
      if (value) {
        url = value
      }
    } else if (attr.name === 'tags') {
      for (const tag of readStringListAttr(attr) ?? []) {
        if (tagged !== undefined || typeof tag !== 'string') {
          continue
        }
        // First matching tag wins; the direct attribute still overrides it.
        const m = MAVEN_COORD_TAG_RE.exec(tag)
        if (m) {
          tagged = m[1]
        }
      }
    }
  }
  const coord = direct ?? tagged
  if (!coord) {
    return undefined
  }
  return url ? { coord, url } : { coord }
}

// Strip the leading `@<repo>//<package>:` prefix from a fully-qualified
// target label to recover the bare rule name (e.g. `com_google_guava_guava`).
// A label without a colon is returned unchanged.
function ruleNameFromLabel(label: string): string {
  const colon = label.lastIndexOf(':')
  return colon >= 0 ? label.slice(colon + 1) : label
}

// Collect raw targets from cquery stdout. Tries the single-document shapes
// first (the `{ results: [...] }` envelope, or one bare target object), then
// falls back to one JSON target per line. Entries of the wrong shape are
// ignored individually so one bad record cannot discard the rest.
function collectJsonprotoTargets(stdout: string): JsonprotoTarget[] {
  const targets: JsonprotoTarget[] = []
  try {
    const parsed: unknown = JSON.parse(stdout)
    if (isRecord(parsed)) {
      const results = parsed['results']
      if (Array.isArray(results)) {
        for (const entry of results) {
          const target: unknown = isRecord(entry) ? entry['target'] : undefined
          if (isRecord(target)) {
            targets.push(target as JsonprotoTarget)
          }
        }
      } else if (isRecord(parsed['rule'])) {
        targets.push(parsed as JsonprotoTarget)
      }
    }
  } catch {
    // Not a single JSON document; fall through to per-line scanning.
  }
  if (targets.length) {
    return targets
  }
  for (const line of stdout.split(/\r?\n/)) {
    const trimmed = line.trim()
    if (!trimmed) {
      continue
    }
    try {
      const parsed: unknown = JSON.parse(trimmed)
      if (isRecord(parsed) && isRecord(parsed['rule'])) {
        targets.push(parsed as JsonprotoTarget)
      }
    } catch {
      // Skip malformed lines.
    }
  }
  return targets
}

/**
 * Pure parser for the jsonproto cquery output.
 *
 * Returns one `ExtractedArtifact` per rule with a recoverable maven
 * coordinate. cquery reports configured targets, so the same label can be
 * listed once per configuration; repeats of the same label + coordinate are
 * collapsed so each rule yields a single artifact.
 *
 * @param stdout Raw cquery stdout (envelope, bare target, or line stream).
 * @param repoName Repo the cquery targeted.
 * @param workspaceRelPath Workspace path used for provenance; empty string
 *   for the root workspace.
 * @returns Artifacts whose `sourceRepo` is `<workspaceRelPath>:<repoName>`
 *   when a workspace path was provided, otherwise just `<repoName>`.
 */
export function parseCqueryJsonproto(
  stdout: string,
  repoName: string,
  workspaceRelPath: string,
): ExtractedArtifact[] {
  if (!stdout.trim()) {
    return []
  }
  const provenance = workspaceRelPath
    ? `${workspaceRelPath}:${repoName}`
    : repoName
  const seen = new Set<string>()
  const out: ExtractedArtifact[] = []
  for (const target of collectJsonprotoTargets(stdout)) {
    if (target.type && target.type !== 'RULE') {
      continue
    }
    const rule = target.rule
    if (
      !rule ||
      typeof rule !== 'object' ||
      typeof rule.name !== 'string' ||
      !rule.name
    ) {
      continue
    }
    const extracted = extractMavenCoordinate(rule)
    if (!extracted) {
      continue
    }
    // Key on the full label (not the bare rule name) so identically named
    // rules in different packages are both kept.
    const key = `${rule.name}\n${extracted.coord}`
    if (seen.has(key)) {
      continue
    }
    seen.add(key)
    const ruleKind =
      typeof rule.ruleClass === 'string'
        ? rule.ruleClass
        : typeof rule.rule_class === 'string'
          ? rule.rule_class
          : 'unknown'
    out.push({
      deps: [],
      mavenCoordinates: extracted.coord,
      ruleKind,
      ruleName: ruleNameFromLabel(rule.name),
      sourceRepo: provenance,
      ...(extracted.url ? { mavenUrl: extracted.url } : {}),
    })
  }
  return out
}

// Classify the runner's raw outcome.
// Non-zero exit with `--keep_going` is a `partial` (some target analysis
// failed; the successful subset is still in stdout). Zero exit with no
// parsed artefacts is `empty`. Spawn timeout is signalled separately; this
// helper handles the post-spawn case.
function classifyCqueryOutcome(
  code: number,
  artifactCount: number,
): CqueryStatus {
  const parsedAny = artifactCount > 0
  if (code === 0) {
    return parsedAny ? 'ok' : 'empty'
  }
  // --keep_going treats partial-analysis failures with non-zero exit but
  // still yields the successful subset on stdout. Anything we parsed is
  // worth keeping.
  return parsedAny ? 'partial' : 'error'
}

/**
 * Spawn the per-repo metadata cquery, parse the result, and return a
 * structured outcome. Never throws: every failure mode is folded into
 * `status`.
 *
 * On spawn timeout, returns `status: 'timeout'` so the orchestrator can reap
 * the server (`bazel --output_user_root=<path> shutdown` + `rm -rf`) before
 * moving on.
 *
 * The spawn helper may reject rather than resolve when the process exits
 * non-zero, which is the normal `--keep_going` outcome. A non-timeout
 * rejection is therefore classified like a resolved non-zero exit: `partial`
 * when stdout still yielded artifacts, `error` otherwise.
 *
 * @param args Repo name, workspace root / relative path, timeout and Bazel
 *   options for this invocation.
 * @returns Parsed artifacts plus status, captured stderr and wall-clock
 *   duration in milliseconds.
 */
export async function runMetadataCqueryForRepo(
  args: RunMetadataCqueryArgs,
): Promise<CqueryRepoResult> {
  const { opts, repoName, timeoutMs, workspaceRelPath, workspaceRoot } = args
  const argv = buildMetadataCqueryArgv(repoName, opts)
  const startedAt = Date.now()
  try {
    const result = await spawn(opts.bin, argv, {
      cwd: workspaceRoot,
      timeout: timeoutMs,
      ...(opts.env ? { env: opts.env } : {}),
    })
    const { code, stderr, stdout } = result
    const artifacts = parseCqueryJsonproto(stdout, repoName, workspaceRelPath)
    return {
      artifacts,
      durationMs: Date.now() - startedAt,
      repoName,
      status: classifyCqueryOutcome(code, artifacts.length),
      stderr,
      workspaceRelPath,
    }
  } catch (e) {
    // Guard against non-object rejections (`throw undefined`, strings) so
    // the property reads below cannot themselves throw.
    const err = (typeof e === 'object' && e !== null ? e : {}) as {
      code?: unknown
      killed?: unknown
      signal?: unknown
      stderr?: unknown
      stdout?: unknown
      timedOut?: unknown
    }
    const stdout = typeof err.stdout === 'string' ? err.stdout : ''
    const stderr = typeof err.stderr === 'string' ? err.stderr : ''
    const timedOut =
      err.timedOut === true ||
      err.killed === true ||
      err.signal === 'SIGTERM' ||
      err.signal === 'SIGKILL'
    const artifacts = stdout
      ? parseCqueryJsonproto(stdout, repoName, workspaceRelPath)
      : []
    // A rejection is never a clean exit: fall back to 1 when the error
    // carries no usable numeric exit code (e.g. ENOENT's string code).
    const exitCode =
      typeof err.code === 'number' && err.code !== 0 ? err.code : 1
    return {
      artifacts,
      durationMs: Date.now() - startedAt,
      repoName,
      status: timedOut
        ? 'timeout'
        : classifyCqueryOutcome(exitCode, artifacts.length),
      stderr,
      workspaceRelPath,
    }
  }
}
diff --git a/src/commands/manifest/bazel/bazel-cquery.test.mts b/src/commands/manifest/bazel/bazel-cquery.test.mts new file mode 100644 index 000000000..05149e222 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-cquery.test.mts @@ -0,0 +1,412 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +vi.mock('@socketsecurity/registry/lib/spawn', () => ({ + spawn: vi.fn(), +})) + +import { spawn } from '@socketsecurity/registry/lib/spawn' + +import { + buildMetadataCqueryArgv, + parseCqueryJsonproto, + runMetadataCqueryForRepo, +} from './bazel-cquery.mts' + +// Sample envelope shape Bazel 5+ emits: `{ "results": [ { "target": {...} } ] }`. +// Two rules: one with `tags`/`maven_coordinates` (rules_jvm_external shape) +// and one with the direct `maven_coordinates` attr only (Bazel-native shape).
+const ENVELOPE_FIXTURE = JSON.stringify({ + results: [ + { + target: { + type: 'RULE', + rule: { + name: '@maven//:androidx_annotation_annotation', + ruleClass: 'jvm_import', + attribute: [ + { + name: 'maven_coordinates', + type: 'STRING', + stringValue: 'androidx.annotation:annotation:1.8.2', + }, + { + name: 'maven_url', + type: 'STRING', + stringValue: + 'https://maven.google.com/androidx/annotation/annotation/1.8.2/annotation-1.8.2.jar', + }, + { + name: 'tags', + type: 'STRING_LIST', + stringListValue: [ + 'maven_coordinates=androidx.annotation:annotation:1.8.2', + 'maven_repository=https://maven.google.com', + ], + }, + ], + }, + }, + }, + { + target: { + type: 'RULE', + rule: { + name: '@maven//:plain_lib', + ruleClass: 'java_library', + attribute: [ + { + name: 'tags', + type: 'STRING_LIST', + stringListValue: ['maven_coordinates=com.example:plain:1.0'], + }, + ], + }, + }, + }, + ], +}) + +describe('buildMetadataCqueryArgv', () => { + it('builds the union expression and the documented flag set', () => { + const argv = buildMetadataCqueryArgv('maven', { + bin: 'bazel', + cwd: '/repo', + invocationFlags: [], + }) + expect(argv).toContain('cquery') + expect(argv).toContain('--output=jsonproto') + expect(argv).toContain('--proto:output_rule_attrs=tags,maven_coordinates,maven_url') + expect(argv).toContain('--keep_going') + expect(argv).toContain('--lockfile_mode=off') + const expr = argv.find(a => a.includes('attr("tags"')) + expect(expr).toContain('attr("tags", "\\bmaven_coordinates=", @maven//...)') + expect(expr).toContain('attr("maven_coordinates", ".+", @maven//...)') + expect(expr).toContain('attr("maven_url", ".+", @maven//...)') + }) + + it('threads outputUserRoot, bazelRc, and bazelOutputBase as startup flags before cquery', () => { + const argv = buildMetadataCqueryArgv('maven', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + bazelRc: '/etc/bazel.rc', + outputUserRoot: '/tmp/socket-bazel-1', + bazelOutputBase: '/tmp/output-base', + }) + 
expect(argv[0]).toBe('--bazelrc=/etc/bazel.rc') + expect(argv[1]).toBe('--output_user_root=/tmp/socket-bazel-1') + expect(argv[2]).toBe('--output_base=/tmp/output-base') + expect(argv[3]).toBe('cquery') + }) + + it('appends user --bazel-flag args AFTER the standard cquery flags', () => { + const argv = buildMetadataCqueryArgv('maven', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + bazelFlags: '--config=ci --repo_env=SCALA_VERSION=2.13.18', + }) + const cqueryIdx = argv.indexOf('cquery') + const userIdx = argv.indexOf('--config=ci') + expect(userIdx).toBeGreaterThan(cqueryIdx) + expect(argv).toContain('--repo_env=SCALA_VERSION=2.13.18') + }) + + it('includes invocationFlags between subcommand and target expression', () => { + const argv = buildMetadataCqueryArgv('maven', { + bin: 'bazel', + cwd: '/r', + invocationFlags: ['--noenable_bzlmod', '--enable_workspace'], + }) + expect(argv).toContain('--noenable_bzlmod') + expect(argv).toContain('--enable_workspace') + }) +}) + +describe('parseCqueryJsonproto', () => { + it('parses Bazel-5+ envelope shape and returns one artifact per rule', () => { + const out = parseCqueryJsonproto(ENVELOPE_FIXTURE, 'maven', '') + expect(out).toHaveLength(2) + const first = out[0]! + expect(first.mavenCoordinates).toBe('androidx.annotation:annotation:1.8.2') + expect(first.mavenUrl).toBe( + 'https://maven.google.com/androidx/annotation/annotation/1.8.2/annotation-1.8.2.jar', + ) + expect(first.ruleKind).toBe('jvm_import') + expect(first.ruleName).toBe('androidx_annotation_annotation') + expect(first.sourceRepo).toBe('maven') + + const second = out[1]! 
+ expect(second.mavenCoordinates).toBe('com.example:plain:1.0') + expect(second.ruleKind).toBe('java_library') + expect(second.ruleName).toBe('plain_lib') + }) + + it('emits workspace:+repo: provenance via sourceRepo when workspaceRelPath is set', () => { + const out = parseCqueryJsonproto( + ENVELOPE_FIXTURE, + 'maven', + 'examples/dagger', + ) + expect(out[0]?.sourceRepo).toBe('examples/dagger:maven') + }) + + it('falls back to snake_case payload keys (string_value, string_list_value)', () => { + const snakeCase = JSON.stringify({ + results: [ + { + target: { + type: 'RULE', + rule: { + name: '@maven//:snake_case_artifact', + rule_class: 'kt_jvm_import', + attribute: [ + { + name: 'tags', + type: 'STRING_LIST', + string_list_value: ['maven_coordinates=com.example:snake:2.0'], + }, + ], + }, + }, + }, + ], + }) + const out = parseCqueryJsonproto(snakeCase, 'maven', '') + expect(out).toHaveLength(1) + expect(out[0]?.mavenCoordinates).toBe('com.example:snake:2.0') + expect(out[0]?.ruleKind).toBe('kt_jvm_import') + }) + + it('falls back to per-line jsonproto stream when envelope is absent', () => { + const streamed = [ + JSON.stringify({ + type: 'RULE', + rule: { + name: '@maven//:a', + ruleClass: 'jvm_import', + attribute: [ + { + name: 'maven_coordinates', + type: 'STRING', + stringValue: 'g:a:1', + }, + ], + }, + }), + JSON.stringify({ + type: 'RULE', + rule: { + name: '@maven//:b', + ruleClass: 'jvm_import', + attribute: [ + { + name: 'maven_coordinates', + type: 'STRING', + stringValue: 'g:b:2', + }, + ], + }, + }), + ].join('\n') + const out = parseCqueryJsonproto(streamed, 'maven', '') + expect(out.map(a => a.mavenCoordinates)).toEqual(['g:a:1', 'g:b:2']) + }) + + it('skips rules with no recoverable maven coordinate', () => { + const noCoord = JSON.stringify({ + results: [ + { + target: { + type: 'RULE', + rule: { + name: '@maven//:no_coord', + ruleClass: 'java_library', + attribute: [ + { + name: 'tags', + type: 'STRING_LIST', + stringListValue: 
['some_other_tag=value'], + }, + ], + }, + }, + }, + ], + }) + expect(parseCqueryJsonproto(noCoord, 'maven', '')).toEqual([]) + }) + + it('prefers the direct maven_coordinates attr over the tag fallback', () => { + const conflicting = JSON.stringify({ + results: [ + { + target: { + type: 'RULE', + rule: { + name: '@maven//:dual', + ruleClass: 'jvm_import', + attribute: [ + { + name: 'maven_coordinates', + type: 'STRING', + stringValue: 'g:direct:1', + }, + { + name: 'tags', + type: 'STRING_LIST', + stringListValue: ['maven_coordinates=g:via_tag:2'], + }, + ], + }, + }, + }, + ], + }) + const out = parseCqueryJsonproto(conflicting, 'maven', '') + expect(out[0]?.mavenCoordinates).toBe('g:direct:1') + }) + + it('returns [] on empty stdout', () => { + expect(parseCqueryJsonproto('', 'maven', '')).toEqual([]) + expect(parseCqueryJsonproto(' \n\n', 'maven', '')).toEqual([]) + }) +}) + +describe('runMetadataCqueryForRepo', () => { + const mocked = vi.mocked(spawn) + + beforeEach(() => { + mocked.mockReset() + }) + + it('returns status:ok with parsed artifacts on a clean run', async () => { + // @ts-ignore — narrow return shape for the test. + mocked.mockResolvedValueOnce({ + code: 0, + stdout: ENVELOPE_FIXTURE, + stderr: '', + }) + const r = await runMetadataCqueryForRepo({ + opts: { bin: 'bazel', cwd: '/r', invocationFlags: [] }, + repoName: 'maven', + timeoutMs: 60_000, + workspaceRelPath: '', + workspaceRoot: '/r', + }) + expect(r.status).toBe('ok') + expect(r.artifacts).toHaveLength(2) + expect(r.stderr).toBe('') + }) + + it('returns status:empty when stdout has no parsed artifacts on exit 0', async () => { + // @ts-ignore — narrow return shape for the test. 
+ mocked.mockResolvedValueOnce({ code: 0, stdout: '', stderr: '' }) + const r = await runMetadataCqueryForRepo({ + opts: { bin: 'bazel', cwd: '/r', invocationFlags: [] }, + repoName: 'maven', + timeoutMs: 60_000, + workspaceRelPath: '', + workspaceRoot: '/r', + }) + expect(r.status).toBe('empty') + expect(r.artifacts).toEqual([]) + }) + + it('returns status:partial when --keep_going emits non-zero but still parses targets', async () => { + // Bazel: exit 1 + "Analysis succeeded for only 118 of 122 top-level targets" + // is the normal --keep_going outcome. + // @ts-ignore — narrow return shape for the test. + mocked.mockResolvedValueOnce({ + code: 1, + stdout: ENVELOPE_FIXTURE, + stderr: 'WARNING: analysis failed for some targets\n', + }) + const r = await runMetadataCqueryForRepo({ + opts: { bin: 'bazel', cwd: '/r', invocationFlags: [] }, + repoName: 'maven', + timeoutMs: 60_000, + workspaceRelPath: '', + workspaceRoot: '/r', + }) + expect(r.status).toBe('partial') + expect(r.artifacts).toHaveLength(2) + }) + + it('returns status:error on non-zero exit with no parsed targets', async () => { + // @ts-ignore — narrow return shape for the test. 
+ mocked.mockResolvedValueOnce({ + code: 1, + stdout: '', + stderr: 'ERROR: something broke\n', + }) + const r = await runMetadataCqueryForRepo({ + opts: { bin: 'bazel', cwd: '/r', invocationFlags: [] }, + repoName: 'maven', + timeoutMs: 60_000, + workspaceRelPath: '', + workspaceRoot: '/r', + }) + expect(r.status).toBe('error') + expect(r.artifacts).toEqual([]) + }) + + it('returns status:timeout when spawn rejects with timedOut=true', async () => { + mocked.mockRejectedValueOnce( + Object.assign(new Error('command timed out'), { + code: null, + timedOut: true, + stderr: '', + stdout: '', + }), + ) + const r = await runMetadataCqueryForRepo({ + opts: { bin: 'bazel', cwd: '/r', invocationFlags: [] }, + repoName: 'maven', + timeoutMs: 60_000, + workspaceRelPath: '', + workspaceRoot: '/r', + }) + expect(r.status).toBe('timeout') + expect(r.artifacts).toEqual([]) + }) + + it('returns status:timeout when spawn signals SIGTERM/SIGKILL', async () => { + mocked.mockRejectedValueOnce( + Object.assign(new Error('killed'), { + signal: 'SIGTERM', + stderr: '', + stdout: '', + }), + ) + const r = await runMetadataCqueryForRepo({ + opts: { bin: 'bazel', cwd: '/r', invocationFlags: [] }, + repoName: 'maven', + timeoutMs: 60_000, + workspaceRelPath: '', + workspaceRoot: '/r', + }) + expect(r.status).toBe('timeout') + }) + + it('passes workspaceRoot as cwd and outputUserRoot as startup flag', async () => { + // @ts-ignore — narrow return shape for the test. + mocked.mockResolvedValueOnce({ code: 0, stdout: '', stderr: '' }) + await runMetadataCqueryForRepo({ + opts: { + bin: 'bazel', + cwd: '/anywhere', + invocationFlags: [], + outputUserRoot: '/tmp/socket-bazel-xyz', + }, + repoName: 'maven', + timeoutMs: 60_000, + workspaceRelPath: '', + workspaceRoot: '/repo/sub', + }) + const call = mocked.mock.calls[0]! 
+ expect(call[2]).toMatchObject({ cwd: '/repo/sub', timeout: 60_000 }) + const argv = call[1] as string[] + expect(argv).toContain('--output_user_root=/tmp/socket-bazel-xyz') + }) +}) diff --git a/src/commands/manifest/bazel/bazel-pypi-discovery.mts b/src/commands/manifest/bazel/bazel-pypi-discovery.mts index e92561cef..3b62610e8 100644 --- a/src/commands/manifest/bazel/bazel-pypi-discovery.mts +++ b/src/commands/manifest/bazel/bazel-pypi-discovery.mts @@ -5,7 +5,95 @@ import { logger } from '@socketsecurity/registry/lib/logger' import { getErrorCause } from '../../../utils/errors.mts' -import type { RepoProbe, ValidationResult } from './bazel-repo-discovery.mts' +import type { RepoProbe } from './bazel-repo-discovery.mts' + +// Result shape returned by `validatePypiHub`. Kept local to the PyPI module +// since validation here is hub-alias-marker based (different from the +// Maven-side tri-state classifier). +export type ValidationResult = { + valid: boolean + // Probe stdout — populated whenever the probe was reachable, even when + // validation rejects the hub. Empty string when the probe itself threw. + stdout: string +} + +// PyPI-only repo-name predicate (Bazel apparent-name grammar). +const PYPI_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}' +const PYPI_REPO_NAME_RE = new RegExp(`^${PYPI_REPO_NAME_PATTERN}$`) + +function pypiApparentNameFromJsonValue(value: unknown): string | undefined { + if (!value || typeof value !== 'object') { + return undefined + } + const obj = value as Record + const direct = obj['apparentName'] ?? 
obj['apparent_name'] + if (typeof direct === 'string') { + return direct + } + for (const nested of Object.values(obj)) { + const found = pypiApparentNameFromJsonValue(nested) + if (found) { + return found + } + } + return undefined +} + +function pypiApparentNamesFromRepoMapping(value: unknown): string[] { + if (!value || typeof value !== 'object' || Array.isArray(value)) { + return [] + } + const candidates: string[] = [] + for (const [name, canonicalName] of Object.entries(value)) { + if (name.startsWith('@') || typeof canonicalName !== 'string') { + continue + } + if (PYPI_REPO_NAME_RE.test(name)) { + candidates.push(name) + } + } + return candidates +} + +function pypiNormalizeRepoName(name: string): string | undefined { + const repo = name.startsWith('@') ? name.slice(1) : name + return PYPI_REPO_NAME_RE.test(repo) ? repo : undefined +} + +// Parse `bazel mod dump_repo_mapping "" --output=json` output. Also accepts +// the older streamed jsonproto shape (apparentName / apparent_name records). +// PyPI-only; the Maven path consumes `bazel mod show_extension` instead. +export function parseVisibleRepoCandidates(output: string): string[] { + const seen = new Set() + const candidates: string[] = [] + for (const line of output.split(/\r?\n/)) { + const trimmed = line.trim() + if (!trimmed) { + continue + } + try { + const parsed = JSON.parse(trimmed) as unknown + for (const c of pypiApparentNamesFromRepoMapping(parsed)) { + if (!seen.has(c)) { + seen.add(c) + candidates.push(c) + } + } + const apparentName = pypiApparentNameFromJsonValue(parsed) + if (apparentName) { + const repo = pypiNormalizeRepoName(apparentName) + if (repo && !seen.has(repo)) { + seen.add(repo) + candidates.push(repo) + } + } + } catch { + // Skip malformed lines; caller falls back to static discovery when no + // usable visible repo names are found. + } + } + return candidates.sort() +} // Maximum size (bytes) we will read for any single Bazel workspace file. 
// Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files. diff --git a/src/commands/manifest/bazel/bazel-query-runner.mts b/src/commands/manifest/bazel/bazel-query-runner.mts index 34300d487..2d6361de3 100644 --- a/src/commands/manifest/bazel/bazel-query-runner.mts +++ b/src/commands/manifest/bazel/bazel-query-runner.mts @@ -12,6 +12,13 @@ export type BazelQueryOptions = { bazelRc?: string bazelFlags?: string bazelOutputBase?: string + // Per-invocation `--output_user_root` for server isolation. When set, all + // argv builders inject it as a startup flag so a timed-out Bazel server + // can be reaped via `bazel --output_user_root= shutdown` + `rm -rf` + // without disturbing the user's shared output_user_root. The Maven + // orchestrator mkdtemp's a fresh path per invocation; the legacy PyPI + // path may leave it unset for now. + outputUserRoot?: string env?: NodeJS.ProcessEnv verbose?: boolean } @@ -39,17 +46,28 @@ export function splitBazelFlags(flags: string | undefined): string[] { return flags.split(/\s+/).filter(Boolean) } -function buildBazelModShowVisibleReposArgv(opts: BazelQueryOptions): string[] { +// Build the shared startup-flag prefix for any bazel invocation. Centralised +// so `--output_user_root` propagates to every spawn — principle 7 of the +// Maven design requires per-invocation server isolation across query, +// cquery, and `bazel mod` commands alike. 
+function buildStartupFlags(opts: BazelQueryOptions): string[] { const startup: string[] = [] if (opts.bazelRc) { startup.push(`--bazelrc=${opts.bazelRc}`) } + if (opts.outputUserRoot) { + startup.push(`--output_user_root=${opts.outputUserRoot}`) + } if (opts.bazelOutputBase) { startup.push(`--output_base=${opts.bazelOutputBase}`) } + return startup +} + +function buildBazelModShowVisibleReposArgv(opts: BazelQueryOptions): string[] { const userFlags = splitBazelFlags(opts.bazelFlags) return [ - ...startup, + ...buildStartupFlags(opts), 'mod', 'dump_repo_mapping', '', @@ -58,17 +76,23 @@ function buildBazelModShowVisibleReposArgv(opts: BazelQueryOptions): string[] { ] } +function buildBazelModShowMavenExtensionArgv( + opts: BazelQueryOptions, +): string[] { + const userFlags = splitBazelFlags(opts.bazelFlags) + return [ + ...buildStartupFlags(opts), + 'mod', + 'show_extension', + '@rules_jvm_external//:extensions.bzl%maven', + ...userFlags, + ] +} + function buildBazelModShowPipExtensionArgv(opts: BazelQueryOptions): string[] { - const startup: string[] = [] - if (opts.bazelRc) { - startup.push(`--bazelrc=${opts.bazelRc}`) - } - if (opts.bazelOutputBase) { - startup.push(`--output_base=${opts.bazelOutputBase}`) - } const userFlags = splitBazelFlags(opts.bazelFlags) return [ - ...startup, + ...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', @@ -84,18 +108,11 @@ function buildBazelArgv( ): string[] { // Startup flags MUST precede the `query` subcommand. // Bazel argv shape: query --output= - const startup: string[] = [] - if (opts.bazelRc) { - startup.push(`--bazelrc=${opts.bazelRc}`) - } - if (opts.bazelOutputBase) { - startup.push(`--output_base=${opts.bazelOutputBase}`) - } // Keep query output stable and avoid updating Bazel lockfiles while extracting. 
const queryFlags = ['--lockfile_mode=off', '--noshow_progress'] const userFlags = splitBazelFlags(opts.bazelFlags) return [ - ...startup, + ...buildStartupFlags(opts), 'query', ...queryFlags, ...opts.invocationFlags, @@ -105,6 +122,29 @@ function buildBazelArgv( ] } +// Lightweight presence-check cquery used by the tri-state probe classifier. +// `--keep_going --output=label` keeps it fast even on partial-analysis +// repos and avoids paying for `--output=jsonproto` plus +// `--proto:output_rule_attrs` (which the heavier metadata extraction in +// `bazel-cquery.mts` needs but the probe does not). +function buildBazelProbeCqueryArgv( + repoName: string, + opts: BazelQueryOptions, +): string[] { + const userFlags = splitBazelFlags(opts.bazelFlags) + return [ + ...buildStartupFlags(opts), + 'cquery', + '--lockfile_mode=off', + '--noshow_progress', + ...opts.invocationFlags, + `@${repoName}//...`, + '--output=label', + '--keep_going', + ...userFlags, + ] +} + function stringField(value: unknown): string { return typeof value === 'string' ? value : '' } @@ -229,15 +269,11 @@ export async function runBazelQuery( } } -/** - * Bzlmod-native visible repository enumeration. This is only a candidate - * source; callers must still validate each returned apparent repo name with a - * semantic query for generated ecosystem rules. - */ -export async function runBazelModShowVisibleRepos( +async function runBazelOneShot( + argv: string[], opts: BazelQueryOptions, + step: string, ): Promise { - const argv = buildBazelModShowVisibleReposArgv(opts) if (opts.verbose) { logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv) } @@ -259,70 +295,100 @@ export async function runBazelModShowVisibleRepos( durationMs: Date.now() - startedAt, opts, result, - step: 'bazel mod dump_repo_mapping', + step, }) return result } /** - * Bzlmod-native rules_python pip extension usage inspection. 
This is the - * authoritative source for root-module pip.parse metadata when Bazel supports - * the command; callers keep bounded static parsing as fallback. + * Bzlmod-native visible repository enumeration. NOTE: only consumed by the + * legacy PyPI path; the Maven path uses `runBazelModShowMavenExtension` + * instead because `dump_repo_mapping` over-enumerates apparent names that + * are not Maven hubs. + */ +export async function runBazelModShowVisibleRepos( + opts: BazelQueryOptions, +): Promise { + return runBazelOneShot( + buildBazelModShowVisibleReposArgv(opts), + opts, + 'bazel mod dump_repo_mapping', + ) +} + +/** + * Bzlmod-native Maven hub enumeration via the rules_jvm_external maven + * extension. The text-format report lists every repo the extension + * generated; `parseShowExtensionOutput` (bazel-repo-discovery.mts) + * extracts the hubs from the `Fetched repositories:` section. + */ +export async function runBazelModShowMavenExtension( + opts: BazelQueryOptions, +): Promise { + return runBazelOneShot( + buildBazelModShowMavenExtensionArgv(opts), + opts, + 'bazel mod show_extension rules_jvm_external maven', + ) +} + +/** + * Bzlmod-native rules_python pip extension usage inspection. Used by the + * PyPI path; kept here since the argv shape is identical to the maven + * variant modulo the extension target. */ export async function runBazelModShowPipExtension( opts: BazelQueryOptions, ): Promise { - const argv = buildBazelModShowPipExtensionArgv(opts) - if (opts.verbose) { - logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv) - } - const startedAt = Date.now() - let result: BazelQueryResult - try { - const output = await spawn(opts.bin, argv, { - cwd: opts.cwd, - timeout: BAZEL_QUERY_TIMEOUT_MS, - ...(opts.env ? 
{ env: opts.env } : {}), - }) - const { code, stderr, stdout } = output - result = { code, stdout, stderr } - } catch (e) { - result = normalizeSpawnError(e) - } - logBazelTrace({ - argv, - durationMs: Date.now() - startedAt, + return runBazelOneShot( + buildBazelModShowPipExtensionArgv(opts), opts, - result, - step: 'bazel mod show_extension rules_python pip', - }) - return result + 'bazel mod show_extension rules_python pip', + ) } /** - * Build a `RepoProbe` (compatible with bazel-repo-discovery) bound to opts. - * Used by `discoverMavenRepos` to validate candidate Maven repo - * names against the running workspace. + * Build a `RepoProbe` (compatible with bazel-repo-discovery's tri-state + * classifier) bound to opts. Runs the lightweight presence-check cquery + * `@//... --output=label --keep_going` — cheap enough to attempt + * every conventional Maven hub name without triggering `repository_rule` + * fetches on undefined names (Exp 3). */ -export function buildProbeFor(opts: BazelQueryOptions): RepoProbe { +export function buildMavenProbeFor(opts: BazelQueryOptions): RepoProbe { return async (repoName: string) => { - const queryStr = `kind("jvm_import rule|aar_import rule", @${repoName}//:*)` - const result = await runBazelQuery(queryStr, opts) - return { stdout: result.stdout, code: result.code } + const argv = buildBazelProbeCqueryArgv(repoName, opts) + const result = await runBazelOneShot( + argv, + opts, + `bazel cquery probe @${repoName}`, + ) + return { code: result.code, stdout: result.stdout, stderr: result.stderr } } } /** * Build a `RepoProbe` for validating pip hub candidates. - * Queries the hub for package targets (e.g. `@//...`) and returns - * stdout so the caller can check for `:pkg` labels or alias rules. - * Does NOT require `pypi_name=` tags in the hub output, because those - * tags live on spoke repos, not the hub alias layer. + * Queries the hub for package targets (e.g. 
`@//...` and returns the + * full result triple so the caller can check for `:pkg` labels or alias + * rules. Does NOT require `pypi_name=` tags in the hub output, because + * those tags live on spoke repos, not the hub alias layer. */ export function buildPypiProbeFor(opts: BazelQueryOptions): RepoProbe { return async (hubName: string) => { const queryStr = `@${hubName}//...` const result = await runBazelQuery(queryStr, opts) - return { stdout: result.stdout, code: result.code } + return { code: result.code, stdout: result.stdout, stderr: result.stderr } } } + +// Exposed for direct test access — useful when asserting on argv shape +// without spawning. Each builder returns the exact argv its corresponding +// runner would pass to spawn. +export const _internalArgvBuilders = { + buildBazelArgv, + buildBazelModShowMavenExtensionArgv, + buildBazelModShowPipExtensionArgv, + buildBazelModShowVisibleReposArgv, + buildBazelProbeCqueryArgv, + buildStartupFlags, +} diff --git a/src/commands/manifest/bazel/bazel-query-runner.test.mts b/src/commands/manifest/bazel/bazel-query-runner.test.mts index 15cd2411f..ff9bda425 100644 --- a/src/commands/manifest/bazel/bazel-query-runner.test.mts +++ b/src/commands/manifest/bazel/bazel-query-runner.test.mts @@ -19,8 +19,9 @@ import { logger } from '@socketsecurity/registry/lib/logger' import { spawn } from '@socketsecurity/registry/lib/spawn' import { - buildProbeFor, + buildMavenProbeFor, buildPypiProbeFor, + runBazelModShowMavenExtension, runBazelModShowPipExtension, runBazelModShowVisibleRepos, runBazelQuery, @@ -40,7 +41,7 @@ describe('runBazelQuery', () => { }) it('builds the standard query argv shape', async () => { - await runBazelQuery('kind(jvm_import, @maven//:*)', { + await runBazelQuery('attr("tags", ".+", @maven//:*)', { bin: '/usr/local/bin/bazel', cwd: '/repo', invocationFlags: [], @@ -51,7 +52,7 @@ describe('runBazelQuery', () => { expect(argv[0]).toBe('query') expect(argv).toContain('--lockfile_mode=off')
expect(argv).toContain('--noshow_progress') - expect(argv).toContain('kind(jvm_import, @maven//:*)') + expect(argv).toContain('attr("tags", ".+", @maven//:*)') expect(argv).toContain('--output=build') }) @@ -69,6 +70,20 @@ describe('runBazelQuery', () => { ) }) + it('forwards outputUserRoot as a startup flag BEFORE the subcommand', async () => { + await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + outputUserRoot: '/tmp/socket-bazel-xyz', + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toContain('--output_user_root=/tmp/socket-bazel-xyz') + expect( + argv.indexOf('--output_user_root=/tmp/socket-bazel-xyz'), + ).toBeLessThan(argv.indexOf('query')) + }) + it('forwards bazelOutputBase as a startup flag BEFORE query', async () => { await runBazelQuery('q', { bin: 'bazel', @@ -217,6 +232,50 @@ describe('runBazelQuery', () => { }) }) +describe('runBazelModShowMavenExtension', () => { + const mocked = vi.mocked(spawn) + + beforeEach(() => { + mocked.mockReset() + // @ts-ignore — narrow return shape for the test's purposes. 
+ mocked.mockResolvedValue({ + code: 0, + stdout: '## @@rules_jvm_external+//:extensions.bzl%maven:\n', + stderr: '', + }) + }) + + it('uses the rules_jvm_external maven extension target', async () => { + await runBazelModShowMavenExtension({ + bin: 'bazel', + cwd: '/repo', + invocationFlags: [], + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toEqual([ + 'mod', + 'show_extension', + '@rules_jvm_external//:extensions.bzl%maven', + ]) + }) + + it('threads outputUserRoot ahead of the subcommand', async () => { + await runBazelModShowMavenExtension({ + bin: 'bazel', + cwd: '/repo', + invocationFlags: [], + outputUserRoot: '/tmp/socket-bazel-abc', + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toEqual([ + '--output_user_root=/tmp/socket-bazel-abc', + 'mod', + 'show_extension', + '@rules_jvm_external//:extensions.bzl%maven', + ]) + }) +}) + describe('runBazelModShowVisibleRepos', () => { const mocked = vi.mocked(spawn) @@ -232,7 +291,6 @@ describe('runBazelModShowVisibleRepos', () => { cwd: '/repo', invocationFlags: [], }) - const argv = mocked.mock.calls[0]![1] as string[] expect(argv).toEqual(['mod', 'dump_repo_mapping', '', '--output=json']) expect(argv).not.toContain('--all_visible_repos') @@ -255,7 +313,6 @@ describe('runBazelModShowPipExtension', () => { cwd: '/repo', invocationFlags: [], }) - const argv = mocked.mock.calls[0]![1] as string[] expect(argv).toEqual([ 'mod', @@ -266,7 +323,7 @@ describe('runBazelModShowPipExtension', () => { }) }) -describe('buildProbeFor', () => { +describe('buildMavenProbeFor', () => { const mocked = vi.mocked(spawn) beforeEach(() => { @@ -274,25 +331,62 @@ describe('buildProbeFor', () => { // @ts-ignore — narrow return shape for the test's purposes. 
mocked.mockResolvedValue({ code: 0, - stdout: 'jvm_import(\n maven_coordinates="g:a:1",\n)', + stdout: '@maven//:foo\n@maven//:bar\n', stderr: '', }) }) - it('builds the probe query for a repo name', async () => { - const probe = buildProbeFor({ + it('builds the lightweight presence-check cquery for a repo name', async () => { + const probe = buildMavenProbeFor({ bin: 'bazel', cwd: '/r', invocationFlags: [], }) const result = await probe('my_maven_repo') const argv = mocked.mock.calls[0]![1] as string[] - expect(argv).toContain( - 'kind("jvm_import rule|aar_import rule", @my_maven_repo//:*)', - ) + expect(argv).toContain('cquery') + expect(argv).toContain('@my_maven_repo//...') + expect(argv).toContain('--output=label') + expect(argv).toContain('--keep_going') expect(result).toEqual({ - stdout: expect.stringContaining('maven_coordinates'), code: 0, + stdout: '@maven//:foo\n@maven//:bar\n', + stderr: '', + }) + }) + + it('threads outputUserRoot into the probe argv', async () => { + const probe = buildMavenProbeFor({ + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + outputUserRoot: '/tmp/x', + }) + await probe('maven') + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv[0]).toBe('--output_user_root=/tmp/x') + expect(argv).toContain('@maven//...') + }) + + it('returns the full result triple including stderr (tri-state classifier needs it)', async () => { + // @ts-ignore — narrow return shape for the test's purposes. 
+ mocked.mockResolvedValueOnce({ + code: 1, + stdout: '', + stderr: + "ERROR: No repository visible as '@nope' from main repository\n", + }) + const probe = buildMavenProbeFor({ + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + }) + const result = await probe('nope') + expect(result).toEqual({ + code: 1, + stdout: '', + stderr: + "ERROR: No repository visible as '@nope' from main repository\n", }) }) }) @@ -320,12 +414,13 @@ describe('buildPypiProbeFor', () => { const argv = mocked.mock.calls[0]![1] as string[] expect(argv).toContain('@pypi//...') expect(result).toEqual({ - stdout: expect.stringContaining('@pypi//requests:pkg'), code: 0, + stdout: expect.stringContaining('@pypi//requests:pkg'), + stderr: '', }) }) - it('returns non-zero code when the hub has no :pkg targets', async () => { + it('returns the full triple when the hub has no :pkg targets', async () => { mocked.mockReset() // @ts-ignore — narrow return shape for the test's purposes. mocked.mockResolvedValue({ @@ -339,7 +434,6 @@ describe('buildPypiProbeFor', () => { invocationFlags: [], }) const result = await probe('empty_hub') - expect(result.code).toBe(0) - expect(result.stdout).toBe('') + expect(result).toEqual({ code: 0, stdout: '', stderr: '' }) }) }) diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.mts b/src/commands/manifest/bazel/bazel-repo-discovery.mts index 8d13542a3..494dcb017 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.mts @@ -1,345 +1,165 @@ -import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs' -import path from 'node:path' - +/** + * Maven hub repo discovery for `socket manifest bazel`. 
+ * + * - Bzlmod path: `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven` + * emits a text-format report listing every repo the maven extension generated; + * `parseShowExtensionOutput` extracts the names of hub repos (items annotated + * with `(imported by ...)`) and skips generated per-artifact repos. + * - Legacy WORKSPACE path: probe a fixed list of conventional Maven hub names + * (plus any names the customer passed via `--bazel-maven-repo=`). Each probe + * is classified into `populated` / `empty` / `not-defined`; the orchestrator + * keeps only the `populated` candidates. + * + * No Starlark source is read by this module. All semantic interpretation + * comes from Bazel itself (`mod show_extension`, `cquery`). + */ import { logger } from '@socketsecurity/registry/lib/logger' -import { getErrorCause } from '../../../utils/errors.mts' - -// Maximum size (bytes) we will read for any single Bazel workspace file. -// Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files. -const MAX_WORKSPACE_FILE_BYTES = 5 * 1024 * 1024 - -// Maximum candidate count we will return (deduped) before truncating. -// Real repos have <20; this is a hard ceiling against pathological inputs. -const MAX_CANDIDATES = 256 - -// Regex strategy: anchored, bounded character classes, no nested quantifiers. -// Match `use_repo(maven, "X", "Y", ...)` with a bounded arg-list window to -// avoid catastrophic backtracking on hostile input. - -// Bzlmod use_repo(maven, "name1", "name2"...). -// Bounded: matches up to ~4KB of arg list to avoid catastrophic backtracking. -const USE_REPO_RE = /use_repo\s*\(\s*maven\s*,([^)]{0,4096})\)/g -const BAZEL_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}' -const BAZEL_REPO_NAME_RE = new RegExp(`^${BAZEL_REPO_NAME_PATTERN}$`) -// Quoted-name extractor inside the captured argument blob. -const QUOTED_NAME_RE = new RegExp(`"(${BAZEL_REPO_NAME_PATTERN})"`, 'g') - -// Legacy maven_install(name = "X", ...) on a single statement. 
-// Match the name= keyword arg specifically; bounded. -const MAVEN_INSTALL_NAME_RE = new RegExp( - `maven_install\\s*\\([^)]{0,8192}?\\bname\\s*=\\s*"(${BAZEL_REPO_NAME_PATTERN})"`, - 'g', -) -const MAVEN_COORDINATES_MARKER_RE = /\bmaven_coordinates\s*=/ - -// Reads file contents, refusing files that exceed MAX_WORKSPACE_FILE_BYTES. -// Returns null when the file is missing, oversized, or unreadable. -function safeReadFile(file: string): string | null { - if (!existsSync(file)) { - return null - } - try { - const stat = statSync(file) - if (stat.size > MAX_WORKSPACE_FILE_BYTES) { - return null - } - return readFileSync(file, 'utf8') - } catch { - return null - } -} - -// Walks workspace root for legacy Starlark sources we can scan: WORKSPACE -// (and WORKSPACE.bazel) plus top-level .bzl files. Non-recursive by design; -// Phase 1 explicitly avoids static Starlark parsing at depth. -function listLegacyStarlarkFiles(cwd: string): string[] { - const files: string[] = [] - const candidates = ['WORKSPACE', 'WORKSPACE.bazel'] - for (const c of candidates) { - const p = path.join(cwd, c) - if (existsSync(p)) { - files.push(p) - } - } - // Top-level .bzl files only. - try { - for (const entry of readdirSync(cwd)) { - if (entry.endsWith('.bzl')) { - files.push(path.join(cwd, entry)) - } - } - } catch { - // Ignore unreadable cwd. - } - return files -} - -// Returns deduplicated, sorted list of items, capped at MAX_CANDIDATES. -function uniqueSorted(items: string[]): string[] { - const seen = new Set() - const out: string[] = [] - for (const item of items) { - if (!seen.has(item)) { - seen.add(item) - out.push(item) - if (out.length >= MAX_CANDIDATES) { - break - } - } - } - return out.sort() -} - -function apparentNameFromJsonValue(value: unknown): string | undefined { - if (!value || typeof value !== 'object') { - return undefined - } - const obj = value as Record - const direct = obj['apparentName'] ?? 
obj['apparent_name'] - if (typeof direct === 'string') { - return direct - } - for (const nested of Object.values(obj)) { - const found = apparentNameFromJsonValue(nested) - if (found) { - return found - } - } - return undefined +export type ProbeResult = { + code: number + stdout: string + stderr: string } -function apparentNamesFromRepoMapping(value: unknown): string[] { - if (!value || typeof value !== 'object' || Array.isArray(value)) { +export type RepoProbe = (repoName: string) => Promise + +export type ProbeStatus = 'populated' | 'empty' | 'not-defined' + +// Conventional Maven hub names rules_jvm_external sets up under +// WORKSPACE-mode invocations. Probing each one is cheap (a failed visibility +// lookup never triggers a `repository_rule` fetch) so the orchestrator can +// try them all without paying the cost of a real cquery on undefined repos. +export const CONVENTIONAL_MAVEN_REPO_NAMES: readonly string[] = [ + 'maven', + 'maven_install', + 'maven_dev', + 'unpinned_maven', + 'maven_unpinned', +] + +// Pattern Bazel emits when a probed repo name isn't visible to the main +// module. Used to distinguish `not-defined` (skip silently) from `empty` +// (the repo exists but has no targets). Tolerant of either single- or +// double-quote styles Bazel has used across versions. +const NOT_VISIBLE_STDERR_RE = + /No repository visible as ['"]?@?[A-Za-z0-9._+-]+['"]? from/ +// Other "repo isn't analyzable" patterns Bazel emits, especially under +// WORKSPACE mode and on Bazel 6.x. They all map to `not-defined`. +const NO_SUCH_PACKAGE_STDERR_RE = /no such package ['"`]?@/ +// Pattern emitted when a repo IS visible / defined but yields no targets. +// `--keep_going` plus `'no targets found beneath'` is the empty-but-defined +// signature. The orchestrator treats `empty` and `not-defined` uniformly +// as skips. +const NO_TARGETS_STDERR_RE = /no targets found beneath/i +// Anchor for the maven extension's section header in +// `bazel mod show_extension` output. 
// Tolerant of the canonical-name forms Bazel uses across versions
// (`@@rules_jvm_external+`, `@@rules_jvm_external~`, or any future
// separator) and of trailing whitespace.
const SHOW_EXT_SECTION_HEADER_RE =
  /^## @@?[A-Za-z0-9._+~-]+\/\/:extensions\.bzl%maven:\s*$/m
// Bullet within `Fetched repositories:` that names a hub repo (one with an
// `(imported by ...)` annotation). Bullets without that annotation are
// generated per-artifact repos and are skipped.
const FETCHED_HUB_BULLET_RE =
  /^ {2}- (?<name>\S+) \(imported by (?<importers>[^)]+)\)\s*$/

/**
 * Pure parser for the stdout of
 * `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`.
 *
 * Locates the maven extension section via its
 * `## @@<canonical-name>//:extensions.bzl%maven:` header, then collects the
 * bullets under that section's `Fetched repositories:` line that carry an
 * `(imported by ...)` annotation. Bullets without the annotation and any
 * other lines (blank lines, `DEBUG:` / `WARNING:` noise) are ignored.
 *
 * The scan never leaves the maven section: it stops at the next `## `
 * header whether or not `Fetched repositories:` has been seen, so a
 * `Fetched repositories:` list belonging to a later extension section in
 * the same report is not mistaken for Maven hubs.
 *
 * @param stdout Raw stdout of the `bazel mod show_extension` invocation.
 * @returns Deduplicated, sorted hub repo names; `[]` when the maven section
 *   or its `Fetched repositories:` list is absent.
 */
export function parseShowExtensionOutput(stdout: string): string[] {
  const headerMatch = SHOW_EXT_SECTION_HEADER_RE.exec(stdout)
  if (!headerMatch) {
    return []
  }
  const sectionBody = stdout.slice(headerMatch.index + headerMatch[0].length)
  const hubs = new Set<string>()
  let inFetchedList = false
  for (const line of sectionBody.split(/\r?\n/)) {
    // Any following `## ` header ends the maven section (some Bazel versions
    // print multiple extensions in one report).
    if (line.startsWith('## ')) {
      break
    }
    if (!inFetchedList) {
      // Bullets only count once the list marker has been seen.
      inFetchedList = line.startsWith('Fetched repositories:')
      continue
    }
    const name = FETCHED_HUB_BULLET_RE.exec(line)?.groups?.['name']
    if (name) {
      hubs.add(name)
    }
  }
  return [...hubs].sort()
}

// Classify a raw probe result into one of three states. The probe contract
// is whatever the runner emits — typically a lightweight
// `cquery '@//...' --keep_going --output=label`. The orchestrator
// treats `empty` and `not-defined` uniformly as no-ops; the distinction
// is preserved for verbose-mode diagnostics.
/**
 * Map a probe's `{ code, stdout, stderr }` onto a `ProbeStatus`.
 *
 * - exit code 0 with non-blank stdout → `'populated'`
 * - exit code 0 with blank stdout → `'not-defined'`
 * - non-zero exit whose stderr matches the "not visible" or
 *   "no such package" patterns → `'not-defined'`
 * - non-zero exit whose stderr matches "no targets found beneath" → `'empty'`
 * - any other non-zero exit → `'not-defined'`
 *
 * NOTE(review): a non-zero exit that still printed labels on stdout
 * (presumably possible under `--keep_going` — TODO confirm) is not treated
 * as populated; it is classified from stderr alone.
 */
export function classifyProbeResult(result: ProbeResult): ProbeStatus {
  const { code, stderr, stdout } = result
  if (code === 0) {
    // Success: populated only if at least one non-whitespace character
    // was printed; a silent success is treated as an undeclared repo.
    return stdout.trim().length > 0 ? 'populated' : 'not-defined'
  }
  const repoMissing =
    NOT_VISIBLE_STDERR_RE.test(stderr) || NO_SUCH_PACKAGE_STDERR_RE.test(stderr)
  if (repoMissing) {
    return 'not-defined'
  }
  // Defined-but-empty has its own stderr signature; anything unrecognized
  // falls back to not-defined.
  return NO_TARGETS_STDERR_RE.test(stderr) ? 'empty' : 'not-defined'
}

// Convenience: probe a single candidate and return its classified status,
// with optional verbose logging. Pure orchestration around `probe` +
// `classifyProbeResult`; isolated so the test suite can exercise the
// logging contract independently of the runner implementation.
+export async function probeCandidate( repoName: string, probe: RepoProbe, verbose?: boolean, -): Promise { +): Promise { + let result: ProbeResult try { - const result = await probe(repoName) - if (result.code !== 0) { - if (verbose) { - logger.log( - `[VERBOSE] discovery: probe @${repoName}: REJECT (code=${result.code})`, - ) - } - return { valid: false, stdout: result.stdout } - } - const valid = MAVEN_COORDINATES_MARKER_RE.test(result.stdout) - if (verbose) { - logger.log( - `[VERBOSE] discovery: probe @${repoName}:`, - valid - ? 'ACCEPT (maven_coordinates marker found)' - : 'REJECT (no maven_coordinates marker in probe stdout)', - ) - } - return { valid, stdout: result.stdout } + result = await probe(repoName) } catch (e) { if (verbose) { logger.log( - `[VERBOSE] discovery: probe @${repoName}: REJECT (probe threw):`, - getErrorCause(e), + `[VERBOSE] discovery: probe @${repoName}: not-defined (probe threw: ${ + e instanceof Error ? e.message : String(e) + })`, ) } - return { valid: false, stdout: '' } - } -} - -// The default maven_install repo name when no explicit `name=` is given. -// Included as a seed so repos that define maven_install in a subdirectory -// .bzl file (not scanned by parseMavenRepoCandidates) are still discovered. -const DEFAULT_MAVEN_REPO_SEED = 'maven' - -// Composition: parse, then validate each candidate; return validated subset -// as a Map keyed by repo name with the validated probe stdout as value. -// Map iteration order matches insertion order, so callers that just want -// the list of repo names can call `Array.from(repos.keys())`. Callers that -// want to skip re-running the same `bazel query` during extraction can read -// the cached stdout off the Map and parse it directly. -// -// Always seeds with the default `@maven` repo name so repos whose -// maven_install is defined in a sub-directory .bzl file (not reachable by -// the top-level static scan) can still be discovered via probe validation. 
-export async function discoverMavenRepos( - cwd: string, - probe: RepoProbe, - nativeCandidates?: string[], - verbose?: boolean, -): Promise> { - const parsed = - nativeCandidates && nativeCandidates.length - ? nativeCandidates - : parseMavenRepoCandidates(cwd, verbose) - if (verbose) { - logger.log( - '[VERBOSE] discovery: candidate source:', - nativeCandidates && nativeCandidates.length - ? `bzlmod visible-repos (${nativeCandidates.length})` - : `static parse (${parsed.length})`, - ) - } - // Seed with the default repo name first (so it appears first in output if - // validated). Dedup via Set before validation. - const seen = new Set([DEFAULT_MAVEN_REPO_SEED]) - const candidates: string[] = [DEFAULT_MAVEN_REPO_SEED] - for (const c of parsed) { - if (!seen.has(c)) { - seen.add(c) - candidates.push(c) - } - } - if (verbose) { - logger.log( - '[VERBOSE] discovery: candidate set to probe (seed-first, deduped):', - candidates, - ) - } - const validated = new Map() - for (const c of candidates) { - // eslint-disable-next-line no-await-in-loop - const result = await validateMavenRepo(c, probe, verbose) - if (result.valid) { - validated.set(c, result.stdout) - } + return 'not-defined' } + const status = classifyProbeResult(result) if (verbose) { - logger.log( - '[VERBOSE] discovery: validated repos:', - Array.from(validated.keys()), - ) + logger.log(`[VERBOSE] discovery: probe @${repoName}: ${status}`) } - return validated + return status } diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts index 5755388df..8a7845fd6 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts @@ -1,247 +1,188 @@ -import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' -import os from 'node:os' -import path from 'node:path' -import { fileURLToPath } from 'node:url' - import { afterEach, beforeEach, describe, expect, it, vi } from 
'vitest' import { logger } from '@socketsecurity/registry/lib/logger' import { - discoverMavenRepos, - parseMavenRepoCandidates, - parseVisibleRepoCandidates, - validateMavenRepo, + CONVENTIONAL_MAVEN_REPO_NAMES, + classifyProbeResult, + parseShowExtensionOutput, + probeCandidate, } from './bazel-repo-discovery.mts' -import type { RepoProbe } from './bazel-repo-discovery.mts' - -const __filename = fileURLToPath(import.meta.url) -const __dirname = path.dirname(__filename) - -// from src/commands/manifest/bazel/ to repo root is four levels up, then into -// test/fixtures/manifest-bazel. -const FIXTURES = path.join( - __dirname, - '..', - '..', - '..', - '..', - 'test', - 'fixtures', - 'manifest-bazel', -) - -const acceptingProbe: RepoProbe = async () => ({ - stdout: - 'jvm_import(\n name = "guava",\n maven_coordinates = "com.google.guava:guava:33.0.0-jre",\n)', - code: 0, -}) +import type { + ProbeResult, + ProbeStatus, + RepoProbe, +} from './bazel-repo-discovery.mts' -const compactAcceptingProbe: RepoProbe = async () => ({ - stdout: - 'jvm_import(\n name = "guava",\n maven_coordinates="com.google.guava:guava:33.0.0-jre",\n)', +// Truncated text-format report Bazel 8.4.2 emits on tink-java for +// `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`. +// The headline shape: a `## @@//:extensions.bzl%maven:` header, +// blank line, then `Fetched repositories:` and a bullet list. Hub repos +// carry `(imported by ...)`; generated artifact repos don't. 
+const TINK_SHOW_EXTENSION_FIXTURE = `DEBUG: irrelevant noise +WARNING: also irrelevant + +## @@rules_jvm_external+//:extensions.bzl%maven: + +Fetched repositories: + - android_ide_common_30_1_3 (imported by rules_android@0.6.6) + - maven (imported by , bazel_worker_java@0.0.4, protobuf@32.1) + - rules_android_maven (imported by rules_android@0.6.6) + - rules_jvm_external_deps (imported by rules_jvm_external@6.7) + - stardoc_maven (imported by stardoc@0.7.2) + - unpinned_rules_jvm_external_deps (imported by rules_jvm_external@6.7) + - aopalliance_aopalliance_1_0 + - aopalliance_aopalliance_jar_sources_1_0 + - androidx_annotation_annotation +` + +const probeResult = (over: Partial = {}): ProbeResult => ({ code: 0, + stdout: '', + stderr: '', + ...over, }) -const rejectingProbe: RepoProbe = async () => ({ stdout: '', code: 0 }) - -const failingProbe: RepoProbe = async () => ({ stdout: '', code: 1 }) - -const throwingProbe: RepoProbe = async () => { - throw new Error('bazel exploded') -} - -const selectiveProbe: RepoProbe = async name => - name === 'maven' - ? { stdout: 'maven_coordinates=foo', code: 0 } - : { stdout: '', code: 0 } - describe('bazel-repo-discovery', () => { - describe('parseMavenRepoCandidates', () => { - it('parses single use_repo from bzlmod-only', () => { - expect( - parseMavenRepoCandidates(path.join(FIXTURES, 'bzlmod-only')), - ).toEqual(['maven']) - }) - - it('parses multiple names from multi-repo-bzlmod', () => { - expect( - parseMavenRepoCandidates( - path.join(FIXTURES, 'multi-repo-bzlmod'), - ).sort(), - ).toEqual(['maven', 'maven_test'].sort()) + describe('parseShowExtensionOutput', () => { + it('extracts hub repos with (imported by ...) annotations only', () => { + // The 6 hub repos in the fixture are the ones with annotations; + // generated per-artifact repos (no annotation) are skipped. 
+ expect(parseShowExtensionOutput(TINK_SHOW_EXTENSION_FIXTURE)).toEqual([ + 'android_ide_common_30_1_3', + 'maven', + 'rules_android_maven', + 'rules_jvm_external_deps', + 'stardoc_maven', + 'unpinned_rules_jvm_external_deps', + ]) }) - it('recovers custom name from custom-name-bzlmod', () => { + it('returns [] when the maven section is missing', () => { expect( - parseMavenRepoCandidates(path.join(FIXTURES, 'custom-name-bzlmod')), - ).toEqual(['maven_rules_kotlin_example']) + parseShowExtensionOutput( + 'DEBUG: noise\n\n## @@other//:extensions.bzl%other:\n\nFetched repositories:\n - foo (imported by )\n', + ), + ).toEqual([]) }) - it('parses maven_install name from legacy WORKSPACE', () => { + it('returns [] when Fetched repositories: is absent', () => { expect( - parseMavenRepoCandidates(path.join(FIXTURES, 'legacy-only')), - ).toEqual(['maven']) + parseShowExtensionOutput( + '## @@rules_jvm_external+//:extensions.bzl%maven:\n\nOther stuff\n', + ), + ).toEqual([]) }) - it('parses maven_install name from sibling .bzl file (legacy-with-load)', () => { - expect( - parseMavenRepoCandidates(path.join(FIXTURES, 'legacy-with-load')), - ).toEqual(['maven_legacy_app']) + it('stops at the next section header (multiple extensions in one report)', () => { + const input = + '## @@rules_jvm_external+//:extensions.bzl%maven:\n\nFetched repositories:\n - maven (imported by )\n - other (imported by foo)\n\n## @@rules_python+//:extensions.bzl%pip:\n\nFetched repositories:\n - pypi (imported by )\n' + expect(parseShowExtensionOutput(input)).toEqual(['maven', 'other']) }) - it('parses repo names containing hyphens and dots from static sources', () => { - const dir = mkdtempSync(path.join(os.tmpdir(), 'bazel-repos-')) - try { - writeFileSync( - path.join(dir, 'MODULE.bazel'), - 'use_repo(maven, "maven-prod", "third.party.maven")\n', - ) - writeFileSync( - path.join(dir, 'WORKSPACE'), - 'maven_install(name = "legacy-maven.prod", artifacts = [])\n', - ) - - 
expect(parseMavenRepoCandidates(dir)).toEqual([ - 'legacy-maven.prod', - 'maven-prod', - 'third.party.maven', - ]) - } finally { - rmSync(dir, { recursive: true, force: true }) + it('tolerates canonical-name separator variants (~ and +)', () => { + for (const sep of ['+', '~']) { + const input = `## @@rules_jvm_external${sep}//:extensions.bzl%maven:\n\nFetched repositories:\n - maven (imported by )\n` + expect(parseShowExtensionOutput(input)).toEqual(['maven']) } }) - it('returns empty array on a directory without bazel markers', () => { - // Use the fixtures root itself: no MODULE.bazel/WORKSPACE there. - expect(parseMavenRepoCandidates(FIXTURES)).toEqual([]) + it('deduplicates if the same hub appears twice (defensive)', () => { + const input = + '## @@rules_jvm_external+//:extensions.bzl%maven:\n\nFetched repositories:\n - maven (imported by )\n - maven (imported by foo)\n' + expect(parseShowExtensionOutput(input)).toEqual(['maven']) }) }) - describe('parseVisibleRepoCandidates', () => { - it('parses apparent repo names from dump_repo_mapping JSON output', () => { - const output = JSON.stringify({ - '': '', - '@invalid': 'canonical-invalid', - bazel_tools: 'bazel_tools', - maven: 'rules_jvm_external~~maven~maven', - 'maven-prod': 'rules_jvm_external~~maven~prod', - pypi: 'rules_python~~pip~pypi', - 'third.party.maven': 'rules_jvm_external~~maven~third_party', - }) - - expect(parseVisibleRepoCandidates(output)).toEqual([ - 'bazel_tools', - 'maven', - 'maven-prod', - 'pypi', - 'third.party.maven', - ]) - }) - - it('parses apparent repo names from streamed jsonproto output', () => { - const output = [ - JSON.stringify({ - repository: { - apparentName: '@maven', - canonicalName: 'rules_jvm_external~maven~maven', - }, - }), - JSON.stringify({ - repository: { - apparent_name: 'maven_rules_kotlin_example', - canonical_name: 'rules_jvm_external~maven~custom', - }, - }), - JSON.stringify({ - repository: { - apparentName: '@maven-prod', - canonicalName: 
'rules_jvm_external~maven~prod', - }, - }), - JSON.stringify({ - repository: { - apparentName: 'third.party.maven', - canonicalName: 'rules_jvm_external~maven~third_party', - }, - }), - 'not json', - ].join('\n') - - expect(parseVisibleRepoCandidates(output)).toEqual([ - 'maven', - 'maven-prod', - 'maven_rules_kotlin_example', - 'third.party.maven', - ]) + describe('classifyProbeResult', () => { + it('classifies code=0 + non-empty stdout as populated', () => { + expect( + classifyProbeResult( + probeResult({ code: 0, stdout: '@maven//:guava\n' }), + ), + ).toBe('populated') }) - }) - describe('validateMavenRepo', () => { - it('accepts when probe stdout contains spaced maven_coordinates output', async () => { - const r = await validateMavenRepo('maven', acceptingProbe) - expect(r.valid).toBe(true) - expect(r.stdout).toContain('maven_coordinates') + it('classifies code=1 + "No repository visible" stderr as not-defined', () => { + expect( + classifyProbeResult( + probeResult({ + code: 1, + stderr: + "ERROR: No repository visible as '@nonexistent_repo_xyz' from main repository\n", + }), + ), + ).toBe('not-defined') }) - it('accepts when probe stdout contains compact maven_coordinates output', async () => { - const r = await validateMavenRepo('maven', compactAcceptingProbe) - expect(r.valid).toBe(true) - expect(r.stdout).toContain('maven_coordinates') + it('classifies code=1 + "no targets found beneath" stderr as empty', () => { + expect( + classifyProbeResult( + probeResult({ + code: 1, + stderr: + "WARNING: Evaluation of query \"@maven_install//...\" failed: no targets found beneath ''\n", + }), + ), + ).toBe('empty') }) - it('rejects when probe stdout lacks maven_coordinates=', async () => { - expect((await validateMavenRepo('not_maven', rejectingProbe)).valid).toBe( - false, - ) + it('classifies code=0 + empty stdout (WORKSPACE-mode silent miss) as not-defined', () => { + expect( + classifyProbeResult(probeResult({ code: 0, stdout: '' })), + ).toBe('not-defined') }) 
- it('rejects on non-zero exit code', async () => { + it('classifies code=1 + unrecognized stderr conservatively as not-defined', () => { expect( - (await validateMavenRepo('also_not_maven', failingProbe)).valid, - ).toBe(false) + classifyProbeResult( + probeResult({ code: 1, stderr: 'some other failure\n' }), + ), + ).toBe('not-defined') }) - it('rejects when probe throws', async () => { - expect((await validateMavenRepo('crash', throwingProbe)).valid).toBe( - false, - ) + it('classifies code=1 + "no such package" stderr as not-defined', () => { + expect( + classifyProbeResult( + probeResult({ + code: 1, + stderr: "ERROR: no such package '@unknown_repo//'\n", + }), + ), + ).toBe('not-defined') }) }) - describe('discoverMavenRepos', () => { - it('returns parsed candidates that the probe validates, with cached probe stdout', async () => { - // multi-repo-bzlmod parses to ['maven', 'maven_test']; the accepting probe - // validates both. The returned Map carries the probe stdout for each. - const result = await discoverMavenRepos( - path.join(FIXTURES, 'multi-repo-bzlmod'), - acceptingProbe, - ) - expect(Array.from(result.keys()).sort()).toEqual( - ['maven', 'maven_test'].sort(), - ) - for (const stdout of result.values()) { - expect(stdout).toContain('maven_coordinates') - } + describe('probeCandidate', () => { + it('returns the classified status from a probe', async () => { + const probe: RepoProbe = async () => ({ + code: 0, + stdout: '@maven//:guava\n', + stderr: '', + }) + expect(await probeCandidate('maven', probe)).toBe('populated') }) - it('uses native visible repo candidates instead of static parsing when provided', async () => { - const result = await discoverMavenRepos( - path.join(FIXTURES, 'multi-repo-bzlmod'), - acceptingProbe, - ['native_maven'], + it('returns not-defined when the probe throws', async () => { + const probe: RepoProbe = async () => { + throw new Error('bazel exploded') + } + expect(await probeCandidate('crash', probe)).toBe( + 
'not-defined', ) - expect(Array.from(result.keys())).toEqual(['maven', 'native_maven']) }) + }) - it('filters out candidates the probe rejects', async () => { - // Probe accepts only when repo name === 'maven'; rejects 'maven_test'. - const result = await discoverMavenRepos( - path.join(FIXTURES, 'multi-repo-bzlmod'), - selectiveProbe, - ) - expect(Array.from(result.keys())).toEqual(['maven']) + describe('CONVENTIONAL_MAVEN_REPO_NAMES', () => { + it('includes the documented set', () => { + expect(CONVENTIONAL_MAVEN_REPO_NAMES).toEqual([ + 'maven', + 'maven_install', + 'maven_dev', + 'unpinned_maven', + 'maven_unpinned', + ]) }) }) @@ -262,83 +203,34 @@ describe('bazel-repo-discovery', () => { .join('\n') } - it('parseMavenRepoCandidates stays silent when verbose is unset', () => { - parseMavenRepoCandidates(path.join(FIXTURES, 'multi-repo-bzlmod')) + it('probeCandidate stays silent without verbose', async () => { + const probe: RepoProbe = async () => ({ + code: 0, + stdout: '@maven//:x\n', + stderr: '', + }) + await probeCandidate('maven', probe) expect(logSpy).not.toHaveBeenCalled() }) - it('parseMavenRepoCandidates emits scanned-files + candidate set when verbose=true', () => { - parseMavenRepoCandidates(path.join(FIXTURES, 'multi-repo-bzlmod'), true) - const text = loggedLines() - expect(text).toContain('discovery: scanned') - expect(text).toContain('MODULE.bazel') - expect(text).toContain('use_repo match') - expect(text).toContain('candidate set (pre-seed)') + it('probeCandidate logs the status under verbose', async () => { + const probe: RepoProbe = async () => ({ + code: 0, + stdout: '@maven//:x\n', + stderr: '', + }) + await probeCandidate('maven', probe, true) + expect(loggedLines()).toMatch(/probe @maven:\s*populated/) }) - it('validateMavenRepo logs ACCEPT under verbose', async () => { - await validateMavenRepo('maven', acceptingProbe, true) + it('probeCandidate logs the throw reason under verbose', async () => { + const probe: RepoProbe = async () => { 
+ throw new Error('bazel exploded') + } + await probeCandidate('crash', probe, true) expect(loggedLines()).toMatch( - /probe @maven:\s*ACCEPT \(maven_coordinates marker found\)/, - ) - }) - - it('validateMavenRepo logs REJECT (no marker) under verbose', async () => { - await validateMavenRepo('not_maven', rejectingProbe, true) - expect(loggedLines()).toMatch(/probe @not_maven:\s*REJECT/) - }) - - it('validateMavenRepo logs REJECT (probe threw) under verbose', async () => { - await validateMavenRepo('crash', throwingProbe, true) - expect(loggedLines()).toMatch(/probe @crash:\s*REJECT \(probe threw\)/) - }) - - it('discoverMavenRepos propagates verbose into the full pipeline', async () => { - await discoverMavenRepos( - path.join(FIXTURES, 'multi-repo-bzlmod'), - selectiveProbe, - undefined, - true, + /probe @crash:\s*not-defined \(probe threw: bazel exploded\)/, ) - const text = loggedLines() - // Candidate-source label. - expect(text).toContain('candidate source: static parse') - // Seeded-and-deduped candidate set log. - expect(text).toContain('candidate set to probe') - // Per-candidate probe verdicts. - expect(text).toMatch(/probe @maven:\s*ACCEPT/) - expect(text).toMatch(/probe @maven_test:\s*REJECT/) - // Final validated set. - expect(text).toContain('validated repos') - }) - }) - - describe('DoS guard', () => { - it('completes parse on 1MB pathological input within 1s', () => { - // Synthesize a 1MB Bzlmod-shaped file in a tmp dir and feed it through - // parseMavenRepoCandidates. Exercises the bounded USE_REPO_RE + - // QUOTED_NAME_RE windows. - const dir = mkdtempSync(path.join(os.tmpdir(), 'bazel-discover-')) - try { - // Build the fixture content in a single pass (avoid O(n^2) join-in-loop). - const lines: string[] = [] - let totalLen = 0 - while (totalLen < 1_000_000) { - const line = 'use_repo(maven, "x_' + lines.length + '")' - lines.push(line) - // Plus 1 for the eventual newline separator. 
- totalLen += line.length + 1 - } - writeFileSync(path.join(dir, 'MODULE.bazel'), lines.join('\n')) - const start = process.hrtime.bigint() - const result = parseMavenRepoCandidates(dir) - const elapsed = process.hrtime.bigint() - start - expect(elapsed).toBeLessThan(1_000_000_000n) - // Verify the cap kicks in (length is bounded by MAX_CANDIDATES). - expect(result.length).toBeLessThanOrEqual(256) - } finally { - rmSync(dir, { recursive: true, force: true }) - } }) }) }) diff --git a/src/commands/manifest/bazel/bazel-workspace-walk.mts b/src/commands/manifest/bazel/bazel-workspace-walk.mts new file mode 100644 index 000000000..8f7d95654 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-workspace-walk.mts @@ -0,0 +1,125 @@ +/** + * Walk the directory tree rooted at `cwd` and return every directory that + * looks like a Bazel workspace root — i.e. contains `MODULE.bazel`, + * `WORKSPACE`, or `WORKSPACE.bazel`. Real monorepos host multiple roots + * (e.g. `envoy/mobile/MODULE.bazel`, rules_kotlin's per-example + * `examples//MODULE.bazel`); the per-workspace algorithm in the + * orchestrator runs once per discovered root. + * + * The walker is dependency-injected with the directory-prune policy: + * callers pass the set of basenames and basename prefixes the walk must + * refuse to descend into. This module intentionally hardcodes none of + * the "common" prunes (`.git`, `node_modules`, …) — Bazel callers compose + * the codebase-wide `IGNORED_DIRS` list (`src/utils/glob.mts`) with the + * Bazel-specific bits (`bazel-*` output_base symlinks, + * `.socket-auto-manifest`, build-output `dist*`). + */ + +import { readdirSync } from 'node:fs' +import path from 'node:path' + +import { logger } from '@socketsecurity/registry/lib/logger' + +// Hard ceiling on number of workspace roots we will surface. Real monorepos +// have well under 50; this cap is a guard against pathological inputs. +const MAX_WORKSPACE_ROOTS = 256 +// Hard ceiling on directory walk depth. 
Deepest workspace marker observed +// across the OSS corpus surveyed is 9 (bazel-self test fixtures); deepest +// in realistic application code is 7 (checkmk's thirdparty layout). Cap +// is set to 8 — one level of headroom over the realistic max, while still +// guarding against pathological symlink loops that slipped past any +// prefix prune. +const MAX_WALK_DEPTH = 8 +// Files whose presence promotes a directory to a workspace root. +const WORKSPACE_MARKER_FILES = new Set([ + 'MODULE.bazel', + 'WORKSPACE', + 'WORKSPACE.bazel', +]) + +export type FindWorkspaceRootsOptions = { + cwd: string + // Directory basenames to skip outright (exact match). Pass the union of + // the codebase-wide ignore set (`IGNORED_DIRS` in `src/utils/glob.mts`) + // and any caller-specific additions (e.g. `.socket-auto-manifest`). + ignoreDirNames?: ReadonlySet + // Directory basename prefixes to skip. Bazel callers pass `['bazel-', + // 'dist']` so the walk never descends into Bazel's output_base symlinks + // or build-output directories. + ignoreDirPrefixes?: readonly string[] + verbose?: boolean +} + +const EMPTY_SET: ReadonlySet = new Set() +const EMPTY_ARRAY: readonly string[] = [] + +// Walks the tree rooted at `opts.cwd` and returns absolute paths to every +// directory that contains at least one workspace marker file. Output is +// sorted for determinism. +export function findWorkspaceRoots(opts: FindWorkspaceRootsOptions): string[] { + const { cwd, verbose } = opts + const ignoreDirNames = opts.ignoreDirNames ?? EMPTY_SET + const ignoreDirPrefixes = opts.ignoreDirPrefixes ?? EMPTY_ARRAY + const out: string[] = [] + // Tuple stack: [absolute dir, depth from cwd]. 
+ const stack: Array<[string, number]> = [[cwd, 0]] + while (stack.length) { + if (out.length >= MAX_WORKSPACE_ROOTS) { + if (verbose) { + logger.log( + `[VERBOSE] workspace walker: hit MAX_WORKSPACE_ROOTS cap (${MAX_WORKSPACE_ROOTS}); truncating walk`, + ) + } + break + } + const next = stack.pop() + if (!next) { + break + } + const { 0: dir, 1: depth } = next + let entries + try { + entries = readdirSync(dir, { withFileTypes: true }) + } catch { + continue + } + // First pass: detect whether this dir is itself a workspace root. + let isWorkspaceRoot = false + for (const entry of entries) { + if (entry.isFile() && WORKSPACE_MARKER_FILES.has(entry.name)) { + isWorkspaceRoot = true + break + } + } + if (isWorkspaceRoot) { + out.push(dir) + } + // Second pass: schedule descents. We descend regardless of whether the + // current dir is itself a root — nested workspaces are common in + // monorepos (root MODULE.bazel + examples/*/MODULE.bazel). + if (depth + 1 > MAX_WALK_DEPTH) { + continue + } + for (const entry of entries) { + if (!entry.isDirectory()) { + continue + } + const name = entry.name + if (ignoreDirNames.has(name)) { + continue + } + let pruned = false + for (const prefix of ignoreDirPrefixes) { + if (name.startsWith(prefix)) { + pruned = true + break + } + } + if (pruned) { + continue + } + stack.push([path.join(dir, name), depth + 1]) + } + } + return out.sort() +} diff --git a/src/commands/manifest/bazel/bazel-workspace-walk.test.mts b/src/commands/manifest/bazel/bazel-workspace-walk.test.mts new file mode 100644 index 000000000..9ca249dce --- /dev/null +++ b/src/commands/manifest/bazel/bazel-workspace-walk.test.mts @@ -0,0 +1,149 @@ +import { + mkdirSync, + mkdtempSync, + rmSync, + symlinkSync, + writeFileSync, +} from 'node:fs' +import os from 'node:os' +import path from 'node:path' + +import { afterEach, beforeEach, describe, expect, it } from 'vitest' + +import { findWorkspaceRoots } from './bazel-workspace-walk.mts' + +function touch(file: string): 
void { + mkdirSync(path.dirname(file), { recursive: true }) + writeFileSync(file, '') +} + +// Standard prune set Bazel callers pass: the codebase-wide IGNORED_DIRS +// (.git, node_modules, etc.) plus the walker's own output dir, plus +// `bazel-*` output_base symlinks and `dist*` build outputs. Replicated +// inline here so the test stays decoupled from `src/utils/glob.mts`. +const BAZEL_IGNORE_NAMES: ReadonlySet = new Set([ + '.git', + '.hg', + '.idea', + '.pnpm-store', + '.socket-auto-manifest', + '.svn', + '.vscode', + 'node_modules', +]) +const BAZEL_IGNORE_PREFIXES: readonly string[] = ['bazel-', 'dist'] + +describe('bazel-workspace-walk', () => { + let tmp: string + + beforeEach(() => { + tmp = mkdtempSync(path.join(os.tmpdir(), 'sock-bazel-walk-')) + }) + + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }) + }) + + describe('findWorkspaceRoots', () => { + it('returns the root when only the root has MODULE.bazel', () => { + touch(path.join(tmp, 'MODULE.bazel')) + expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp]) + }) + + it('detects WORKSPACE and WORKSPACE.bazel as root markers', () => { + touch(path.join(tmp, 'WORKSPACE')) + expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp]) + rmSync(path.join(tmp, 'WORKSPACE')) + touch(path.join(tmp, 'WORKSPACE.bazel')) + expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp]) + }) + + it('finds nested workspaces at arbitrary depth', () => { + touch(path.join(tmp, 'MODULE.bazel')) + touch(path.join(tmp, 'examples', 'dagger', 'MODULE.bazel')) + touch(path.join(tmp, 'examples', 'android', 'nested', 'WORKSPACE.bazel')) + const found = findWorkspaceRoots({ cwd: tmp }).map(p => + path.relative(tmp, p), + ) + expect(found).toEqual(['', 'examples/android/nested', 'examples/dagger']) + }) + + it('returns [] when there is no workspace root', () => { + writeFileSync(path.join(tmp, 'README.md'), '') + expect(findWorkspaceRoots({ cwd: tmp })).toEqual([]) + }) + + it('does NOT prune by default — pruning 
policy is caller-supplied', () => { + touch(path.join(tmp, 'MODULE.bazel')) + touch(path.join(tmp, 'node_modules', 'MODULE.bazel')) + const found = findWorkspaceRoots({ cwd: tmp }).map(p => + path.relative(tmp, p), + ) + expect(found).toEqual(['', 'node_modules']) + }) + + it('prunes injected ignoreDirNames', () => { + touch(path.join(tmp, 'MODULE.bazel')) + for (const dir of ['node_modules', '.git', '.socket-auto-manifest']) { + touch(path.join(tmp, dir, 'sub', 'MODULE.bazel')) + } + const found = findWorkspaceRoots({ + cwd: tmp, + ignoreDirNames: BAZEL_IGNORE_NAMES, + }).map(p => path.relative(tmp, p)) + expect(found).toEqual(['']) + }) + + it('prunes injected ignoreDirPrefixes (bazel-* symlinks)', () => { + const fakeOutputBase = mkdtempSync( + path.join(os.tmpdir(), 'sock-fake-outbase-'), + ) + try { + mkdirSync(path.join(fakeOutputBase, 'external', 'maven'), { + recursive: true, + }) + touch(path.join(fakeOutputBase, 'external', 'maven', 'MODULE.bazel')) + symlinkSync(fakeOutputBase, path.join(tmp, 'bazel-out')) + touch(path.join(tmp, 'MODULE.bazel')) + const found = findWorkspaceRoots({ + cwd: tmp, + ignoreDirPrefixes: BAZEL_IGNORE_PREFIXES, + }).map(p => path.relative(tmp, p)) + expect(found).toEqual(['']) + } finally { + rmSync(fakeOutputBase, { recursive: true, force: true }) + } + }) + + it('prunes injected dist* prefix', () => { + touch(path.join(tmp, 'MODULE.bazel')) + touch(path.join(tmp, 'dist', 'MODULE.bazel')) + touch(path.join(tmp, 'distribution', 'MODULE.bazel')) + const found = findWorkspaceRoots({ + cwd: tmp, + ignoreDirPrefixes: BAZEL_IGNORE_PREFIXES, + }).map(p => path.relative(tmp, p)) + expect(found).toEqual(['']) + }) + + it('returns absolute, sorted paths', () => { + touch(path.join(tmp, 'z', 'MODULE.bazel')) + touch(path.join(tmp, 'a', 'MODULE.bazel')) + touch(path.join(tmp, 'm', 'MODULE.bazel')) + const found = findWorkspaceRoots({ cwd: tmp }) + expect(found).toEqual([ + path.join(tmp, 'a'), + path.join(tmp, 'm'), + path.join(tmp, 'z'), 
+ ]) + for (const p of found) { + expect(path.isAbsolute(p)).toBe(true) + } + }) + + it('handles an unreadable directory by skipping it (no throw)', () => { + touch(path.join(tmp, 'MODULE.bazel')) + expect(findWorkspaceRoots({ cwd: path.join(tmp, 'nope') })).toEqual([]) + }) + }) +}) diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.mts index 334b116db..612ee5198 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.mts @@ -2,37 +2,40 @@ import { existsSync, promises as fs, mkdirSync, - readFileSync, - realpathSync, + mkdtempSync, } from 'node:fs' +import os from 'node:os' import path from 'node:path' import { logger } from '@socketsecurity/registry/lib/logger' +import { spawn } from '@socketsecurity/registry/lib/spawn' import { resolveBazelBinary } from './bazel-bin-detect.mts' -import { - parseBazelBuildOutput, - parseUnsortedDepsJson, -} from './bazel-build-parser.mts' +import { runMetadataCqueryForRepo } from './bazel-cquery.mts' import { ensureJavaOnPath } from './bazel-java-shim.mts' import { validateOutputBase } from './bazel-output-base-check.mts' import { provisionPythonShim } from './bazel-python-shim.mts' import { - buildProbeFor, - runBazelModShowVisibleRepos, + buildMavenProbeFor, + runBazelModShowMavenExtension, } from './bazel-query-runner.mts' import { - discoverMavenRepos, - parseVisibleRepoCandidates, + CONVENTIONAL_MAVEN_REPO_NAMES, + parseShowExtensionOutput, + probeCandidate, } from './bazel-repo-discovery.mts' import { detectWorkspaceMode, getBazelInvocationFlags, } from './bazel-workspace-detect.mts' +import { findWorkspaceRoots } from './bazel-workspace-walk.mts' import { getErrorCause } from '../../../utils/errors.mts' +import { IGNORED_DIRS } from '../../../utils/glob.mts' +import type { CqueryRepoResult } from './bazel-cquery.mts' import type { ExtractedArtifact } from './bazel-build-parser.mts' 
import type { BazelQueryOptions } from './bazel-query-runner.mts' +import type { WorkspaceMode } from './bazel-workspace-detect.mts' export type ExtractBazelOptions = { bazelFlags: string | undefined @@ -42,9 +45,18 @@ export type ExtractBazelOptions = { cwd: string // Optional env override used for python-shim PATH augmentation. env?: NodeJS.ProcessEnv + // Customer-supplied Maven hub names augmenting the auto-discovery + // candidate set. Wired in by the `--bazel-maven-repo=` flag for + // legacy WORKSPACE workspaces whose hubs use non-conventional names + // (or custom Bzlmod extensions `mod show_extension` doesn't enumerate). + extraMavenRepoNames?: string[] | undefined out: string // Use the auto-manifest sibling directory instead of writing directly to `out`. outLayout?: 'flat' + // Per-repo cquery timeout in milliseconds. Auto-manifest default is 60s + // (the orchestrator's job is to not stall the wider scan); explicit + // invocations may bump it. + perRepoTimeoutMs?: number | undefined verbose: boolean } @@ -55,6 +67,25 @@ export type ExtractBazelResult = { ok: boolean } +const DEFAULT_PER_REPO_TIMEOUT_MS = 60_000 +const REAP_TIMEOUT_MS = 10_000 + +// Composed prune policy passed to the workspace walker. Reuses the +// codebase-wide `IGNORED_DIRS` and augments it with: the walker's own +// output dir (`.socket-auto-manifest`), VCS/IDE dirs not in the shared +// list (`.hg`, `.svn`, `.idea`, `.vscode`, `.pnpm-store`), Bazel's +// `bazel-*` output_base symlinks, and `dist*` build-output dirs. +const WORKSPACE_WALK_IGNORE_NAMES: ReadonlySet = new Set([ + ...IGNORED_DIRS, + '.hg', + '.idea', + '.pnpm-store', + '.socket-auto-manifest', + '.svn', + '.vscode', +]) +const WORKSPACE_WALK_IGNORE_PREFIXES: readonly string[] = ['bazel-', 'dist'] + type CoordPair = { groupArtifact: string; version: string } // Splits "g:a:v" -> { groupArtifact: "g:a", version: "v" }. 
@@ -171,15 +202,11 @@ export function normalizeToMavenInstallJson( } // Dependency keys in maven_install.json use "g:a" (no version), // matching the canonical rules_jvm_external lockfile shape. - // Only emit an entry when there are actual dependencies (lockfile omits - // artifacts with an empty dep list). const depKey = split.groupArtifact const depCoords = dependencySets.get(depKey) ?? new Set() for (const depLabel of a.deps) { - // First try our rule-label lookup (the common case for --output=build text). const c = depLabelToCoord(depLabel, labelToCoord) if (c) { - // c is "g:a:v"; strip the version to produce "g:a" per lockfile shape. const cs = splitCoord(c) depCoords.add(cs ? cs.groupArtifact : c) } else if ( @@ -187,9 +214,6 @@ export function normalizeToMavenInstallJson( !depLabel.startsWith('@') && !depLabel.startsWith(':') ) { - // unsorted_deps.json deps may be "g:a:v" in older files or - // "g:a" in v2 lock-file-shaped maps. Strip only when a version is - // present. const parts = depLabel.split(':') depCoords.add( parts.length >= 3 ? parts.slice(0, -1).join(':') : depLabel, @@ -206,127 +230,103 @@ export function normalizeToMavenInstallJson( return out } -// Resolves the bazel `external/` dir for the given workspace. -// -// Bazel's `bazel-out/` convenience symlink points at -// `/execroot//bazel-out/`; the `external/` dir we -// want is at `/external/`. `path.join` is purely lexical and -// would collapse `bazel-out/..` to the cwd itself, which is the wrong place -// Resolve the symlink at the filesystem level and walk up to -// `` instead. 
-function bazelExternalDir( - cwd: string, - outputBase: string | undefined, -): string | null { - if (outputBase) { - return path.join(outputBase, 'external') - } - const bazelOutLink = path.join(cwd, 'bazel-out') - if (!existsSync(bazelOutLink)) { - return null - } - try { - // realpath follows symlinks: ...//execroot//bazel-out - const real = realpathSync(bazelOutLink) - // Walk up bazel-out -> -> execroot -> , then into external/. - return path.join(real, '..', '..', '..', 'external') - } catch { - return null - } -} - -// Internal diagnostic: when truthy, skip the unsorted_deps.json fast path -// and force the bazel-query regex fallback. Used by bazel-bench to -// deterministically exercise parseBazelBuildOutput on every CI run. Truthy -// values are '1', 'true', 'yes' (case-insensitive); anything else (unset, -// '', '0', 'false') is treated as off. Not exposed as a user-facing CLI -// flag, so it is read here rather than added to constants.mts. -function isForceQueryFallbackEnabled(): boolean { - const raw = process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] - if (!raw) { - return false +// Cross-workspace dedup keyed on the full Maven coordinate string +// (`g:a:v[:classifier]`). The metadata cquery emits one entry per rule, +// so the same `androidx.annotation:annotation:1.8.2` can show up in +// `examples/dagger/@maven` and `examples/ksp/@maven` in rules_kotlin — +// downstream only needs it once. +function dedupArtifactsByCoord( + artifacts: ExtractedArtifact[], +): ExtractedArtifact[] { + const seen = new Set() + const out: ExtractedArtifact[] = [] + for (const a of artifacts) { + if (seen.has(a.mavenCoordinates)) { + continue + } + seen.add(a.mavenCoordinates) + out.push(a) } - const normalized = raw.toLowerCase() - return normalized === '1' || normalized === 'true' || normalized === 'yes' + return out } -// Tries `external//unsorted_deps.json` first; falls back to parsing the -// probe stdout the caller already captured during discovery. 
Discovery runs -// the same `kind("jvm_import rule|aar_import rule", @//:*)` query that -// extraction needs, so reusing its stdout skips one bazel-query invocation -// per repo on the unpinned path (where unsorted_deps.json isn't on disk). -async function extractFromOneRepo( - repoName: string, +// Build the per-workspace candidate Maven hub list. Bzlmod mode prefers +// `bazel mod show_extension`; WORKSPACE mode (and Bzlmod fallback when +// show_extension yields nothing) probes the conventional names plus any +// customer-supplied extras. Returns the list in discovery order. +async function discoverCandidatesForWorkspace( + workspaceRoot: string, + mode: WorkspaceMode, queryOpts: BazelQueryOptions, - cachedProbeStdout: string, -): Promise { - const verbose = queryOpts.verbose - // unsorted_deps.json lives under the bazel external dir. - // When --output_base is set, it's under that; otherwise under the workspace's - // bazel-out symlink (resolved via realpath, NOT lexical path.join — the - // lexical form would collapse `bazel-out/..` to cwd and miss the file). - const externalDir = bazelExternalDir(queryOpts.cwd, queryOpts.bazelOutputBase) - if (verbose) { - logger.log( - `[VERBOSE] @${repoName}: external dir:`, - externalDir ?? '(unresolved — bazel-out symlink absent)', - ) - } - const forceFallback = isForceQueryFallbackEnabled() - if (forceFallback && verbose) { - logger.log( - `[VERBOSE] @${repoName}: SOCKET_BAZEL_FORCE_QUERY_FALLBACK set; skipping unsorted_deps.json fast path.`, - ) - } - const candidates = forceFallback - ? [] - : externalDir - ? [path.join(externalDir, repoName, 'unsorted_deps.json')] - : [] - for (const c of candidates) { - if (existsSync(c)) { - // Bound the read to 1GB to prevent OOM on hostile content while allowing large real-world lockfiles. 
- // eslint-disable-next-line no-await-in-loop - const stat = await fs.stat(c) - if (stat.size > 1024 * 1024 * 1024) { - logger.warn( - `Skipping oversized ${c} (${stat.size} bytes); falling back to cached probe stdout.`, + extras: readonly string[], + verbose: boolean, +): Promise { + const candidates: string[] = [] + if (mode.bzlmod) { + const extResult = await runBazelModShowMavenExtension(queryOpts) + if (extResult.code === 0) { + candidates.push(...parseShowExtensionOutput(extResult.stdout)) + if (verbose) { + logger.log( + `[VERBOSE] workspace ${workspaceRoot}: show_extension yielded`, + candidates, ) - break - } - const json = readFileSync(c, 'utf8') - const parsed = parseUnsortedDepsJson(json) - if (parsed.length) { - if (verbose) { - logger.log( - `[VERBOSE] @${repoName}: source=unsorted_deps.json (${c}, ${parsed.length} artifact(s))`, - ) - } - return parsed.map(a => ({ ...a, sourceRepo: repoName })) } } else if (verbose) { - logger.log(`[VERBOSE] @${repoName}: unsorted_deps.json miss at`, c) + logger.log( + `[VERBOSE] workspace ${workspaceRoot}: show_extension failed (code=${extResult.code}); falling back to conventional probe`, + ) } } - // Reuse the probe stdout that discovery already captured for this repo. - // The probe ran exactly this query during validation and only validated - // repos with code === 0 make it into the cache, so retry is unnecessary - // — if the probe was flaky, the repo wouldn't be in the map. - if (!cachedProbeStdout) { - logger.warn( - `No cached probe stdout for @${repoName}; skipping. (This shouldn't happen — discovery should have populated it.)`, - ) - return [] + // Probe conventional names + extras for any candidate not already + // discovered. WORKSPACE mode relies entirely on the probe; Bzlmod + // mode uses it as a defensive fallback (e.g. custom Maven extensions + // mod show_extension doesn't enumerate). 
+ const seen = new Set(candidates) + const probe = buildMavenProbeFor(queryOpts) + const toProbe = [...CONVENTIONAL_MAVEN_REPO_NAMES, ...extras].filter( + name => !seen.has(name), + ) + for (const name of toProbe) { + // eslint-disable-next-line no-await-in-loop + const status = await probeCandidate(name, probe, verbose) + if (status === 'populated') { + candidates.push(name) + seen.add(name) + } } - if (verbose) { - logger.log( - `[VERBOSE] @${repoName}: source=cached probe stdout (${cachedProbeStdout.length} bytes)`, + return candidates +} + +// Best-effort reap of a Bazel server. Spawned with a short timeout so +// a wedged server can't itself hang the cleanup; failures are swallowed +// because the caller will `rm -rf` the output_user_root regardless. +async function reapBazelServer( + bin: string, + outputUserRoot: string, +): Promise { + try { + await spawn( + bin, + [`--output_user_root=${outputUserRoot}`, 'shutdown'], + { timeout: REAP_TIMEOUT_MS }, ) + } catch { + // Server may already be dead, or shutdown itself timed out — the + // tempdir removal below is sufficient cleanup. + } +} + +async function removeTempdir(dir: string): Promise { + try { + await fs.rm(dir, { recursive: true, force: true }) + } catch { + // Best effort. The next CLI invocation lands a fresh tempdir. } - return parseBazelBuildOutput(cachedProbeStdout).map(a => ({ - ...a, - sourceRepo: repoName, - })) +} + +function makeOutputUserRoot(): string { + return mkdtempSync(path.join(os.tmpdir(), 'socket-bazel-')) } export async function extractBazelToMaven( @@ -341,102 +341,154 @@ export async function extractBazelToMaven( } logger.groupEnd() + const perRepoTimeoutMs = + opts.perRepoTimeoutMs ?? DEFAULT_PER_REPO_TIMEOUT_MS + const extras = opts.extraMavenRepoNames ?? [] + + // Validate config + ensure toolchains BEFORE we mint a tempdir. + let bin: string + let baseEnv: NodeJS.ProcessEnv | undefined try { - // Validate caller-provided Bazel filesystem settings before invoking Bazel. 
if (opts.bazelOutputBase) { validateOutputBase(opts.bazelOutputBase, opts.cwd) } - // Java must be available before rules_jvm_external/Coursier runs; - // python shim follows so its augmented PATH inherits the JDK prefix. ensureJavaOnPath() const shim = await provisionPythonShim() - const baseEnv = shim.augmentedEnv ?? opts.env + baseEnv = shim.augmentedEnv ?? opts.env + bin = await resolveBazelBinary(opts.bin) + } catch (e) { + logger.fail(`Unexpected error in bazel2maven: ${getErrorCause(e)}`) + if (verbose) { + logger.group('[VERBOSE] error:') + logger.log(e) + logger.groupEnd() + } + return { artifactCount: 0, ok: false } + } + logger.info(`Using bazel: ${bin}`) - // Step 1: workspace detection. - const mode = detectWorkspaceMode(cwd) - logger.info( - `Workspace mode: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`, + // Track every output_user_root we mint so we can reap them all in + // the cleanup pass, even if a per-repo timeout forced a re-mint. + let outputUserRoot = makeOutputUserRoot() + const mintedRoots: string[] = [outputUserRoot] + if (verbose) { + logger.log( + `[VERBOSE] initial --output_user_root=${outputUserRoot} (will be reaped on completion)`, ) - const invocationFlags = getBazelInvocationFlags(mode) + } - // Step 2: bazel binary resolution. - const bin = await resolveBazelBinary(opts.bin) - logger.info(`Using bazel: ${bin}`) - if (verbose) { - logger.log('[VERBOSE] resolved options:', { - bin, - bazelRc: opts.bazelRc ?? '(unset)', - bazelOutputBase: opts.bazelOutputBase ?? '(unset)', - bazelFlags: opts.bazelFlags ?? '(unset)', - invocationFlags, - }) - } + let anyTimeout = false + let anyRepos = false + const allArtifacts: ExtractedArtifact[] = [] - // Step 3: build the shared query options object. - const queryOpts: BazelQueryOptions = { - bin, + try { + const workspaceRoots = findWorkspaceRoots({ cwd, - invocationFlags, - ...(opts.bazelRc ? { bazelRc: opts.bazelRc } : {}), - ...(opts.bazelFlags ? 
{ bazelFlags: opts.bazelFlags } : {}), - ...(opts.bazelOutputBase - ? { bazelOutputBase: opts.bazelOutputBase } - : {}), - ...(baseEnv ? { env: baseEnv } : {}), + ignoreDirNames: WORKSPACE_WALK_IGNORE_NAMES, + ignoreDirPrefixes: WORKSPACE_WALK_IGNORE_PREFIXES, verbose, + }) + if (!workspaceRoots.length) { + logger.warn( + `No Bazel workspace found at ${cwd} or beneath (looked for MODULE.bazel / WORKSPACE / WORKSPACE.bazel).`, + ) + return { artifactCount: 0, noEcosystemFound: true, ok: false } + } + if (verbose) { + logger.log( + `[VERBOSE] discovered ${workspaceRoots.length} workspace root(s):`, + workspaceRoots, + ) } - // Step 4: discover validated Maven repos via the two-step recipe. - // Bzlmod has a native visible-repository surface; prefer that over static - // MODULE.bazel parsing and keep bounded parsing as the legacy/fallback path. - let nativeCandidates: string[] | undefined - if (mode.bzlmod) { - const visibleRepos = await runBazelModShowVisibleRepos(queryOpts) - if (visibleRepos.code === 0) { - nativeCandidates = parseVisibleRepoCandidates(visibleRepos.stdout) + for (const workspaceRoot of workspaceRoots) { + const relPath = path.relative(cwd, workspaceRoot) + let mode: WorkspaceMode + try { + mode = detectWorkspaceMode(workspaceRoot) + } catch (e) { if (verbose) { logger.log( - '[VERBOSE] Bzlmod visible repo candidates:', - nativeCandidates, + `[VERBOSE] workspace ${workspaceRoot}: detect failed (${getErrorCause(e)}); skipping`, ) } - } else if (verbose) { - logger.log( - '[VERBOSE] bazel mod show_repo failed; falling back to static candidate parsing:', - visibleRepos.stderr, - ) + continue } - } - // Returns Map so extraction can reuse the probe - // output and skip running an identical bazel-query a second time. 
- const probe = buildProbeFor(queryOpts) - const repos = await discoverMavenRepos( - cwd, - probe, - nativeCandidates, - verbose, - ) - const repoNames = Array.from(repos.keys()) - logger.info( - `Discovered ${repos.size} Maven repo(s): ${repoNames.join(', ') || '(none)'}`, - ) + logger.info( + `Workspace ${relPath || '.'}: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`, + ) + const invocationFlags = getBazelInvocationFlags(mode) + const buildQueryOpts = ( + userRoot: string, + spawnCwd: string, + ): BazelQueryOptions => ({ + bin, + cwd: spawnCwd, + invocationFlags, + outputUserRoot: userRoot, + ...(opts.bazelRc ? { bazelRc: opts.bazelRc } : {}), + ...(opts.bazelFlags ? { bazelFlags: opts.bazelFlags } : {}), + ...(opts.bazelOutputBase + ? { bazelOutputBase: opts.bazelOutputBase } + : {}), + ...(baseEnv ? { env: baseEnv } : {}), + verbose, + }) - // Step 5: extract artifacts from each repo (preferring unsorted_deps.json). - const allArtifacts: ExtractedArtifact[] = [] - for (const [repo, probeStdout] of repos) { // eslint-disable-next-line no-await-in-loop - const artifacts = await extractFromOneRepo(repo, queryOpts, probeStdout) - allArtifacts.push(...artifacts) - logger.info(`@${repo}: ${artifacts.length} artifact(s)`) + const candidates = await discoverCandidatesForWorkspace( + workspaceRoot, + mode, + buildQueryOpts(outputUserRoot, workspaceRoot), + extras, + verbose, + ) + logger.info( + `Workspace ${relPath || '.'}: discovered ${candidates.length} Maven repo(s): ${ + candidates.join(', ') || '(none)' + }`, + ) + for (const repoName of candidates) { + anyRepos = true + // eslint-disable-next-line no-await-in-loop + const result: CqueryRepoResult = await runMetadataCqueryForRepo({ + opts: buildQueryOpts(outputUserRoot, workspaceRoot), + repoName, + timeoutMs: perRepoTimeoutMs, + workspaceRelPath: relPath, + workspaceRoot, + }) + allArtifacts.push(...result.artifacts) + if (result.status === 'ok' || result.status === 'partial') { + logger.info( + 
`@${repoName}: ${result.artifacts.length} artifact(s) (status=${result.status})`, + ) + } else if (result.status === 'timeout') { + logger.warn( + `@${repoName}: cquery timed out after ${perRepoTimeoutMs}ms; reaping server`, + ) + anyTimeout = true + // eslint-disable-next-line no-await-in-loop + await reapBazelServer(bin, outputUserRoot) + // eslint-disable-next-line no-await-in-loop + await removeTempdir(outputUserRoot) + outputUserRoot = makeOutputUserRoot() + mintedRoots.push(outputUserRoot) + if (verbose) { + logger.log( + `[VERBOSE] minted fresh --output_user_root=${outputUserRoot} after timeout`, + ) + } + } else if (verbose) { + logger.log( + `[VERBOSE] @${repoName}: status=${result.status} (no artifacts)`, + ) + } + } } - // Step 6: normalize to maven_install.json shape. - const normalized = normalizeToMavenInstallJson(allArtifacts) - - // Step 7: write outputs. - // Standalone output writes directly to `out`; auto-manifest uses a sibling directory - // to avoid colliding with a repo's checked-in rules_jvm_external lockfile and - // to avoid repo-root gitignore patterns such as `/maven_install.json`. + const deduped = dedupArtifactsByCoord(allArtifacts) + const normalized = normalizeToMavenInstallJson(deduped) const layout = opts.outLayout ?? 'standalone' const manifestDir = layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out @@ -450,18 +502,15 @@ export async function extractBazelToMaven( if (verbose) { logger.log('[VERBOSE] outputs:', { - artifactCount: allArtifacts.length, - generatedManifest: path.relative(out, manifestPath), + artifactCount: deduped.length, + complete: !anyTimeout, layout, - manifest: manifestPath, - mavenRepos: repoNames, - tool: 'socket manifest bazel', - workspace: { bzlmod: mode.bzlmod, legacyWorkspace: mode.workspace }, + manifestPath, }) } - if (!allArtifacts.length) { - if (!repos.size) { + if (!deduped.length) { + if (!anyRepos) { if (verbose) { logger.info( 'No Maven artifacts extracted. 
failureCategory=no-supported-ecosystem', @@ -475,26 +524,19 @@ export async function extractBazelToMaven( } } logger.fail( - `Discovered Maven repo(s) ${repoNames.join(', ')} but extracted zero artifacts. failureCategory=ecosystem-detected-but-empty`, + 'Discovered Maven repo(s) but extracted zero artifacts. failureCategory=ecosystem-detected-but-empty', ) - return { - artifactCount: 0, - manifestPath, - ok: false, - } + return { artifactCount: 0, manifestPath, ok: false } } logger.success( - `Wrote ${allArtifacts.length} artifact(s) to ${path.relative(cwd, manifestPath)}.`, + `Wrote ${deduped.length} artifact(s) to ${path.relative(cwd, manifestPath)}.`, ) return { - artifactCount: allArtifacts.length, + artifactCount: deduped.length, manifestPath, - ok: true, + ok: !anyTimeout, } } catch (e) { - // Always surface the error message; users should not have to - // re-run a multi-minute bazel build with --verbose just to see whether - // the failure was a missing dependency, permission error, or network blip. 
logger.fail(`Unexpected error in bazel2maven: ${getErrorCause(e)}`) if (verbose) { logger.group('[VERBOSE] error:') @@ -504,5 +546,12 @@ export async function extractBazelToMaven( logger.info('Re-run with --verbose for the full stack.') } return { artifactCount: 0, ok: false } + } finally { + for (const dir of mintedRoots) { + // eslint-disable-next-line no-await-in-loop + await reapBazelServer(bin, dir) + // eslint-disable-next-line no-await-in-loop + await removeTempdir(dir) + } } } diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts index 4d43c1da5..4e3923d27 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts @@ -3,7 +3,6 @@ import { mkdirSync, mkdtempSync, readFileSync, - readdirSync, rmSync, writeFileSync, } from 'node:fs' @@ -12,157 +11,153 @@ import path from 'node:path' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -// Mock the helpers BEFORE importing the orchestrator. -vi.mock('./bazel-workspace-detect.mts', () => ({ - detectWorkspaceMode: vi.fn(), - getBazelInvocationFlags: vi.fn(() => []), -})) +import type { CqueryRepoResult } from './bazel-cquery.mts' +import type { ExtractedArtifact } from './bazel-build-parser.mts' + +// Mock collaborators BEFORE importing the orchestrator. The orchestrator +// composes pure-function discovery + the metadata cquery + a workspace +// walker; mocking these lets us drive end-to-end behaviour without a +// real Bazel toolchain. 
vi.mock('./bazel-bin-detect.mts', () => ({ resolveBazelBinary: vi.fn(async () => '/usr/local/bin/bazel'), })) -vi.mock('./bazel-repo-discovery.mts', () => ({ - discoverMavenRepos: vi.fn(), - parseVisibleRepoCandidates: vi.fn(() => []), - parseMavenRepoCandidates: vi.fn(), - validateMavenRepo: vi.fn(), -})) -const { probe } = vi.hoisted(() => ({ - probe: async () => ({ code: 0, stdout: 'maven_coordinates=' }), -})) -vi.mock('./bazel-query-runner.mts', () => ({ - buildProbeFor: vi.fn(() => probe), - runBazelModShowVisibleRepos: vi.fn(async () => ({ - code: 0, - stderr: '', - stdout: '', - })), - runBazelQuery: vi.fn(), -})) -// Mock hardening helpers so unit tests run without real fs/network side-effects. vi.mock('./bazel-output-base-check.mts', () => ({ validateOutputBase: vi.fn(), })) +vi.mock('./bazel-java-shim.mts', () => ({ + ensureJavaOnPath: vi.fn(), +})) vi.mock('./bazel-python-shim.mts', () => ({ provisionPythonShim: vi.fn(async () => ({ augmentedEnv: undefined, shimDir: undefined, })), })) -// ensureJavaOnPath now throws when java is missing; unit tests must not -// depend on the host having a JDK installed. -vi.mock('./bazel-java-shim.mts', () => ({ - ensureJavaOnPath: vi.fn(), +vi.mock('./bazel-workspace-detect.mts', () => ({ + detectWorkspaceMode: vi.fn(), + getBazelInvocationFlags: vi.fn(() => []), +})) +vi.mock('./bazel-workspace-walk.mts', () => ({ + findWorkspaceRoots: vi.fn(), +})) +vi.mock('./bazel-query-runner.mts', () => ({ + buildMavenProbeFor: vi.fn(() => async (_: string) => ({ + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + })), + runBazelModShowMavenExtension: vi.fn(), +})) +vi.mock('./bazel-repo-discovery.mts', async () => { + // Preserve `CONVENTIONAL_MAVEN_REPO_NAMES` + `probeCandidate` while + // overriding `parseShowExtensionOutput` with a spy. 
+ const actual = await vi.importActual< + typeof import('./bazel-repo-discovery.mts') + >('./bazel-repo-discovery.mts') + return { + ...actual, + parseShowExtensionOutput: vi.fn(actual.parseShowExtensionOutput), + } +}) +vi.mock('./bazel-cquery.mts', () => ({ + runMetadataCqueryForRepo: vi.fn(), +})) +// Quiet the spawn calls reapBazelServer makes during cleanup. +vi.mock('@socketsecurity/registry/lib/spawn', () => ({ + spawn: vi.fn(async () => ({ code: 0, stdout: '', stderr: '' })), })) -import { validateOutputBase } from './bazel-output-base-check.mts' -import { discoverMavenRepos } from './bazel-repo-discovery.mts' +import { runMetadataCqueryForRepo } from './bazel-cquery.mts' +import { + buildMavenProbeFor, + runBazelModShowMavenExtension, +} from './bazel-query-runner.mts' +import { parseShowExtensionOutput } from './bazel-repo-discovery.mts' import { detectWorkspaceMode } from './bazel-workspace-detect.mts' +import { findWorkspaceRoots } from './bazel-workspace-walk.mts' import { extractBazelToMaven, normalizeToMavenInstallJson, } from './extract_bazel_to_maven.mts' -const FIXTURES = path.join( - import.meta.dirname, - '..', - '..', - '..', - '..', - 'test', - 'fixtures', - 'manifest-bazel', - 'query-output', -) +const mkResult = (over: Partial): CqueryRepoResult => ({ + artifacts: [], + durationMs: 0, + repoName: 'maven', + status: 'ok', + stderr: '', + workspaceRelPath: '', + ...over, +}) -// Walk a directory recursively and return all file paths. 
-function walk(dir: string): string[] { - const acc: string[] = [] - for (const e of readdirSync(dir, { withFileTypes: true })) { - const p = path.join(dir, e.name) - if (e.isDirectory()) { - acc.push(...walk(p)) - } else { - acc.push(p) - } - } - return acc -} +const mkArt = ( + coord: string, + ruleName: string, + over: Partial = {}, +): ExtractedArtifact => ({ + deps: [], + mavenCoordinates: coord, + ruleKind: 'jvm_import', + ruleName, + sourceRepo: 'maven', + ...over, +}) + +const SHOW_EXT_HUB_ONLY = `## @@rules_jvm_external+//:extensions.bzl%maven: + +Fetched repositories: + - maven (imported by ) +` describe('extractBazelToMaven', () => { let tmp: string beforeEach(() => { - tmp = mkdtempSync(path.join(os.tmpdir(), 'bazel-extract-')) + tmp = mkdtempSync(path.join(os.tmpdir(), 'sock-bazel-x2m-')) vi.mocked(detectWorkspaceMode).mockReturnValue({ bzlmod: true, workspace: false, }) - process.exitCode = 0 + vi.mocked(findWorkspaceRoots).mockReturnValue([tmp]) + vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 0, + stdout: SHOW_EXT_HUB_ONLY, + stderr: '', + }) + vi.mocked(parseShowExtensionOutput).mockClear() + vi.mocked(runBazelModShowMavenExtension).mockClear() + vi.mocked(runMetadataCqueryForRepo).mockReset() + vi.mocked(buildMavenProbeFor).mockReset() + vi.mocked(buildMavenProbeFor).mockReturnValue(async () => ({ + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + })) }) afterEach(() => { rmSync(tmp, { recursive: true, force: true }) - vi.resetAllMocks() - process.exitCode = 0 - }) - - it('dedupes exact duplicate coordinates without failing', () => { - const manifest = normalizeToMavenInstallJson([ - { - ruleKind: 'jvm_import', - ruleName: 'com_example_demo', - mavenCoordinates: 'com.example:demo:1.0.0', - deps: [], - }, - { - ruleKind: 'jvm_import', - ruleName: 'com_example_demo', - mavenCoordinates: 'com.example:demo:1.0.0', - deps: [], - }, - ]) - - 
expect(Object.keys(manifest.artifacts)).toEqual(['com.example:demo']) - expect(manifest.artifacts['com.example:demo']).toEqual({ - shasums: {}, - version: '1.0.0', - }) - }) - - it('fails on duplicate label suffixes when dependency resolution is ambiguous', () => { - expect(() => - normalizeToMavenInstallJson([ - { - ruleKind: 'jvm_import', - ruleName: 'root', - mavenCoordinates: 'com.example:root:1.0.0', - deps: [':shared_rule_name'], - }, - { - ruleKind: 'jvm_import', - ruleName: 'shared_rule_name', - mavenCoordinates: 'com.one:lib:1.0.0', - deps: [], - }, - { - ruleKind: 'jvm_import', - ruleName: 'shared_rule_name', - mavenCoordinates: 'com.two:lib:1.0.0', - deps: [], - }, - ]), - ).toThrow(/Ambiguous Bazel dependency label :shared_rule_name/) }) - it('writes maven_install.json directly under out without a summary sidecar', async () => { - const sample = readFileSync( - path.join(FIXTURES, 'jvm-import-sample.txt'), - 'utf8', - ) - vi.mocked(discoverMavenRepos).mockResolvedValue( - new Map([['maven', sample]]), + function readManifest(out: string): unknown { + return JSON.parse( + readFileSync( + path.join(out, '.socket-auto-manifest', 'maven_install.json'), + 'utf8', + ), ) + } + it('extracts a single Bzlmod workspace end-to-end', async () => { + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [ + mkArt('com.google.guava:guava:33.0.0-jre', 'com_google_guava_guava'), + mkArt('androidx.annotation:annotation:1.8.2', 'androidx_annotation'), + ], + repoName: 'maven', + }), + ) const result = await extractBazelToMaven({ bazelFlags: undefined, bazelOutputBase: undefined, @@ -170,43 +165,22 @@ describe('extractBazelToMaven', () => { bin: undefined, cwd: tmp, out: tmp, + outLayout: 'flat', verbose: false, }) - expect(result).toEqual({ - artifactCount: 2, - manifestPath: path.join(tmp, 'maven_install.json'), - ok: true, - }) - - const manifestText = readFileSync( - path.join(tmp, 'maven_install.json'), - 'utf8', - ) - const manifest = 
JSON.parse(manifestText) - expect(manifest.artifacts['com.google.guava:guava']).toEqual({ - shasums: { jar: expect.stringMatching(/^9408c2c4/) }, - version: '33.0.0-jre', - }) - // Per the canonical rules_jvm_external maven_install.json shape (see - // normalizeToMavenInstallJson), dependency keys and values use "g:a" - // (no version) — matching rules_jvm_external lockfile output. - expect(manifest.dependencies['com.google.guava:guava']).toContain( - 'com.google.guava:failureaccess', - ) - - expect(existsSync(path.join(tmp, 'socket-bazel-summary.json'))).toBe(false) - expect(existsSync(path.join(tmp, '_whole_repo'))).toBe(false) + expect(result.ok).toBe(true) + expect(result.artifactCount).toBe(2) + const manifest = readManifest(tmp) as { + artifacts: Record + } + expect(Object.keys(manifest.artifacts).sort()).toEqual([ + 'androidx.annotation:annotation', + 'com.google.guava:guava', + ]) }) - it('writes outputs to .socket-auto-manifest/ when outLayout is "flat"', async () => { - const sample = readFileSync( - path.join(FIXTURES, 'jvm-import-sample.txt'), - 'utf8', - ) - vi.mocked(discoverMavenRepos).mockResolvedValue( - new Map([['maven', sample]]), - ) - + it('returns noEcosystemFound when no workspace roots are discovered', async () => { + vi.mocked(findWorkspaceRoots).mockReturnValue([]) const result = await extractBazelToMaven({ bazelFlags: undefined, bazelOutputBase: undefined, @@ -217,40 +191,14 @@ describe('extractBazelToMaven', () => { outLayout: 'flat', verbose: false, }) - expect(result).toEqual({ - artifactCount: 2, - manifestPath: path.join( - tmp, - '.socket-auto-manifest', - 'maven_install.json', - ), - ok: true, - }) - - // Manifest lands inside the sibling dir. - expect( - existsSync(path.join(tmp, '.socket-auto-manifest', 'maven_install.json')), - ).toBe(true) - expect( - existsSync( - path.join(tmp, '.socket-auto-manifest', 'socket-bazel-summary.json'), - ), - ).toBe(false) - // Neither output bleeds into / itself nor a _whole_repo/ wrapper. 
- expect(existsSync(path.join(tmp, 'maven_install.json'))).toBe(false) - expect(existsSync(path.join(tmp, 'socket-bazel-summary.json'))).toBe(false) - expect(existsSync(path.join(tmp, '_whole_repo'))).toBe(false) + expect(result.ok).toBe(false) + expect(result.noEcosystemFound).toBe(true) }) - it('writes NO .socket.facts.json files anywhere under out', async () => { - const sample = readFileSync( - path.join(FIXTURES, 'jvm-import-sample.txt'), - 'utf8', - ) - vi.mocked(discoverMavenRepos).mockResolvedValue( - new Map([['maven', sample]]), + it('reports detected-but-empty when discovered repos extract zero artifacts', async () => { + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ artifacts: [], status: 'empty', repoName: 'maven' }), ) - const result = await extractBazelToMaven({ bazelFlags: undefined, bazelOutputBase: undefined, @@ -258,19 +206,41 @@ describe('extractBazelToMaven', () => { bin: undefined, cwd: tmp, out: tmp, + outLayout: 'flat', verbose: false, }) - - const files = walk(tmp) - expect( - files.find(f => path.basename(f) === '.socket.facts.json'), - ).toBeUndefined() - expect(result.ok).toBe(true) + expect(result.ok).toBe(false) + expect(result.noEcosystemFound).toBeUndefined() }) - it('reports noEcosystemFound without mutating process.exitCode when no repos discovered', async () => { - vi.mocked(discoverMavenRepos).mockResolvedValue(new Map()) - + it('dedups artifacts across multiple workspaces by full Maven coordinate', async () => { + const nested = path.join(tmp, 'examples', 'dagger') + mkdirSync(nested, { recursive: true }) + vi.mocked(findWorkspaceRoots).mockReturnValue([tmp, nested]) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [ + mkArt('com.google.guava:guava:33.0.0-jre', 'com_google_guava_guava'), + ], + repoName: 'maven', + workspaceRelPath: '', + }), + ) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [ + // Same coord as the root 
workspace — must be deduped. + mkArt('com.google.guava:guava:33.0.0-jre', 'com_google_guava_guava', { + sourceRepo: 'examples/dagger:maven', + }), + mkArt('com.google.dagger:dagger:2.50', 'com_google_dagger_dagger', { + sourceRepo: 'examples/dagger:maven', + }), + ], + repoName: 'maven', + workspaceRelPath: 'examples/dagger', + }), + ) const result = await extractBazelToMaven({ bazelFlags: undefined, bazelOutputBase: undefined, @@ -278,30 +248,41 @@ describe('extractBazelToMaven', () => { bin: undefined, cwd: tmp, out: tmp, + outLayout: 'flat', verbose: false, }) + expect(result.artifactCount).toBe(2) + const manifest = readManifest(tmp) as { + artifacts: Record + } + expect(Object.keys(manifest.artifacts).sort()).toEqual([ + 'com.google.dagger:dagger', + 'com.google.guava:guava', + ]) + }) - expect(process.exitCode).toBe(0) - expect(result).toEqual({ - artifactCount: 0, - manifestPath: path.join(tmp, 'maven_install.json'), - noEcosystemFound: true, - ok: false, + it('reports ok:false on per-repo timeout but keeps going', async () => { + // Two candidates: first times out, second succeeds. The orchestrator + // re-mints --output_user_root after the timeout. + vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 0, + stdout: `## @@rules_jvm_external+//:extensions.bzl%maven: + +Fetched repositories: + - maven (imported by ) + - maven_dev (imported by ) +`, + stderr: '', }) - // Empty manifest is still written. 
- const manifestText = readFileSync( - path.join(tmp, 'maven_install.json'), - 'utf8', + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ artifacts: [], status: 'timeout', repoName: 'maven' }), ) - const manifest = JSON.parse(manifestText) - expect(manifest.artifacts).toEqual({}) - }) - - it('reports hard failure when discovered repos extract zero artifacts', async () => { - vi.mocked(discoverMavenRepos).mockResolvedValue( - new Map([['maven', '# no parseable rules\n']]), + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:after:1.0', 'after')], + repoName: 'maven_dev', + }), ) - const result = await extractBazelToMaven({ bazelFlags: undefined, bazelOutputBase: undefined, @@ -309,319 +290,186 @@ describe('extractBazelToMaven', () => { bin: undefined, cwd: tmp, out: tmp, + outLayout: 'flat', + perRepoTimeoutMs: 60_000, verbose: false, }) - - expect(result).toEqual({ - artifactCount: 0, - manifestPath: path.join(tmp, 'maven_install.json'), - ok: false, - }) - expect(result.noEcosystemFound).toBeUndefined() + expect(result.ok).toBe(false) + expect(result.artifactCount).toBe(1) }) - it('iterates each discovered repo independently when one has no parseable rules', async () => { - const sample = readFileSync( - path.join(FIXTURES, 'jvm-import-sample.txt'), - 'utf8', - ) - // First repo's probe stdout has the canonical sample (2 artifacts). - // Second repo's probe stdout has no parseable jvm_import / aar_import - // blocks, so the parser yields 0 artifacts for it — the iteration must - // still surface the first repo's results. 
- vi.mocked(discoverMavenRepos).mockResolvedValue( - new Map([ - ['maven', sample], - ['maven_test', '# no rules here\n'], - ]), + it('threads extraMavenRepoNames into the candidate list (WORKSPACE mode)', async () => { + vi.mocked(detectWorkspaceMode).mockReturnValue({ + bzlmod: false, + workspace: true, + }) + // Probe accepts only `my_jars`; conventional names all return not-defined. + vi.mocked(buildMavenProbeFor).mockReturnValue(async (name: string) => { + if (name === 'my_jars') { + return { code: 0, stdout: '@my_jars//:foo\n', stderr: '' } + } + return { + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:custom:1.0', 'custom')], + repoName: 'my_jars', + }), ) - const result = await extractBazelToMaven({ bazelFlags: undefined, bazelOutputBase: undefined, bazelRc: undefined, bin: undefined, cwd: tmp, + extraMavenRepoNames: ['my_jars'], out: tmp, + outLayout: 'flat', verbose: false, }) - - const manifest = JSON.parse( - readFileSync(path.join(tmp, 'maven_install.json'), 'utf8'), + expect(result.ok).toBe(true) + expect(result.artifactCount).toBe(1) + expect(runMetadataCqueryForRepo).toHaveBeenCalledTimes(1) + expect(vi.mocked(runMetadataCqueryForRepo).mock.calls[0]![0]).toMatchObject( + { repoName: 'my_jars' }, ) - // Only the successful repo's artifacts (2); maven_test was skipped. - expect(Object.keys(manifest.artifacts)).toHaveLength(2) - expect(result).toEqual({ - artifactCount: 2, - manifestPath: path.join(tmp, 'maven_install.json'), - ok: true, - }) + // show_extension must NOT be called in pure WORKSPACE mode. 
+ expect(runBazelModShowMavenExtension).not.toHaveBeenCalled() }) - it('returns failure without mutating process.exitCode when one group:artifact has conflicting versions', async () => { - const conflictingStdout = [ - 'jvm_import(', - ' name = "com_example_demo_v1",', - ' maven_coordinates = "com.example:demo:1.0.0",', - ')', - 'jvm_import(', - ' name = "com_example_demo_v2",', - ' maven_coordinates = "com.example:demo:2.0.0",', - ')', - ].join('\n') - vi.mocked(discoverMavenRepos).mockResolvedValue( - new Map([['maven', conflictingStdout]]), + it('writes maven_install.json into .socket-auto-manifest in flat layout', async () => { + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), ) - - const result = await extractBazelToMaven({ + await extractBazelToMaven({ bazelFlags: undefined, bazelOutputBase: undefined, bazelRc: undefined, bin: undefined, cwd: tmp, out: tmp, + outLayout: 'flat', verbose: false, }) - - expect(process.exitCode).toBe(0) - expect(result).toEqual({ - artifactCount: 0, - ok: false, - }) - expect(existsSync(path.join(tmp, 'maven_install.json'))).toBe(false) + expect( + existsSync(path.join(tmp, '.socket-auto-manifest', 'maven_install.json')), + ).toBe(true) }) +}) - it('calls validateOutputBase when bazelOutputBase is set', async () => { - vi.mocked(discoverMavenRepos).mockResolvedValue(new Map()) - await extractBazelToMaven({ - bazelFlags: undefined, - bazelOutputBase: tmp, - bazelRc: undefined, - bin: undefined, - cwd: tmp, - out: tmp, - verbose: false, - }) - // validateOutputBase is mocked; verify it was called with the provided path. 
- expect(vi.mocked(validateOutputBase)).toHaveBeenCalledWith(tmp, tmp) +describe('normalizeToMavenInstallJson', () => { + it('dedupes exact duplicate coordinates without failing', () => { + const result = normalizeToMavenInstallJson([ + { + deps: [], + mavenCoordinates: 'com.google.guava:guava:33.0.0-jre', + ruleKind: 'jvm_import', + ruleName: 'com_google_guava_guava', + }, + { + deps: [], + mavenCoordinates: 'com.google.guava:guava:33.0.0-jre', + ruleKind: 'jvm_import', + ruleName: 'com_google_guava_guava', + }, + ]) + expect(Object.keys(result.artifacts)).toEqual(['com.google.guava:guava']) }) - it('propagates verbose into discovery and emits resolved-options / outputs diagnostics', async () => { - const sample = readFileSync( - path.join(FIXTURES, 'jvm-import-sample.txt'), - 'utf8', - ) - vi.mocked(discoverMavenRepos).mockResolvedValue( - new Map([['maven', sample]]), - ) - const { logger } = await import('@socketsecurity/registry/lib/logger') - const logSpy = vi.spyOn(logger, 'log').mockImplementation(() => logger) - - try { - await extractBazelToMaven({ - bazelFlags: undefined, - bazelOutputBase: undefined, - bazelRc: undefined, - bin: undefined, - cwd: tmp, - out: tmp, - verbose: true, - }) - - const text = logSpy.mock.calls - .map(args => - args - .map(a => (typeof a === 'string' ? a : JSON.stringify(a))) - .join(' '), - ) - .join('\n') - // Resolved-options block — names a few known load-bearing fields. - expect(text).toContain('[VERBOSE] resolved options:') - expect(text).toContain('bin') - expect(text).toContain('bazelRc') - expect(text).toContain('bazelOutputBase') - // Outputs block names manifest path and extracted summary fields. - expect(text).toContain('[VERBOSE] outputs:') - expect(text).toContain('manifest') - expect(text).toContain('artifactCount') - expect(text).toContain('generatedManifest') - expect(text).toContain('mavenRepos') - - // Discovery was called with verbose=true as the 4th positional. 
The - // 3rd positional reflects whatever parseVisibleRepoCandidates returned - // (an empty array in this mocked setup). - expect(vi.mocked(discoverMavenRepos)).toHaveBeenCalledWith( - expect.any(String), - expect.any(Function), - expect.any(Array), - true, - ) - } finally { - logSpy.mockRestore() - } + it('fails on conflicting versions for the same group:artifact', () => { + expect(() => + normalizeToMavenInstallJson([ + { + deps: [], + mavenCoordinates: 'com.example:lib:1.0', + ruleKind: 'jvm_import', + ruleName: 'a', + }, + { + deps: [], + mavenCoordinates: 'com.example:lib:2.0', + ruleKind: 'jvm_import', + ruleName: 'b', + }, + ]), + ).toThrow(/Conflicting versions/) }) -}) -describe('SOCKET_BAZEL_FORCE_QUERY_FALLBACK', () => { - // These tests pit two parsers against each other by giving each a - // coordinate the other does not produce, then assert which one ran by - // checking which coordinate landed in the manifest. - // - unsorted_deps.json (fast path) → `com.example:from-json:9.9.9` - // - cached probe stdout (regex fallback) → `com.example:from-regex:1.0.0` - const FAST_PATH_JSON = JSON.stringify({ - artifacts: [ + it('preserves the first artifact’s sha256 when subsequent dupes lack one', () => { + const result = normalizeToMavenInstallJson([ + { + deps: [], + mavenCoordinates: 'com.example:lib:1.0', + mavenSha256: 'a'.repeat(64), + ruleKind: 'jvm_import', + ruleName: 'a', + }, { - coordinates: 'com.example:from-json:9.9.9', - url: 'https://example.invalid/from-json-9.9.9.jar', - sha256: - '1111111111111111111111111111111111111111111111111111111111111111', deps: [], + mavenCoordinates: 'com.example:lib:1.0', + ruleKind: 'jvm_import', + ruleName: 'a', }, - ], + ]) + expect(result.artifacts['com.example:lib']?.shasums.jar).toBe('a'.repeat(64)) }) +}) - const FALLBACK_PROBE_STDOUT = [ - 'jvm_import(', - ' name = "com_example_from_regex",', - ' jars = ["@maven//:from-regex-1.0.0.jar"],', - ' maven_coordinates = "com.example:from-regex:1.0.0",', - ' deps = 
[],', - ')', - '', - ].join('\n') - +describe('fixture-driven write-output', () => { let tmp: string - let originalEnv: string | undefined beforeEach(() => { - tmp = mkdtempSync(path.join(os.tmpdir(), 'bazel-extract-fallback-')) - // Place unsorted_deps.json under /external/maven/. - // This is what bazelExternalDir resolves to when bazelOutputBase is set. - const externalRepoDir = path.join(tmp, 'external', 'maven') - mkdirSync(externalRepoDir, { recursive: true }) - writeFileSync( - path.join(externalRepoDir, 'unsorted_deps.json'), - FAST_PATH_JSON, - 'utf8', - ) + tmp = mkdtempSync(path.join(os.tmpdir(), 'sock-bazel-write-')) vi.mocked(detectWorkspaceMode).mockReturnValue({ bzlmod: true, workspace: false, }) - vi.mocked(discoverMavenRepos).mockResolvedValue( - new Map([['maven', FALLBACK_PROBE_STDOUT]]), + vi.mocked(findWorkspaceRoots).mockReturnValue([tmp]) + vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 0, + stdout: SHOW_EXT_HUB_ONLY, + stderr: '', + }) + vi.mocked(runMetadataCqueryForRepo).mockReset() + vi.mocked(runMetadataCqueryForRepo).mockResolvedValue( + mkResult({ + artifacts: [mkArt('com.example:lib:1.0', 'lib')], + repoName: 'maven', + }), ) - originalEnv = process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] - process.exitCode = 0 }) afterEach(() => { - if (originalEnv === undefined) { - delete process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] - } else { - process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] = originalEnv - } rmSync(tmp, { recursive: true, force: true }) - vi.resetAllMocks() - process.exitCode = 0 - }) - - it('uses the unsorted_deps.json fast path when the env var is unset', async () => { - delete process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] - - const result = await extractBazelToMaven({ - bazelFlags: undefined, - bazelOutputBase: tmp, - bazelRc: undefined, - bin: undefined, - cwd: tmp, - out: tmp, - verbose: false, - }) - - expect(result.ok).toBe(true) - const manifest = JSON.parse( - readFileSync(path.join(tmp, 
'maven_install.json'), 'utf8'), - ) - // The JSON parser ran: from-json coord is present, from-regex is absent. - expect(manifest.artifacts['com.example:from-json']).toBeDefined() - expect(manifest.artifacts['com.example:from-regex']).toBeUndefined() }) - it('skips the unsorted_deps.json fast path and uses the regex fallback when the env var is "1"', async () => { - process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] = '1' - - const result = await extractBazelToMaven({ - bazelFlags: undefined, - bazelOutputBase: tmp, - bazelRc: undefined, - bin: undefined, - cwd: tmp, - out: tmp, - verbose: false, - }) - - expect(result.ok).toBe(true) - const manifest = JSON.parse( - readFileSync(path.join(tmp, 'maven_install.json'), 'utf8'), - ) - // The regex parser ran: from-regex coord is present, from-json is absent. - expect(manifest.artifacts['com.example:from-regex']).toBeDefined() - expect(manifest.artifacts['com.example:from-json']).toBeUndefined() - }) - - it.each([ - ['unset', undefined], - ['empty string', ''], - ['"0"', '0'], - ['"false"', 'false'], - ])('treats %s as falsy and uses the fast path', async (_label, value) => { - if (value === undefined) { - delete process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] - } else { - process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] = value - } - - const result = await extractBazelToMaven({ - bazelFlags: undefined, - bazelOutputBase: tmp, - bazelRc: undefined, - bin: undefined, - cwd: tmp, - out: tmp, - verbose: false, - }) - - expect(result.ok).toBe(true) - const manifest = JSON.parse( - readFileSync(path.join(tmp, 'maven_install.json'), 'utf8'), - ) - expect(manifest.artifacts['com.example:from-json']).toBeDefined() - expect(manifest.artifacts['com.example:from-regex']).toBeUndefined() - }) - - it.each([ - ['"1"', '1'], - ['"true"', 'true'], - ['"YES"', 'YES'], - ])('treats %s as truthy and forces the fallback', async (_label, value) => { - process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'] = value - - const result = await 
extractBazelToMaven({ + it('does not emit any .socket.facts.json file (Maven path is BOM-only)', async () => { + const outDir = path.join(tmp, 'out') + mkdirSync(outDir, { recursive: true }) + // Sanity: ensure unrelated files in out/ are not touched. + writeFileSync(path.join(outDir, 'README.md'), '') + await extractBazelToMaven({ bazelFlags: undefined, - bazelOutputBase: tmp, + bazelOutputBase: undefined, bazelRc: undefined, bin: undefined, cwd: tmp, - out: tmp, + out: outDir, + outLayout: 'flat', verbose: false, }) - - expect(result.ok).toBe(true) - const manifest = JSON.parse( - readFileSync(path.join(tmp, 'maven_install.json'), 'utf8'), - ) - expect(manifest.artifacts['com.example:from-regex']).toBeDefined() - expect(manifest.artifacts['com.example:from-json']).toBeUndefined() + expect( + existsSync(path.join(outDir, '.socket-auto-manifest', '.socket.facts.json')), + ).toBe(false) + expect( + existsSync(path.join(outDir, '.socket-auto-manifest', 'maven_install.json')), + ).toBe(true) }) }) diff --git a/src/commands/manifest/bazel/extract_bazel_to_pypi.mts b/src/commands/manifest/bazel/extract_bazel_to_pypi.mts index c23f4fe6b..95385df42 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_pypi.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_pypi.mts @@ -25,7 +25,7 @@ import { runBazelModShowVisibleRepos, runBazelQuery, } from './bazel-query-runner.mts' -import { parseVisibleRepoCandidates } from './bazel-repo-discovery.mts' +import { parseVisibleRepoCandidates } from './bazel-pypi-discovery.mts' import { detectWorkspaceMode, getBazelInvocationFlags, diff --git a/src/commands/manifest/bazel/extract_bazel_to_pypi.test.mts b/src/commands/manifest/bazel/extract_bazel_to_pypi.test.mts index 652d4eb40..df3d4a1b0 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_pypi.test.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_pypi.test.mts @@ -29,13 +29,13 @@ vi.mock('./bazel-pypi-discovery.mts', () => ({ workspaceMode: 'bzlmod', }, ]), + 
parseVisibleRepoCandidates: vi.fn(() => []), })) const { probe } = vi.hoisted(() => ({ - probe: async () => ({ code: 0, stdout: '@pypi//requests:pkg\n' }), + probe: async () => ({ code: 0, stdout: '@pypi//requests:pkg\n', stderr: '' }), })) vi.mock('./bazel-query-runner.mts', () => ({ buildPypiProbeFor: vi.fn(() => probe), - buildProbeFor: vi.fn(() => probe), runBazelModShowVisibleRepos: vi.fn(async () => ({ code: 0, stderr: '', diff --git a/src/utils/glob.mts b/src/utils/glob.mts index dd89f37ef..2d3561869 100644 --- a/src/utils/glob.mts +++ b/src/utils/glob.mts @@ -22,7 +22,7 @@ const DEFAULT_IGNORE_FOR_GIT_IGNORE = defaultIgnore.filter( p => !p.endsWith('.gitignore'), ) -const IGNORED_DIRS = [ +export const IGNORED_DIRS = [ // Taken from ignore-by-default: // https://github.com/novemberborn/ignore-by-default/blob/v2.1.0/index.js '.git', // Git repository files, see