From f8f3e80af3021c66064df5e870ea2972de01fe4b Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:02:42 +0000 Subject: [PATCH 01/24] feat: version-key the daemon socket to prevent stale-daemon adoption after upgrade (#2) --- .../sonarpredict/cli/DaemonLauncher.java | 33 +++++++-- .../sonarpredict/cli/SonarCommand.java | 21 +++++- .../sonarpredict/daemon/DaemonMain.java | 7 +- .../sonarpredict/protocol/SocketPaths.java | 72 ++++++++++++++++--- .../sonarpredict/cli/DaemonLauncherTest.java | 25 +++++-- .../protocol/SocketPathsTest.java | 45 ++++++++++++ 6 files changed, 178 insertions(+), 25 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java index 7be7f51..894c464 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java @@ -49,7 +49,7 @@ * *

Java runtime. The daemon launches with the {@code java} named by * {@code -Dsonar.java.exe} when set — the distribution's {@code bin/sonar} - * launcher sets it to a Java 17+ runtime it auto-discovered. With the property + * launcher sets it to a Java 21+ runtime it auto-discovered. With the property * absent the daemon launches with the current/system {@code java} that started * the CLI. No JRE is provisioned or bundled. */ @@ -64,7 +64,7 @@ public final class DaemonLauncher { /** * System property naming the {@code java} executable used to spawn the * daemon. The distribution's {@code bin/sonar} launcher sets it to the - * Java 17+ runtime it auto-discovered, so the daemon launches on a + * Java 21+ runtime it auto-discovered, so the daemon launches on a * verified-compatible JVM rather than whichever {@code java} happens to be * on {@code PATH}. Absent, the launcher falls back to the current JVM's * {@code java.home} (the dev default). @@ -79,6 +79,17 @@ public final class DaemonLauncher { */ public static final String DAEMON_PLUGINS_DIR_PROPERTY = "sonar.plugins.dir"; + /** + * System property carrying the socket-version token to the spawned daemon + * so it resolves the identical version-keyed socket/pidfile names this + * launcher polls. The value is exactly the {@link SocketPaths#version()} of + * the paths this launcher was constructed with — relayed verbatim, never + * recomputed, so the two ends cannot disagree. Kept as a literal here + * because the {@code cli} module must not depend on the {@code daemon} + * module — both ends agree on the name. + */ + public static final String SOCKET_VERSION_PROPERTY = "sonar.socket.version"; + /** Default bounded wait for the daemon socket to start accepting. */ public static final Duration DEFAULT_STARTUP_TIMEOUT = Duration.ofSeconds(60); @@ -180,7 +191,8 @@ private Process spawn() { if (!Files.isRegularFile(jar)) { throw new IllegalStateException("daemon jar does not exist: " + jar); } - List command = buildSpawnCommand(jar, resolveProvisionedLayout()); + List command = + buildSpawnCommand(jar, resolveProvisionedLayout(), paths.version()); ProcessBuilder builder = new ProcessBuilder(command); // The daemon resolves its plugins/ directory relative to its working // directory; run it from the module root that holds plugins/. @@ -201,7 +213,7 @@ private Process spawn() { * Builds the JVM command line that launches the daemon. * *

Java runtime. When {@code -Dsonar.java.exe} is set — the skill - * bundle's {@code bin/sonar} launcher sets it to the Java 17+ runtime it + * bundle's {@code bin/sonar} launcher sets it to the Java 21+ runtime it * auto-discovered — the daemon spawns with that executable. Absent, the * daemon launches with the current JVM's {@code java} (the dev default). * @@ -214,17 +226,24 @@ private Process spawn() { * daemon resolves a {@code plugins/} directory relative to its working * directory. * - * @param jar the daemon fat jar to run - * @param provisioned a fully provisioned runtime layout, or {@code null} + * @param jar the daemon fat jar to run + * @param provisioned a fully provisioned runtime layout, or {@code null} + * @param socketVersion the socket-version token to relay via + * {@code -Dsonar.socket.version}; {@code null}/blank + * omits it (the daemon then uses the bare socket name) * @return the command line for {@link ProcessBuilder} */ - static List buildSpawnCommand(Path jar, RuntimeLayout provisioned) { + static List buildSpawnCommand( + Path jar, RuntimeLayout provisioned, String socketVersion) { List command = new ArrayList<>(); command.add(javaExecutable()); String pluginsDir = resolvePluginsDir(provisioned); if (pluginsDir != null) { command.add("-D" + DAEMON_PLUGINS_DIR_PROPERTY + "=" + pluginsDir); } + if (socketVersion != null && !socketVersion.isBlank()) { + command.add("-D" + SOCKET_VERSION_PROPERTY + "=" + socketVersion); + } command.add("-jar"); command.add(jar.toString()); return command; diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarCommand.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarCommand.java index bc4675a..07d961c 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarCommand.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarCommand.java @@ -52,7 +52,7 @@ exitCodeList = { "0:clean — no issues at or above the severity floor", "1:issues found / coverage below threshold", - "2:tool error — bad input, daemon unreachable, no Java 17+"}, + "2:tool error — bad input, daemon unreachable, no Java 21+"}, footerHeading = "%nExamples:%n", footer = { " Check the current git changeset (primary agent path):", @@ -128,7 +128,7 @@ public final class SonarCommand implements Runnable { /** Production constructor: real socket-backed collaborators. */ public SonarCommand() { - SocketPaths paths = SocketPaths.resolve(); + SocketPaths paths = SocketPaths.resolve(System.getenv(), bundleSocketVersion()); DaemonLauncher launcher = new DaemonLauncher(paths); this.rpc = new DaemonClient(paths, launcher); this.control = new LauncherDaemonControl(paths, launcher); @@ -140,6 +140,23 @@ public SonarCommand(DaemonRpc rpc, DaemonControl control) { this.control = Objects.requireNonNull(control, "control"); } + /** + * The bundle's engine version, keyed into the daemon socket name so a CLI + * from one bundle never adopts a daemon spawned by a different bundle + * version (which would silently serve stale analyzers across an in-place + * upgrade). Empty when the manifest is unavailable (dev/test runs) — the + * socket name is then the bare {@code sonar-daemon}, preserving the prior + * single-daemon-per-machine behaviour. + */ + private static String bundleSocketVersion() { + try { + return io.github.randomcodespace.sonarpredict.cli.setup.Manifest + .bundled().version(); + } catch (RuntimeException unavailable) { + return ""; + } + } + /** With no subcommand, print usage. */ @Override public void run() { diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/DaemonMain.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/DaemonMain.java index d7583bf..f1d36c4 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/DaemonMain.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/DaemonMain.java @@ -30,7 +30,12 @@ private DaemonMain() { * waiting for the daemon to stop */ public static void main(String[] args) throws InterruptedException { - SocketPaths paths = SocketPaths.resolve(); + // The launcher relays its SocketPaths.version() here so the daemon binds + // the identical version-keyed socket name the launcher polls. The + // property name is a literal: the daemon module must not depend on the + // cli module's DaemonLauncher — both ends agree on "sonar.socket.version". + String socketVersion = System.getProperty("sonar.socket.version", ""); + SocketPaths paths = SocketPaths.resolve(System.getenv(), socketVersion); Daemon daemon = Daemon.start(paths, DaemonServer.DEFAULT_IDLE_TIMEOUT); if (!daemon.startedNewListener()) { diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/protocol/SocketPaths.java b/src/main/java/io/github/randomcodespace/sonarpredict/protocol/SocketPaths.java index 416fcea..7c034ef 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/protocol/SocketPaths.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/protocol/SocketPaths.java @@ -12,8 +12,15 @@ * Resolves the filesystem locations of the daemon's Unix domain socket and its * pidfile. * - *

One shared daemon per machine: the names are fixed ({@code sonar-daemon}), - * so any CLI process resolves the same pair and finds the running daemon. + *

Version-keyed names. The socket/pidfile/lockfile names are keyed by + * the bundle version ({@code sonar-daemon-}) when a version is supplied, + * so a CLI from one bundle never adopts a daemon left running by a different + * bundle version — an in-place upgrade silently serving stale analyzers is + * thereby impossible; the pre-upgrade daemon simply idles out on its own, + * now-orphaned socket. When no version is supplied (dev builds, tests) the bare + * {@code sonar-daemon} name is used and the one-daemon-per-machine behaviour is + * preserved. The launcher relays the exact token its {@link #version()} carries + * to the spawned daemon, so the two ends always resolve the identical names. * *

Directory choice. On Linux a logged-in session has * {@code $XDG_RUNTIME_DIR} — a per-user, {@code 0700}, tmpfs-backed directory @@ -40,37 +47,71 @@ */ public final class SocketPaths { - /** Shared base name for the socket/pidfile pair — one daemon per machine. */ + /** Base name for the socket/pidfile/lockfile triple. */ private static final String BASE_NAME = "sonar-daemon"; - private static final String SOCKET_NAME = BASE_NAME + ".sock"; - private static final String PID_NAME = BASE_NAME + ".pid"; - private static final String LOCK_NAME = BASE_NAME + ".lock"; private final Path socket; private final Path pidFile; private final Path lockFile; + private final String version; - private SocketPaths(Path socket, Path pidFile, Path lockFile) { + private SocketPaths(Path socket, Path pidFile, Path lockFile, String version) { this.socket = socket; this.pidFile = pidFile; this.lockFile = lockFile; + this.version = version; } - /** Resolves the paths from the current process environment. */ + /** Resolves the unversioned paths from the current process environment. */ public static SocketPaths resolve() { return resolve(System.getenv()); } /** - * Resolves the paths from the given environment map (test seam). + * Resolves the unversioned paths from the given environment map (test seam). * * @param env the environment to read {@code XDG_RUNTIME_DIR} from - * @return the resolved socket/pidfile pair + * @return the resolved triple using the bare {@code sonar-daemon} names */ public static SocketPaths resolve(Map env) { + return resolve(env, null); + } + + /** + * Resolves the paths from the given environment, keying the + * socket/pidfile/lockfile names by {@code version} so a CLI never adopts a + * daemon from a different bundle version. + * + * @param env the environment to read {@code XDG_RUNTIME_DIR} from + * @param version the bundle version token to key the names by; {@code null} + * or blank yields the bare {@code sonar-daemon} names + * @return the resolved socket/pidfile/lockfile triple + */ + public static SocketPaths resolve(Map env, String version) { Path dir = runtimeDir(env); + String token = sanitizeVersion(version); + String base = token.isEmpty() ? BASE_NAME : BASE_NAME + "-" + token; return new SocketPaths( - dir.resolve(SOCKET_NAME), dir.resolve(PID_NAME), dir.resolve(LOCK_NAME)); + dir.resolve(base + ".sock"), + dir.resolve(base + ".pid"), + dir.resolve(base + ".lock"), + token); + } + + /** + * Normalizes a version token to a filesystem-safe path segment: runs of any + * character other than {@code [A-Za-z0-9._-]} collapse to a single + * {@code _}. {@code null} or blank yields the empty string (no suffix). + */ + private static String sanitizeVersion(String version) { + if (version == null) { + return ""; + } + String trimmed = version.strip(); + if (trimmed.isEmpty()) { + return ""; + } + return trimmed.replaceAll("[^A-Za-z0-9._-]+", "_"); } private static Path runtimeDir(Map env) { @@ -150,4 +191,13 @@ public Path pidFile() { public Path lockFile() { return lockFile; } + + /** + * The sanitized version token keyed into the socket/pidfile/lockfile names, + * or the empty string when the names are unversioned. The launcher relays + * this verbatim to the spawned daemon so both ends resolve identical names. + */ + public String version() { + return version; + } } diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncherTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncherTest.java index 72d631c..eab7a66 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncherTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncherTest.java @@ -136,7 +136,7 @@ void provisionedRuntimeDrivesTheSpawnCommand(@TempDir Path dir) throws Exception assertTrue(layout.isProvisioned(), "the stub layout must be provisioned"); Path daemonJar = Files.createFile(dir.resolve("daemon.jar")); - var command = DaemonLauncher.buildSpawnCommand(daemonJar, layout); + var command = DaemonLauncher.buildSpawnCommand(daemonJar, layout, ""); assertFalse(command.get(0).startsWith(dir.toString()), "a provisioned launch must use the system java, not a bundled JRE, got: " @@ -153,7 +153,7 @@ void provisionedRuntimeDrivesTheSpawnCommand(@TempDir Path dir) throws Exception @DisplayName("with no provisioned runtime the dev default uses the system JVM") void devDefaultDrivesTheSpawnCommand(@TempDir Path dir) throws Exception { Path daemonJar = Files.createFile(dir.resolve("daemon.jar")); - var command = DaemonLauncher.buildSpawnCommand(daemonJar, null); + var command = DaemonLauncher.buildSpawnCommand(daemonJar, null, ""); assertFalse(command.stream().anyMatch( arg -> arg.startsWith("-D" + DaemonLauncher.DAEMON_PLUGINS_DIR_PROPERTY)), @@ -162,6 +162,23 @@ void devDefaultDrivesTheSpawnCommand(@TempDir Path dir) throws Exception { "the command must run the daemon jar"); } + @Test + @DisplayName("a non-blank socket version is relayed as -Dsonar.socket.version") + void socketVersionRelayedToSpawnCommand(@TempDir Path dir) throws Exception { + Path daemonJar = Files.createFile(dir.resolve("daemon.jar")); + + var versioned = DaemonLauncher.buildSpawnCommand(daemonJar, null, "11.3.0.85510"); + assertTrue(versioned.contains( + "-D" + DaemonLauncher.SOCKET_VERSION_PROPERTY + "=11.3.0.85510"), + "a versioned launch must relay -Dsonar.socket.version, got: " + versioned); + + var unversioned = DaemonLauncher.buildSpawnCommand(daemonJar, null, ""); + assertFalse(unversioned.stream().anyMatch( + arg -> arg.startsWith("-D" + DaemonLauncher.SOCKET_VERSION_PROPERTY)), + "an unversioned launch must not relay -Dsonar.socket.version, got: " + + unversioned); + } + @Test @DisplayName("sonar.java.exe drives the java executable in the spawn command") void javaExePropertyDrivesTheSpawnCommand(@TempDir Path dir) throws Exception { @@ -170,7 +187,7 @@ void javaExePropertyDrivesTheSpawnCommand(@TempDir Path dir) throws Exception { String previous = System.getProperty(DaemonLauncher.JAVA_EXE_PROPERTY); System.setProperty(DaemonLauncher.JAVA_EXE_PROPERTY, bundledJava.toString()); try { - var command = DaemonLauncher.buildSpawnCommand(daemonJar, null); + var command = DaemonLauncher.buildSpawnCommand(daemonJar, null, ""); assertEquals(bundledJava.toString(), command.get(0), "the spawn command must use the java named by sonar.java.exe, got: " @@ -192,7 +209,7 @@ void pluginsDirPropertyIsForwardedToTheSpawnCommand(@TempDir Path dir) throws Ex String previous = System.getProperty(DaemonLauncher.DAEMON_PLUGINS_DIR_PROPERTY); System.setProperty(DaemonLauncher.DAEMON_PLUGINS_DIR_PROPERTY, pluginsDir.toString()); try { - var command = DaemonLauncher.buildSpawnCommand(daemonJar, null); + var command = DaemonLauncher.buildSpawnCommand(daemonJar, null, ""); assertTrue(command.contains( "-D" + DaemonLauncher.DAEMON_PLUGINS_DIR_PROPERTY + "=" diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/protocol/SocketPathsTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/protocol/SocketPathsTest.java index 9597382..3905a0f 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/protocol/SocketPathsTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/protocol/SocketPathsTest.java @@ -111,6 +111,51 @@ void noArgResolveUsesProcessEnv() { assertTrue(paths.pidFile().getFileName().toString().endsWith(".pid")); } + @Test + @DisplayName("a supplied version keys the socket/pidfile/lockfile names") + void versionKeysTheNames() { + Path runtime = Path.of("/run/user/1000"); + SocketPaths paths = SocketPaths.resolve( + Map.of("XDG_RUNTIME_DIR", runtime.toString()), "11.3.0.85510"); + + assertEquals("sonar-daemon-11.3.0.85510.sock", + paths.socket().getFileName().toString(), + "the socket name must carry the version token"); + assertEquals("sonar-daemon-11.3.0.85510.pid", + paths.pidFile().getFileName().toString()); + assertEquals("sonar-daemon-11.3.0.85510.lock", + paths.lockFile().getFileName().toString()); + assertEquals("11.3.0.85510", paths.version(), + "resolved paths must expose their version token for the launcher to relay"); + } + + @Test + @DisplayName("a blank version yields the bare unversioned names (back-compat)") + void blankVersionYieldsBareNames() { + Map env = Map.of("XDG_RUNTIME_DIR", "/run/user/1000"); + SocketPaths blank = SocketPaths.resolve(env, " "); + + assertEquals("sonar-daemon.sock", blank.socket().getFileName().toString(), + "a blank version must fall back to the bare socket name"); + assertEquals("", blank.version(), "a blank version must expose an empty token"); + assertEquals(SocketPaths.resolve(env).socket(), blank.socket(), + "blank-version and no-version resolution must agree"); + } + + @Test + @DisplayName("a version with path separators is sanitized to a single safe segment") + void versionIsSanitizedToSingleSegment() { + Path runtime = Path.of("/run/user/1000"); + SocketPaths paths = SocketPaths.resolve( + Map.of("XDG_RUNTIME_DIR", runtime.toString()), "1.0/evil sub"); + + assertEquals(runtime, paths.socket().getParent(), + "a malicious version must not escape the runtime directory"); + String name = paths.socket().getFileName().toString(); + assertTrue(name.startsWith("sonar-daemon-") && name.endsWith(".sock"), + "name must remain a single sonar-daemon-*.sock segment, got: " + name); + } + private static String stem(String name) { int dot = name.lastIndexOf('.'); return dot < 0 ? name : name.substring(0, dot); From 57431b01386577ef10cb0d7b95749a0b3d6d6b77 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:02:42 +0000 Subject: [PATCH 02/24] feat(daemon): verify plugin jars against the manifest before loading (#5) --- .../sonarpredict/daemon/AnalysisService.java | 12 +- .../sonarpredict/daemon/PluginVerifier.java | 137 ++++++++++++++++++ .../daemon/PluginVerifierTest.java | 113 +++++++++++++++ 3 files changed, 260 insertions(+), 2 deletions(-) create mode 100644 src/main/java/io/github/randomcodespace/sonarpredict/daemon/PluginVerifier.java create mode 100644 src/test/java/io/github/randomcodespace/sonarpredict/daemon/PluginVerifierTest.java diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java index 9717370..fa0f5b4 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java @@ -33,6 +33,7 @@ import org.sonarsource.sonarlint.core.commons.progress.TaskManager; import org.sonarsource.sonarlint.core.plugin.commons.LoadedPlugins; +import io.github.randomcodespace.sonarpredict.cli.setup.Manifest; import io.github.randomcodespace.sonarpredict.protocol.dto.AnalysisWarning; import io.github.randomcodespace.sonarpredict.protocol.dto.AnalyzeRequest; import io.github.randomcodespace.sonarpredict.protocol.dto.AnalyzeResponse; @@ -43,7 +44,8 @@ * supported files in one pass, and returns an {@link AnalyzeResponse} of mapped * issues and warnings. * - *

Warm engine. Plugins ({@link PluginRuntime#loadAll}), the + *

Warm engine. Plugins (the manifest-verified allow-list from + * {@link PluginVerifier}, loaded via {@link PluginRuntime#loadFrom}), the * {@link RuleCatalog}, and the engine {@link AnalysisScheduler} are all built * once in the constructor and reused for every {@link #analyze} call. * The {@code AnalysisScheduler} is explicitly designed for this: it owns a @@ -135,7 +137,13 @@ public AnalysisService(Path pluginsDir) { // SAME instance is what keeps every per-analysis sensor message — most // importantly a swallowed "Error executing sensor" — visible here. this.engineLog = EngineLog.installAndCapture(); - this.loadedPlugins = PluginRuntime.loadAll(pluginsDir); + // Load an explicit, manifest-verified allow-list rather than blindly + // globbing every jar: an attacker-placed or tampered analyzer in the + // plugins directory makes the daemon refuse to start instead of being + // loaded into the engine. The trust decision is made here, in the + // process that actually loads the bytecode, not only in the launcher. + this.loadedPlugins = PluginRuntime.loadFrom( + PluginVerifier.verifiedJars(pluginsDir, Manifest.bundled())); // The real loaded-language set: an analyzer skipped at load time (e.g. // the JS/TS/CSS plugin with no Node.js runtime) is absent here, so // loadedLanguages() and the PING response never over-report capability. diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/PluginVerifier.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/PluginVerifier.java new file mode 100644 index 0000000..7bd480a --- /dev/null +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/PluginVerifier.java @@ -0,0 +1,137 @@ +package io.github.randomcodespace.sonarpredict.daemon; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.HexFormat; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; + +import io.github.randomcodespace.sonarpredict.cli.setup.Manifest; + +/** + * Builds the verified allow-list of analyzer plugin jars the daemon may load. + * + *

This moves the trust decision the launcher's + * {@code RuntimeLayout.isVerified} makes into the daemon process itself: rather + * than blindly globbing every {@code *.jar} in the plugins directory (which the + * launcher's verification cannot vouch for, since it runs in a different + * process), the daemon hands {@link PluginRuntime#loadFrom} an explicit + * allow-list. A jar that is neither a manifest-pinned analyzer (matched by name + * and SHA-256) nor one of the project's own co-distributed jars causes + * a refuse-to-start, so an attacker-placed analyzer dropped into the plugins + * directory is never loaded into the engine. + * + *

Unlike {@code RuntimeLayout.isVerified} it tolerates the daemon's two real + * compositions instead of demanding the exact provisioned set: + *

+ * Both the engine jar and the host plugin are the project's own artifacts + * (allow-listed by name), not third-party analyzers, so they carry no separate + * manifest pin here. + */ +final class PluginVerifier { + + private static final String ENGINE_JAR = "sonarlint-analysis-engine.jar"; + private static final String HOST_PLUGIN_PREFIX = "sonar-predictor-"; + + private PluginVerifier() { + } + + /** + * Returns the verified set of plugin jars in {@code pluginsDir} to hand to + * {@link PluginRuntime#loadFrom}. + * + * @param pluginsDir the directory holding the analyzer plugin jars + * @param manifest the pinned runtime manifest (per-plugin SHA-256) + * @return the exact, verified jar set the daemon may load + * @throws IllegalStateException if a manifest-pinned analyzer fails its + * SHA-256 check, an unaccounted-for jar is + * present, or no loadable jar is found + */ + static Set verifiedJars(Path pluginsDir, Manifest manifest) { + Map pinnedSha = new HashMap<>(); + for (Manifest.Artifact plugin : manifest.plugins()) { + pinnedSha.put(plugin.jarName(), plugin.sha256()); + } + + Set allowed = new LinkedHashSet<>(); + for (Path jar : listJars(pluginsDir)) { + String name = jar.getFileName().toString(); + String pinned = pinnedSha.get(name); + if (pinned != null) { + if (!sha256(jar).equalsIgnoreCase(pinned)) { + throw new IllegalStateException( + "refusing to start: analyzer plugin failed SHA-256 verification: " + + name + " in " + pluginsDir.toAbsolutePath()); + } + allowed.add(jar); + } else if (isProjectJar(name)) { + allowed.add(jar); + } else { + throw new IllegalStateException( + "refusing to start: unaccounted jar in the plugins directory " + + "(neither a manifest-pinned analyzer nor a project jar): " + + name + " in " + pluginsDir.toAbsolutePath()); + } + } + if (allowed.isEmpty()) { + throw new IllegalStateException( + "no analyzer plugin jars found in " + pluginsDir.toAbsolutePath()); + } + return allowed; + } + + /** + * Whether {@code jarName} is one of the project's own co-distributed jars — + * the embedded analysis engine jar or the {@code sonar-predictor} host + * plugin — allow-listed by name rather than by a third-party manifest pin. + */ + private static boolean isProjectJar(String jarName) { + return jarName.equals(ENGINE_JAR) + || (jarName.startsWith(HOST_PLUGIN_PREFIX) && jarName.endsWith(".jar")); + } + + private static Set listJars(Path pluginsDir) { + try (Stream entries = Files.list(pluginsDir)) { + Set jars = new LinkedHashSet<>(); + entries.filter(Files::isRegularFile) + .filter(p -> p.getFileName().toString().endsWith(".jar")) + .map(p -> p.toAbsolutePath().normalize()) + .forEach(jars::add); + return jars; + } catch (IOException e) { + throw new UncheckedIOException( + "could not list plugins directory: " + pluginsDir.toAbsolutePath(), e); + } + } + + private static String sha256(Path file) { + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + try (InputStream in = Files.newInputStream(file)) { + byte[] buffer = new byte[8192]; + int read; + while ((read = in.read(buffer)) != -1) { + digest.update(buffer, 0, read); + } + } + return HexFormat.of().formatHex(digest.digest()); + } catch (IOException | NoSuchAlgorithmException e) { + throw new IllegalStateException("could not hash analyzer plugin jar: " + file, e); + } + } +} diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/PluginVerifierTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/PluginVerifierTest.java new file mode 100644 index 0000000..14caa12 --- /dev/null +++ b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/PluginVerifierTest.java @@ -0,0 +1,113 @@ +package io.github.randomcodespace.sonarpredict.daemon; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.util.HexFormat; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import io.github.randomcodespace.sonarpredict.cli.setup.Manifest; + +/** + * Unit tests for {@link PluginVerifier}: it must accept exactly the + * manifest-pinned analyzers (by name and SHA-256) plus the project's own engine + * and host jars, and refuse to start on a tampered or unaccounted-for jar. + */ +class PluginVerifierTest { + + private static final String PLUGIN_JAR = "sonar-foo-plugin-1.0.jar"; + + private static String sha256(byte[] content) throws Exception { + return HexFormat.of().formatHex(MessageDigest.getInstance("SHA-256").digest(content)); + } + + /** A manifest with one analyzer plugin pinned to {@code pluginSha}. */ + private static Manifest manifestWith(String pluginSha) { + Manifest.Artifact engine = new Manifest.Artifact( + "org.sonarsource.sonarlint.core", "sonarlint-analysis-engine", + "1.0", "0".repeat(64)); + Manifest.Artifact plugin = new Manifest.Artifact( + "org.example", "sonar-foo-plugin", "1.0", pluginSha); + return new Manifest("1.0", engine, List.of(plugin)); + } + + private static Set names(Set jars) { + return jars.stream().map(p -> p.getFileName().toString()).collect(Collectors.toSet()); + } + + @Test + @DisplayName("a SHA-matching analyzer plus the host plugin are both allowed") + void cleanDirAllowsManifestPluginAndHostPlugin(@TempDir Path dir) throws Exception { + byte[] body = "real-analyzer".getBytes(StandardCharsets.UTF_8); + Files.write(dir.resolve(PLUGIN_JAR), body); + Files.write(dir.resolve("sonar-predictor-0.3.0-SNAPSHOT-host.jar"), + "host".getBytes(StandardCharsets.UTF_8)); + + Set verified = PluginVerifier.verifiedJars(dir, manifestWith(sha256(body))); + + assertEquals( + Set.of(PLUGIN_JAR, "sonar-predictor-0.3.0-SNAPSHOT-host.jar"), + names(verified), + "the verified set must be the manifest analyzer plus the host plugin"); + } + + @Test + @DisplayName("the embedded engine jar is allowed by name (no manifest pin needed)") + void engineJarAllowedByName(@TempDir Path dir) throws Exception { + byte[] body = "real-analyzer".getBytes(StandardCharsets.UTF_8); + Files.write(dir.resolve(PLUGIN_JAR), body); + Files.write(dir.resolve("sonarlint-analysis-engine.jar"), + "engine".getBytes(StandardCharsets.UTF_8)); + + Set verified = PluginVerifier.verifiedJars(dir, manifestWith(sha256(body))); + + assertTrue(names(verified).contains("sonarlint-analysis-engine.jar"), + "the project's own engine jar must be allow-listed by name"); + } + + @Test + @DisplayName("a tampered analyzer (SHA mismatch) makes the daemon refuse to start") + void tamperedPluginRejected(@TempDir Path dir) throws Exception { + Files.write(dir.resolve(PLUGIN_JAR), "tampered".getBytes(StandardCharsets.UTF_8)); + // Manifest pins the SHA of DIFFERENT content than what is on disk. + Manifest manifest = manifestWith(sha256("original".getBytes(StandardCharsets.UTF_8))); + + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> PluginVerifier.verifiedJars(dir, manifest)); + assertTrue(ex.getMessage().contains("SHA-256"), + "the rejection must cite the failed checksum, got: " + ex.getMessage()); + } + + @Test + @DisplayName("an unaccounted-for jar makes the daemon refuse to start") + void unaccountedJarRejected(@TempDir Path dir) throws Exception { + byte[] body = "real-analyzer".getBytes(StandardCharsets.UTF_8); + Files.write(dir.resolve(PLUGIN_JAR), body); + Files.write(dir.resolve("evil-analyzer.jar"), "pwn".getBytes(StandardCharsets.UTF_8)); + + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> PluginVerifier.verifiedJars(dir, manifestWith(sha256(body)))); + assertTrue(ex.getMessage().contains("unaccounted"), + "the rejection must flag the unaccounted jar, got: " + ex.getMessage()); + } + + @Test + @DisplayName("an empty plugins directory is rejected") + void emptyDirRejected(@TempDir Path dir) { + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> PluginVerifier.verifiedJars(dir, manifestWith("0".repeat(64)))); + assertTrue(ex.getMessage().contains("no analyzer plugin"), + "an empty directory must be rejected, got: " + ex.getMessage()); + } +} From 473fd1545222752277f7981d3716f47339871517 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:02:42 +0000 Subject: [PATCH 03/24] fix(cli): reject git-diff option injection; add XXE + tar guard tests (#17) --- .../sonarpredict/cli/FileResolver.java | 19 ++- .../sonarpredict/cli/FileResolverTest.java | 14 ++ .../cli/coverage/CoverageXmlSecurityTest.java | 81 ++++++++++ .../sonarpredict/cli/setup/TarTest.java | 144 ++++++++++++++++++ .../sonarpredict/cli/setup/TarWriter.java | 35 +++++ 5 files changed, 290 insertions(+), 3 deletions(-) create mode 100644 src/test/java/io/github/randomcodespace/sonarpredict/cli/coverage/CoverageXmlSecurityTest.java create mode 100644 src/test/java/io/github/randomcodespace/sonarpredict/cli/setup/TarTest.java diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/FileResolver.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/FileResolver.java index 6b57f93..fd68e4c 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/FileResolver.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/FileResolver.java @@ -131,7 +131,9 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) { * @param projectDir the git working tree to inspect * @param ref the ref to diff against, or {@code null} for {@code HEAD} * @return the resolved base directory and the changed files that still exist - * @throws IllegalArgumentException if {@code projectDir} is not a directory + * @throws IllegalArgumentException if {@code projectDir} is not a directory, + * or {@code ref} starts with {@code '-'} + * (rejected to prevent git option injection) * @throws DaemonException if the {@code git} invocation fails */ public ResolvedFiles resolveDiff(Path projectDir, String ref) { @@ -140,7 +142,15 @@ public ResolvedFiles resolveDiff(Path projectDir, String ref) { if (!Files.isDirectory(base)) { throw new IllegalArgumentException("not a directory: " + projectDir); } - String against = (ref == null || ref.isBlank()) ? "HEAD" : ref; + String against = (ref == null || ref.isBlank()) ? "HEAD" : ref.strip(); + // Reject git option injection: a ref starting with '-' would be parsed + // by git as an option (e.g. --output=, --ext-diff) rather than a + // revision. A legitimate git ref never begins with '-' + // (git check-ref-format), so this rejects only hostile input. + if (against.startsWith("-")) { + throw new IllegalArgumentException( + "invalid git ref (must not start with '-'): " + against); + } List changed = gitDiffNames(base, against); List existing = new ArrayList<>(); for (String name : changed) { @@ -161,8 +171,11 @@ public ResolvedFiles resolveDiff(Path projectDir, String ref) { */ @SuppressWarnings("java:S4036") private static List gitDiffNames(Path workingTree, String ref) { + // Trailing "--" ends option parsing and separates the revision from any + // pathspec, so the ref cannot be reinterpreted as a path; combined with + // the leading-'-' rejection in resolveDiff this closes option injection. ProcessBuilder builder = new ProcessBuilder( - "git", "diff", "--name-only", ref) + "git", "diff", "--name-only", ref, "--") .directory(workingTree.toFile()) .redirectError(ProcessBuilder.Redirect.DISCARD); List names = new ArrayList<>(); diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/FileResolverTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/FileResolverTest.java index 7d5b98d..27c5575 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/cli/FileResolverTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/FileResolverTest.java @@ -126,6 +126,20 @@ void diffDefaultsToHead(@TempDir Path dir) throws Exception { "diff against the default ref (HEAD) must see the working-tree change"); } + @Test + @DisplayName("--diff rejects a ref that would inject a git option (e.g. --output=)") + void diffRejectsOptionInjectionRef(@TempDir Path dir) { + // A ref beginning with '-' would be parsed by git as an option + // (e.g. --output= writing attacker-chosen paths) rather than a + // revision. It must be rejected before git is ever invoked. + assertThrows(IllegalArgumentException.class, + () -> resolver.resolveDiff(dir, "--output=/tmp/pwned"), + "a long-option-style ref must be rejected to prevent git option injection"); + assertThrows(IllegalArgumentException.class, + () -> resolver.resolveDiff(dir, "-O/tmp/pwned"), + "a short-option-style ref must also be rejected"); + } + private static void git(Path dir, String... args) throws Exception { java.util.List command = new java.util.ArrayList<>(); command.add("git"); diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/coverage/CoverageXmlSecurityTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/coverage/CoverageXmlSecurityTest.java new file mode 100644 index 0000000..d2571b2 --- /dev/null +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/coverage/CoverageXmlSecurityTest.java @@ -0,0 +1,81 @@ +package io.github.randomcodespace.sonarpredict.cli.coverage; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.file.Files; +import java.nio.file.Path; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.w3c.dom.Document; + +/** + * Adversarial tests that lock in {@link CoverageXml}'s XXE hardening. Coverage + * reports come from CI artifacts that are not necessarily trusted, so the parser + * must never resolve an external entity into the document and must never detonate + * an entity-expansion bomb. These tests guard the control so a later + * "simplification" of {@code newSecureBuilder()} that reopens the hole fails the + * build. (The pre-existing parser tests only prove a benign DOCTYPE is tolerated.) + */ +class CoverageXmlSecurityTest { + + private static final String SENTINEL = "TOP_SECRET_XXE_SENTINEL_4f3a"; + + @Test + @DisplayName("an external SYSTEM entity is never resolved into the parsed document") + void externalEntityIsNotResolved(@TempDir Path dir) throws Exception { + Path secret = Files.writeString(dir.resolve("secret.txt"), SENTINEL); + Path report = Files.writeString(dir.resolve("evil.xml"), + "\n" + + "\n" + + "]>\n" + + "&xxe;\n"); + + // Blocked either way: the parser may reject the document outright, or + // parse it with the entity left unresolved. The hole is open ONLY if the + // external file's content is inlined into the DOM. + try { + Document doc = CoverageXml.parse(report); + assertFalse(doc.getDocumentElement().getTextContent().contains(SENTINEL), + "external entity content must never be inlined into the parsed document"); + } catch (CoverageException blocked) { + // Acceptable: the malicious document was rejected rather than parsed. + assertFalse(blocked.getMessage().contains(SENTINEL), + "even the error message must not leak the external file content"); + } + } + + @Test + @DisplayName("an entity-expansion bomb is capped or rejected, never detonated") + void entityExpansionBombIsNeutralized(@TempDir Path dir) throws Exception { + StringBuilder dtd = new StringBuilder(" \n"); + String prev = "a"; + for (int level = 1; level <= 8; level++) { + String name = "e" + level; + dtd.append(" \n"); + prev = name; + } + Path report = Files.writeString(dir.resolve("bomb.xml"), + "\n" + + "\n" + + "&" + prev + ";\n"); + + // A detonated bomb would expand to ~10^9 characters. Secure processing + // caps entity expansion, so parse must either throw or leave the + // reference unexpanded — never materialize the blown-up text. + boolean neutralized; + try { + Document doc = CoverageXml.parse(report); + neutralized = doc.getDocumentElement().getTextContent().length() < 100_000; + } catch (CoverageException rejected) { + neutralized = true; + } + assertTrue(neutralized, + "an entity-expansion bomb must be capped or rejected, not detonated"); + } +} diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/setup/TarTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/setup/TarTest.java new file mode 100644 index 0000000..bde2638 --- /dev/null +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/setup/TarTest.java @@ -0,0 +1,144 @@ +package io.github.randomcodespace.sonarpredict.cli.setup; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.PosixFilePermission; +import java.util.Set; +import java.util.zip.GZIPOutputStream; + +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Adversarial tests for {@link Tar}'s security-relevant extraction paths: + * tar-slip rejection, top-dir stripping, GNU long-name handling, truncated-entry + * rejection, and owner-only execute clamping. Plain extraction is already + * exercised end-to-end via {@code SetupCommandTest}; these lock in the guards a + * later "simplification" could silently reopen. + */ +class TarTest { + + @FunctionalInterface + private interface TarBody { + void write(TarWriter tar) throws IOException; + } + + /** Builds a complete (end-blocked) {@code .tar.gz} from a writer recipe. */ + private static byte[] targz(TarBody body) throws IOException { + ByteArrayOutputStream raw = new ByteArrayOutputStream(); + try (GZIPOutputStream gz = new GZIPOutputStream(raw); + TarWriter tar = new TarWriter(gz)) { + body.write(tar); + } + return raw.toByteArray(); + } + + private static void extract(byte[] targz, Path dest, boolean stripTopDir) throws IOException { + Tar.extractTarGz(new ByteArrayInputStream(targz), dest, stripTopDir); + } + + @Test + @DisplayName("a tar-slip entry that escapes the destination is rejected") + void tarSlipEntryRejected(@TempDir Path dir) throws Exception { + Path dest = dir.resolve("dest"); + byte[] archive = targz(tar -> + tar.addFile("../escaped.txt", "pwned".getBytes(StandardCharsets.UTF_8), false)); + + IOException ex = assertThrows(IOException.class, () -> extract(archive, dest, false), + "an entry resolving outside the destination must be rejected"); + assertTrue(ex.getMessage().contains("escapes destination"), + "the rejection must name the escape, got: " + ex.getMessage()); + assertFalse(Files.exists(dir.resolve("escaped.txt")), + "the escaping file must never be written outside the destination"); + } + + @Test + @DisplayName("stripTopDir drops the single leading path component") + void stripTopDirDropsLeadingComponent(@TempDir Path dest) throws Exception { + byte[] archive = targz(tar -> { + tar.addDirectory("jdk-21/"); + tar.addDirectory("jdk-21/bin/"); + tar.addFile("jdk-21/bin/java", "ELF".getBytes(StandardCharsets.UTF_8), true); + }); + + extract(archive, dest, true); + + assertTrue(Files.isRegularFile(dest.resolve("bin/java")), + "stripping the top dir must land bin/java directly under the destination"); + assertFalse(Files.exists(dest.resolve("jdk-21")), + "the stripped top-level component must not appear under the destination"); + } + + @Test + @DisplayName("a GNU long-name entry (>100 bytes) is extracted under its full name") + void longNameEntryHandled(@TempDir Path dest) throws Exception { + String longName = "deep/" + "x".repeat(150) + ".txt"; + byte[] archive = targz(tar -> + tar.addLongNameFile(longName, "ok".getBytes(StandardCharsets.UTF_8), false)); + + extract(archive, dest, false); + + assertTrue(Files.isRegularFile(dest.resolve(longName)), + "a >100-byte GNU long name must be honored, expected: " + longName); + } + + @Test + @DisplayName("a truncated entry (declared size exceeds the stream) is rejected") + @SuppressWarnings("resource") // TarWriter is intentionally not closed: closing + // writes the end-of-archive blocks, which would pad the stream and defeat the + // truncation we are simulating. The underlying gz IS closed (try-with-resources). + void truncatedEntryRejected(@TempDir Path dest) throws Exception { + ByteArrayOutputStream raw = new ByteArrayOutputStream(); + try (GZIPOutputStream gz = new GZIPOutputStream(raw)) { + new TarWriter(gz).addTruncatedFile( + "short.bin", 4096, "only-ten!!".getBytes(StandardCharsets.UTF_8)); + } + byte[] archive = raw.toByteArray(); + + assertThrows(IOException.class, () -> extract(archive, dest, false), + "a truncated entry must fail loudly, not write a partial file silently"); + } + + @Test + @DisplayName("an executable entry is clamped to owner-only execute") + void executableBitClampedToOwner(@TempDir Path dest) throws Exception { + byte[] archive = targz(tar -> { + tar.addFile("run.sh", "#!/bin/sh\n".getBytes(StandardCharsets.UTF_8), true); + tar.addFile("data.txt", "plain".getBytes(StandardCharsets.UTF_8), false); + }); + + extract(archive, dest, false); + + Path exec = dest.resolve("run.sh"); + Path plain = dest.resolve("data.txt"); + assertTrue(Files.isRegularFile(exec) && Files.isRegularFile(plain), + "both entries must be extracted"); + + Assumptions.assumeTrue( + dest.getFileSystem().supportedFileAttributeViews().contains("posix"), + "execute-bit clamping is only assertable on a POSIX filesystem"); + + Set execPerms = Files.getPosixFilePermissions(exec); + assertTrue(execPerms.contains(PosixFilePermission.OWNER_EXECUTE), + "an executable entry must stay owner-executable, got: " + execPerms); + assertFalse(execPerms.contains(PosixFilePermission.GROUP_EXECUTE) + || execPerms.contains(PosixFilePermission.OTHERS_EXECUTE), + "execute must be clamped to the owner (no group/other), got: " + execPerms); + + Set plainPerms = Files.getPosixFilePermissions(plain); + assertFalse(plainPerms.contains(PosixFilePermission.OWNER_EXECUTE) + || plainPerms.contains(PosixFilePermission.GROUP_EXECUTE) + || plainPerms.contains(PosixFilePermission.OTHERS_EXECUTE), + "a non-executable entry must carry no execute bits, got: " + plainPerms); + } +} diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/setup/TarWriter.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/setup/TarWriter.java index 00e6589..8d59ab6 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/cli/setup/TarWriter.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/setup/TarWriter.java @@ -36,6 +36,41 @@ void addFile(String name, byte[] content, boolean executable) throws IOException } } + /** + * Writes a GNU long-name ({@code 'L'}) entry whose payload is + * {@code longName}, then the real file entry — exercises {@code Tar}'s + * long-name handling for paths longer than the 100-byte USTAR name field. + */ + void addLongNameFile(String longName, byte[] content, boolean executable) throws IOException { + byte[] nameBytes = longName.getBytes(StandardCharsets.UTF_8); + byte[] payload = new byte[nameBytes.length + 1]; // GNU long names are NUL-terminated + System.arraycopy(nameBytes, 0, payload, 0, nameBytes.length); + writeBlocked(header("././@LongLink", payload.length, 'L', 0_644), payload); + String shortName = longName.length() <= 100 + ? longName : longName.substring(longName.length() - 100); + writeBlocked(header(shortName, content.length, '0', executable ? 0_755 : 0_644), content); + } + + /** + * Writes a file header declaring {@code declaredSize} bytes but emits only + * {@code body} (fewer) and no end-of-archive blocks — exercises {@code Tar}'s + * truncated-entry handling. Do not call {@link #close()} afterwards. + */ + void addTruncatedFile(String name, int declaredSize, byte[] body) throws IOException { + out.write(header(name, declaredSize, '0', 0_644)); + out.write(body); + } + + /** Writes a header followed by its payload padded to the 512-byte block. */ + private void writeBlocked(byte[] header, byte[] payload) throws IOException { + out.write(header); + out.write(payload); + int pad = (int) ((BLOCK - (payload.length % BLOCK)) % BLOCK); + if (pad > 0) { + out.write(new byte[pad]); + } + } + private static byte[] header(String name, int size, char type, int mode) { byte[] h = new byte[BLOCK]; byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8); From 9a38cd1d9d0107208a25dd64664bd5434c9d41a1 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:02:42 +0000 Subject: [PATCH 04/24] fix(setup): atomic offline-bundle extraction + owner-only execute clamp (#18) --- .../sonarpredict/cli/setup/SetupRunner.java | 73 +++++++++++++++++-- .../sonarpredict/cli/setup/Tar.java | 56 +++++++------- 2 files changed, 91 insertions(+), 38 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/setup/SetupRunner.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/setup/SetupRunner.java index d850fa5..3077140 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/setup/SetupRunner.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/setup/SetupRunner.java @@ -5,7 +5,10 @@ import java.io.PrintWriter; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Comparator; +import java.util.List; import java.util.Objects; +import java.util.stream.Stream; /** * Orchestrates {@code sonar setup}: provisions the analyzer plugins and the @@ -85,20 +88,74 @@ public void provisionOffline(Path bundle, PrintWriter out) { if (!Files.isRegularFile(bundle)) { throw new DownloadException("offline archive not found: " + bundle); } + // Idempotent: an already-complete, verified runtime is left untouched + // (mirrors provision()), so we never re-extract over a good runtime. + if (layout.isVerified(manifest)) { + out.println("runtime already provisioned and verified: " + layout.versionDir()); + out.flush(); + return; + } out.println("provisioning runtime " + manifest.version() + " from offline archive " + bundle); - try (InputStream in = Files.newInputStream(bundle)) { - Tar.extractTarGz(in, layout.versionDir(), false); + + // Extract into a sibling staging directory and verify it there; only a + // fully verified runtime is renamed onto the version directory. On any + // failure the staging tree is removed, so unverified — and thus + // attacker-influenced — files never persist in, or partially populate, + // the real version directory (mirrors Downloader's verify-then-promote). + Path versionDir = layout.versionDir(); + Path baseDir = versionDir.getParent(); + Path stagingBase = null; + try { + Files.createDirectories(baseDir); + stagingBase = Files.createTempDirectory( + baseDir, versionDir.getFileName() + ".staging-"); + RuntimeLayout staging = + new RuntimeLayout(stagingBase, versionDir.getFileName().toString()); + try (InputStream in = Files.newInputStream(bundle)) { + Tar.extractTarGz(in, staging.versionDir(), false); + } + if (!staging.isVerified(manifest)) { + throw new DownloadException( + "the offline archive did not yield a complete, verified runtime at " + + versionDir); + } + // Promote: clear any prior partial/corrupt runtime, then rename the + // verified staging tree into place (same filesystem ⇒ atomic). + deleteRecursively(versionDir); + Files.move(staging.versionDir(), versionDir); } catch (IOException e) { throw new DownloadException( "could not extract the offline archive: " + e.getMessage(), e); + } finally { + if (stagingBase != null) { + deleteRecursivelyQuietly(stagingBase); + } } - if (!layout.isVerified(manifest)) { - throw new DownloadException( - "the offline archive did not yield a complete, verified runtime at " - + layout.versionDir()); - } - out.println("runtime ready: " + layout.versionDir()); + out.println("runtime ready: " + versionDir); out.flush(); } + + /** Recursively deletes {@code dir} (deepest first); a no-op if absent. */ + private static void deleteRecursively(Path dir) throws IOException { + if (!Files.exists(dir)) { + return; + } + List paths; + try (Stream walk = Files.walk(dir)) { + paths = walk.sorted(Comparator.reverseOrder()).toList(); + } + for (Path p : paths) { + Files.deleteIfExists(p); + } + } + + /** Best-effort recursive delete for staging cleanup — never throws. */ + private static void deleteRecursivelyQuietly(Path dir) { + try { + deleteRecursively(dir); + } catch (IOException ignored) { + // Cleanup failure must not mask the provisioning outcome. + } + } } diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/setup/Tar.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/setup/Tar.java index 3283e10..770222a 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/setup/Tar.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/setup/Tar.java @@ -13,14 +13,15 @@ import java.util.zip.GZIPInputStream; /** - * Minimal reader for {@code .tar.gz} archives — enough to extract a Temurin - * JRE without pulling in a third-party archive library. + * Minimal reader for {@code .tar.gz} archives — enough to extract the runtime + * and plugin bundles this tool provisions without pulling in a third-party + * archive library. * - *

Supports the USTAR fields the JRE archives use: regular files, directories, - * the file mode (so {@code bin/java} stays executable), and the GNU long-name - * extension ({@code 'L'} type-flag) some archives use for deep paths. Symbolic - * links inside the archive are skipped — the Temurin JRE layout does not rely - * on them for {@code bin/java}. + *

Supports the USTAR fields these archives use: regular files, directories, + * the file mode (so any extracted executable stays runnable), and the GNU + * long-name extension ({@code 'L'} type-flag) some archives use for deep paths. + * Symbolic links inside the archive are skipped — the bundles extracted here do + * not rely on them. * *

Path safety. Each entry path is resolved against the destination * and rejected if it escapes it ({@code zip-slip} / {@code tar-slip}). @@ -39,8 +40,9 @@ private Tar() { /** * Extracts a gzip-compressed tar stream into {@code destDir}, optionally * stripping the archive's single top-level directory component so that, for - * a {@code jdk-17.../bin/java} archive, {@code bin/java} lands directly - * under {@code destDir}. + * an archive wrapping its payload in one top-level folder (e.g. + * {@code bundle-1.2.3/bin/...}), the inner entries land directly under + * {@code destDir}. * * @param in the {@code .tar.gz} stream * @param destDir the directory to extract into @@ -124,36 +126,30 @@ private static String readEntryString(InputStream in, long size) throws IOExcept } /** - * Suppresses {@code java:S2612}: the {@code OTHERS_EXECUTE} bit is mirrored - * from the trusted Temurin JRE tar entry's recorded mode (Temurin ships - * shared binaries as world-executable so any user can run the JRE); we - * are not granting permissions beyond what the verified archive declares. + * Marks {@code file} owner-executable when the archive entry's mode carries + * any execute bit, so an extracted executable stays runnable by its owner. + * The archive's group- and other-execute bits are deliberately not + * mirrored: the daemon runs as the extracting user, owner execute is + * sufficient (see the non-POSIX fallback below, which has always granted + * owner-only), and the extracted bundle is not assumed to be trusted — + * granting world-execute from archive-declared bits would hand an + * attacker-influenced archive an unnecessary privilege. Owner being the only + * execute grant is also why this no longer trips {@code java:S2612}. */ - @SuppressWarnings("java:S2612") private static void applyMode(Path file, int mode) { if ((mode & 0_111) == 0) { - return; // no execute bits set + return; // no execute bits set in the archive entry } try { Set perms = EnumSet.copyOf(Files.getPosixFilePermissions(file)); - if ((mode & 0_100) != 0) { - perms.add(PosixFilePermission.OWNER_EXECUTE); - } - if ((mode & 0_010) != 0) { - perms.add(PosixFilePermission.GROUP_EXECUTE); - } - if ((mode & 0_001) != 0) { - // NOSONAR(java:S2612) the execute bit is mirrored from the - // tar entry's recorded mode (Temurin JRE binaries ship as - // world-executable); we are not granting permissions beyond - // what the trusted archive declares. - perms.add(PosixFilePermission.OTHERS_EXECUTE); - } + // Clamp to owner-only execute regardless of the archive's group/other + // bits — extraction must never widen executability beyond the owner. + perms.add(PosixFilePermission.OWNER_EXECUTE); Files.setPosixFilePermissions(file, perms); } catch (UnsupportedOperationException | IOException notPosix) { - // Owner-only execute is sufficient for our extracted JRE binaries; - // group/other execute are not required for the daemon launcher path. + // Non-POSIX filesystem: owner-only execute is sufficient for our + // extracted binaries; group/other execute are not required. if (!file.toFile().setExecutable(true, true)) { throw new UncheckedIOException(new IOException( "could not mark " + file + " executable " From b1e3614a80f7d1f263a4ebd25a3aeca3e1c767e6 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:02:42 +0000 Subject: [PATCH 05/24] fix(scripts): SHA-256-verify dist+JDK before extract; sign release assets (#1, #14) --- .github/workflows/publish.yml | 69 ++++++++++++++-- scripts/sonar-cli.ps1 | 116 +++++++++++++++++++++++++- scripts/sonar-cli.sh | 151 +++++++++++++++++++++++++++++++++- 3 files changed, 326 insertions(+), 10 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2f2adbd..06abdb7 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -46,12 +46,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout (full history for git archive + release notes) - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Set up JDK 21 + GPG - uses: actions/setup-java@v4 + uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1 with: distribution: temurin java-version: '21' @@ -62,7 +62,7 @@ jobs: gpg-passphrase: MAVEN_GPG_PASSPHRASE - name: Set up Node.js (JS analyzer tests need a Node runtime) - uses: actions/setup-node@v4 + uses: actions/setup-node@cdca7365b2dadb8aad0a33bc7601856ffabcc48e # v4.4.0 with: node-version: '20' @@ -114,6 +114,56 @@ jobs: set -euo pipefail mvn -B -ntp -Prelease clean deploy + - name: Compute SHA256SUMS for release assets + # The bootstrap wrappers (scripts/sonar-cli.{sh,ps1}) verify the dist + # zip against a pinned SHA-256 before extracting. They read the expected + # hash from a sibling SHA256SUMS file at the same base URL, so emit one + # here. Entries use bare filenames (no path) so the wrappers can match + # by artifact name. Signing/attestation is a separate, out-of-scope item. + run: | + set -euo pipefail + VERSION="${{ steps.version.outputs.version }}" + SRC_ZIP="sonar-predict-${VERSION}-src.zip" + DIST_ZIP="target/sonar-predictor-dist-${VERSION}.zip" + if [ ! -f "${DIST_ZIP}" ]; then + echo "::error::distribution bundle not found at ${DIST_ZIP}" + ls -la target || true + exit 1 + fi + : > SHA256SUMS + ( cd "$(dirname "${DIST_ZIP}")" && sha256sum "$(basename "${DIST_ZIP}")" ) >> SHA256SUMS + if [ -f "${SRC_ZIP}" ]; then + sha256sum "${SRC_ZIP}" >> SHA256SUMS + fi + echo "SHA256SUMS:" + cat SHA256SUMS + + - name: GPG-sign release assets (detached, armored) + # Detached signatures over the release assets, using the SAME GPG key + # already imported by setup-java above (gpg-private-key / + # gpg-passphrase). Consumers can verify the dist zip and the checksum + # manifest against the project's public key. This is the GitHub-Release + # counterpart to the Central deploy's maven-gpg-plugin signing — both + # use the same key; neither replaces the other. + env: + MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }} + run: | + set -euo pipefail + VERSION="${{ steps.version.outputs.version }}" + SRC_ZIP="sonar-predict-${VERSION}-src.zip" + DIST_ZIP="target/sonar-predictor-dist-${VERSION}.zip" + sign() { + gpg --batch --yes --pinentry-mode loopback \ + --passphrase "${MAVEN_GPG_PASSPHRASE}" \ + --armor --detach-sign "$1" + echo "Signed $1 -> $1.asc" + } + sign SHA256SUMS + sign "${DIST_ZIP}" + if [ -f "${SRC_ZIP}" ]; then + sign "${SRC_ZIP}" + fi + - name: Create the GitHub Release env: GH_TOKEN: ${{ github.token }} @@ -132,8 +182,17 @@ jobs: else TAG="v${VERSION}" fi + ASSETS=( + "${SRC_ZIP}" + "${DIST_ZIP}" + "SHA256SUMS" + "SHA256SUMS.asc" + "${DIST_ZIP}.asc" + ) + if [ -f "${SRC_ZIP}.asc" ]; then + ASSETS+=("${SRC_ZIP}.asc") + fi gh release create "${TAG}" \ --title "sonar-predictor ${VERSION}" \ --generate-notes \ - "${SRC_ZIP}" \ - "${DIST_ZIP}" + "${ASSETS[@]}" diff --git a/scripts/sonar-cli.ps1 b/scripts/sonar-cli.ps1 index 73cbd7a..df005fd 100644 --- a/scripts/sonar-cli.ps1 +++ b/scripts/sonar-cli.ps1 @@ -18,9 +18,27 @@ # SONAR_PREDICTOR_HOME Default: $env:USERPROFILE\.sonar-predictor # SONAR_PREDICTOR_FORCE "1" to re-download even if cached # +# Integrity verification (fail-closed by default): +# Every artifact (dist zip, JDK zip) is SHA-256-verified against a pinned +# expected value BEFORE it is extracted or moved into place. This mirrors the +# download->verify->promote contract the Java Downloader enforces, and matters +# most for the JDK: it is the trust root that runs every downstream in-JVM +# check, so an unverified JDK makes those checks moot. +# +# SONAR_DIST_SHA256 Expected lowercase-hex SHA-256 of the dist zip. +# SONAR_JDK_SHA256 Expected lowercase-hex SHA-256 of the JDK zip. +# If unset, the expected hash is read from a sibling checksum file at the SAME +# base URL the artifact came from (SHA256SUMS for the dist; .sha256.txt +# for Adoptium JDKs). +# SONAR_ALLOW_UNVERIFIED "1" to proceed when NO expected hash can be +# obtained. Prints a loud warning. Default (unset) +# refuses — no unverified bytes are extracted. +# # Nexus path conventions (rename if your layout differs): # {base}/sonar-predictor/{version}/sonar-predictor-dist-{version}.zip +# {base}/sonar-predictor/{version}/SHA256SUMS (sibling; lists dist-{version}.zip) # {base}/temurin/21/windows-{arch}.zip +# {base}/temurin/21/windows-{arch}.zip.sha256.txt (Adoptium sibling checksum) # # Usage: powershell -ExecutionPolicy Bypass -File scripts\sonar-cli.ps1 -- @@ -74,6 +92,89 @@ function Fetch([string]$Url, [string]$Target) { Move-Item $tmp $Target } +# Fetch-Optional — like Fetch, but returns $false instead of dying when the URL +# is absent (used for sibling checksum files, which may not exist on every +# mirror). Leaves no partial behind on failure. +function Fetch-Optional([string]$Url, [string]$Target) { + $dir = Split-Path -Parent $Target + New-Item -ItemType Directory -Force -Path $dir | Out-Null + $tmp = "$Target.partial" + if (Test-Path $tmp) { Remove-Item $tmp -Force } + try { + Invoke-WebRequest -Uri $Url -OutFile $tmp -UseBasicParsing -ErrorAction Stop + } catch { + if (Test-Path $tmp) { Remove-Item $tmp -Force } + return $false + } + if (Test-Path $Target) { Remove-Item $Target -Force } + Move-Item $tmp $Target + return $true +} + +# --- Integrity verification ------------------------------------------------ +# Get-Sha256 — lowercase-hex SHA-256 of a file via Get-FileHash (built in). +function Get-Sha256([string]$File) { + return (Get-FileHash -Path $File -Algorithm SHA256).Hash.ToLowerInvariant() +} + +# Get-ChecksumForArtifact — extracts the expected hash for $Name from a +# checksum file. Handles both the multi-line GNU " " SHA256SUMS +# format and a single bare/" " line (Adoptium .sha256.txt). Returns +# the lowercase hash, or $null if not found. +function Get-ChecksumForArtifact([string]$SumsFile, [string]$Name) { + $lines = Get-Content -Path $SumsFile -ErrorAction SilentlyContinue + if (-not $lines) { return $null } + foreach ($line in $lines) { + if ($line -match "[\s\*]$([regex]::Escape($Name))\s*$") { + return ($line -split '\s+')[0].ToLowerInvariant() + } + } + # Single-entry file: first line that begins with a 64-hex-char hash. + foreach ($line in $lines) { + if ($line -match '^\s*([0-9a-fA-F]{64})(\s|$)') { + return $Matches[1].ToLowerInvariant() + } + } + return $null +} + +# Resolve-ExpectedSha — resolves the expected SHA-256 in priority order: +# 1. $Override (explicit env value), if non-empty; +# 2. the sibling checksum file fetched from $SumsUrl. +# Returns the lowercase hash, or $null if none could be obtained. +function Resolve-ExpectedSha([string]$Override, [string]$Name, [string]$SumsUrl, [string]$CacheDir) { + if ($Override) { return $Override.Trim().ToLowerInvariant() } + $sumsFile = Join-Path $CacheDir ([System.IO.Path]::GetFileName($SumsUrl)) + if (Fetch-Optional $SumsUrl $sumsFile) { + return (Get-ChecksumForArtifact $sumsFile $Name) + } + return $null +} + +# Verify-Artifact — verifies $File's SHA-256 against the resolved expected +# value BEFORE any extract/move. On mismatch: deletes $File and dies non-zero, +# naming expected vs actual. If no expected hash is obtainable: dies unless +# SONAR_ALLOW_UNVERIFIED=1, in which case it prints a loud warning and returns. +function Verify-Artifact([string]$File, [string]$Name, [string]$Override, [string]$SumsUrl) { + $cache = Split-Path -Parent $File + $expected = Resolve-ExpectedSha $Override $Name $SumsUrl $cache + if (-not $expected) { + if ($env:SONAR_ALLOW_UNVERIFIED -eq '1') { + Write-Warning "sonar-cli: no SHA-256 available for $Name; proceeding UNVERIFIED" + Write-Warning "sonar-cli: SONAR_ALLOW_UNVERIFIED=1 bypasses integrity checks — this is NOT recommended." + return + } + if (Test-Path $File) { Remove-Item $File -Force } + Die "no expected SHA-256 for $Name (set SONAR_DIST_SHA256/SONAR_JDK_SHA256, publish a sibling checksum file, or set SONAR_ALLOW_UNVERIFIED=1 to override). Refusing to extract unverified bytes." + } + $actual = Get-Sha256 $File + if ($actual -ne $expected) { + if (Test-Path $File) { Remove-Item $File -Force } + Die "SHA-256 mismatch for ${Name}: expected $expected but got $actual. Refusing to extract." + } + Write-Host " verified $Name (sha256 $expected)" +} + # --- Bundle --------------------------------------------------------------- $DistDir = Join-Path $HomeDir "dist\$Version" $BundleDir = Join-Path $DistDir 'sonar-predictor' @@ -82,9 +183,13 @@ $BundleMarker = Join-Path $BundleDir 'bin\sonar.bat' function Ensure-Bundle { if ($env:SONAR_PREDICTOR_FORCE -ne '1' -and (Test-Path $BundleMarker)) { return } Write-Host "sonar-cli: provisioning sonar-predictor $Version into $DistDir" - $zip = Join-Path $HomeDir "cache\sonar-predictor-dist-$Version.zip" - $url = "$NexusBase/sonar-predictor/$Version/sonar-predictor-dist-$Version.zip" + $zip = Join-Path $HomeDir "cache\sonar-predictor-dist-$Version.zip" + $name = "sonar-predictor-dist-$Version.zip" + $url = "$NexusBase/sonar-predictor/$Version/$name" + $sumsUrl = "$NexusBase/sonar-predictor/$Version/SHA256SUMS" Fetch $url $zip + # Verify integrity BEFORE extracting or touching the install location. + Verify-Artifact $zip $name $env:SONAR_DIST_SHA256 $sumsUrl if (Test-Path $DistDir) { Remove-Item $DistDir -Recurse -Force } New-Item -ItemType Directory -Force -Path $DistDir | Out-Null try { @@ -148,8 +253,13 @@ function Ensure-CachedJdk { if ((Test-Path $JdkMarker) -and (Test-JavaMinPlus $JdkMarker)) { return } Write-Host "sonar-cli: provisioning Temurin JDK 21 ($Os-$arch) into $JdkDir" $archive = Join-Path $HomeDir "cache\temurin-21-$Os-$arch.zip" - $url = "$NexusBase/temurin/21/$Os-$arch.zip" + $name = "$Os-$arch.zip" + $url = "$NexusBase/temurin/21/$name" + $sumsUrl = "$url.sha256.txt" Fetch $url $archive + # Verify integrity BEFORE extracting — the JDK is the trust root that runs + # every downstream in-JVM SHA-256 check, so this gate is the critical one. + Verify-Artifact $archive $name $env:SONAR_JDK_SHA256 $sumsUrl $stage = Join-Path $HomeDir "cache\jdk-stage-$PID" if (Test-Path $stage) { Remove-Item $stage -Recurse -Force } diff --git a/scripts/sonar-cli.sh b/scripts/sonar-cli.sh index 9705366..b031ea3 100755 --- a/scripts/sonar-cli.sh +++ b/scripts/sonar-cli.sh @@ -20,9 +20,27 @@ # SONAR_PREDICTOR_HOME Default: $HOME/.sonar-predictor # SONAR_PREDICTOR_FORCE "1" to re-download even if cached # +# Integrity verification (fail-closed by default): +# Every artifact (dist zip, JDK tarball) is SHA-256-verified against a pinned +# expected value BEFORE it is extracted or moved into place. This mirrors the +# download->verify->promote contract the Java Downloader enforces, and matters +# most for the JDK: it is the trust root that runs every downstream in-JVM +# check, so an unverified JDK makes those checks moot. +# +# SONAR_DIST_SHA256 Expected lowercase-hex SHA-256 of the dist zip. +# SONAR_JDK_SHA256 Expected lowercase-hex SHA-256 of the JDK tarball. +# If unset, the expected hash is read from a sibling checksum file at the SAME +# base URL the artifact came from (SHA256SUMS for the dist — see publish.yml; +# .sha256.txt for Adoptium JDKs). +# SONAR_ALLOW_UNVERIFIED "1" to proceed when NO expected hash can be +# obtained. Prints a loud warning. Default (unset) +# refuses — no unverified bytes are extracted. +# # Nexus path conventions (rename via search-replace if your layout differs): # {base}/sonar-predictor/{version}/sonar-predictor-dist-{version}.zip +# {base}/sonar-predictor/{version}/SHA256SUMS (sibling; lists dist-{version}.zip) # {base}/temurin/21/{os}-{arch}.tar.gz (linux, mac) +# {base}/temurin/21/{os}-{arch}.tar.gz.sha256.txt (Adoptium sibling checksum) # {base}/temurin/21/{os}-{arch}.zip (windows — handled by .ps1) # # POSIX sh — no bashisms. Tested with dash, bash, zsh. @@ -72,6 +90,17 @@ fi command -v unzip >/dev/null 2>&1 || die "unzip is required (extracts the dist zip)." command -v tar >/dev/null 2>&1 || die "tar is required (extracts the JDK tarball)." +# SHA-256 tool: sha256sum (Linux) or `shasum -a 256` (macOS). Required to +# verify artifact integrity before extract. +SHA256_TOOL="" +if command -v sha256sum >/dev/null 2>&1; then + SHA256_TOOL="sha256sum" +elif command -v shasum >/dev/null 2>&1; then + SHA256_TOOL="shasum -a 256" +else + die "neither sha256sum nor shasum is on PATH — required to verify artifact integrity." +fi + # --- Download helpers ------------------------------------------------------ # fetch URL TARGET — atomic write via .partial; fails loudly on HTTP error. fetch() { @@ -95,6 +124,115 @@ fetch() { mv "$_tmp" "$_target" } +# fetch_optional URL TARGET — like fetch, but returns non-zero instead of +# dying when the URL is absent (used for sibling checksum files, which may not +# exist on every mirror). Leaves no partial behind on failure. +fetch_optional() { + _url="$1" + _target="$2" + _tmp="${_target}.partial" + mkdir -p "$(dirname "$_target")" + rm -f "$_tmp" + case "$DOWNLOADER" in + curl) + curl --fail --silent --show-error --location \ + --output "$_tmp" "$_url" >/dev/null 2>&1 \ + || { rm -f "$_tmp"; return 1; } + ;; + wget) + wget --quiet --output-document="$_tmp" "$_url" >/dev/null 2>&1 \ + || { rm -f "$_tmp"; return 1; } + ;; + esac + mv "$_tmp" "$_target" +} + +# --- Integrity verification ------------------------------------------------ +# sha256_of FILE — prints the lowercase-hex SHA-256 of FILE using whichever +# standard tool is present (sha256sum on Linux, shasum -a 256 on macOS). +sha256_of() { + _f="$1" + if [ -n "$SHA256_TOOL" ]; then + # Word-splitting of SHA256_TOOL is intentional (e.g. "shasum -a 256"). + # shellcheck disable=SC2086 + $SHA256_TOOL "$_f" | awk '{print $1}' + else + die "no SHA-256 tool available (need sha256sum or shasum)." + fi +} + +# checksum_for_artifact SUMS_FILE ARTIFACT_NAME — extracts the expected hash +# for ARTIFACT_NAME from a checksum file. Handles both the multi-line GNU +# " " SHA256SUMS format and a single bare/" " line as +# Adoptium publishes in .sha256.txt. Prints the lowercase hash, or +# nothing if not found. +checksum_for_artifact() { + _sums="$1" + _name="$2" + # Prefer a line that names the artifact; fall back to a lone bare hash + # (Adoptium's per-file .sha256.txt is sometimes just " "). + _line=$(grep -E "[[:space:]]\*?${_name}\$" "$_sums" 2>/dev/null | head -n 1) + if [ -z "$_line" ]; then + # Single-entry file: take the first field of the first non-empty line. + _line=$(grep -E '^[0-9a-fA-F]{64}([[:space:]]|$)' "$_sums" 2>/dev/null | head -n 1) + fi + [ -n "$_line" ] || return 0 + printf '%s' "$_line" | awk '{print $1}' | tr 'A-Z' 'a-z' +} + +# expected_sha ENV_OVERRIDE ARTIFACT_URL ARTIFACT_NAME SUMS_URL CACHE_DIR +# Resolves the expected SHA-256 in priority order: +# 1. ENV_OVERRIDE (already-resolved value passed by caller), if non-empty; +# 2. the sibling checksum file fetched from SUMS_URL. +# Prints the lowercase-hex hash on success, or nothing if none could be +# obtained. Never extracts/promotes anything. +expected_sha() { + _override="$1" + _name="$3" + _sums_url="$4" + _cache="$5" + if [ -n "$_override" ]; then + printf '%s' "$_override" | tr 'A-Z' 'a-z' + return 0 + fi + _sums_file="$_cache/$(basename "$_sums_url")" + if fetch_optional "$_sums_url" "$_sums_file"; then + checksum_for_artifact "$_sums_file" "$_name" + fi +} + +# verify_artifact FILE ARTIFACT_URL ARTIFACT_NAME ENV_OVERRIDE SUMS_URL +# Verifies FILE's SHA-256 against the resolved expected value BEFORE any +# extract/move. On mismatch: deletes FILE and dies non-zero, naming expected +# vs actual. If no expected hash is obtainable: dies unless +# SONAR_ALLOW_UNVERIFIED=1, in which case it prints a loud warning and +# returns. Never returns success on a mismatch. +verify_artifact() { + _file="$1" + _art_url="$2" + _name="$3" + _override="$4" + _sums_url="$5" + _cache=$(dirname "$_file") + + _exp=$(expected_sha "$_override" "$_art_url" "$_name" "$_sums_url" "$_cache") + if [ -z "$_exp" ]; then + if [ "${SONAR_ALLOW_UNVERIFIED:-}" = "1" ]; then + echo "sonar-cli: WARNING: no SHA-256 available for $_name; proceeding UNVERIFIED" >&2 + echo "sonar-cli: WARNING: SONAR_ALLOW_UNVERIFIED=1 bypasses integrity checks — this is NOT recommended." >&2 + return 0 + fi + rm -f "$_file" + die "no expected SHA-256 for $_name (set SONAR_DIST_SHA256/SONAR_JDK_SHA256, publish a sibling checksum file, or set SONAR_ALLOW_UNVERIFIED=1 to override). Refusing to extract unverified bytes." + fi + _act=$(sha256_of "$_file") + if [ "$_act" != "$_exp" ]; then + rm -f "$_file" + die "SHA-256 mismatch for $_name: expected $_exp but got $_act. Refusing to extract." + fi + echo " verified $_name (sha256 $_exp)" +} + # --- Bundle: ensure ~/.sonar-predictor/dist//sonar-predictor/bin/sonar -- DIST_DIR="$HOME_DIR/dist/$VERSION" BUNDLE_DIR="$DIST_DIR/sonar-predictor" @@ -106,8 +244,12 @@ ensure_bundle() { fi echo "sonar-cli: provisioning sonar-predictor $VERSION into $DIST_DIR" _zip="$HOME_DIR/cache/sonar-predictor-dist-$VERSION.zip" - _url="$NEXUS_BASE/sonar-predictor/$VERSION/sonar-predictor-dist-$VERSION.zip" + _name="sonar-predictor-dist-$VERSION.zip" + _url="$NEXUS_BASE/sonar-predictor/$VERSION/$_name" + _sums_url="$NEXUS_BASE/sonar-predictor/$VERSION/SHA256SUMS" fetch "$_url" "$_zip" + # Verify integrity BEFORE extracting or touching the install location. + verify_artifact "$_zip" "$_url" "$_name" "${SONAR_DIST_SHA256:-}" "$_sums_url" rm -rf "$DIST_DIR" mkdir -p "$DIST_DIR" unzip -q "$_zip" -d "$DIST_DIR" || die "could not extract $_zip" @@ -169,8 +311,13 @@ ensure_cached_jdk() { fi echo "sonar-cli: provisioning Temurin JDK 21 ($OS-$ARCH) into $JDK_DIR" _archive="$HOME_DIR/cache/temurin-21-$OS-$ARCH.tar.gz" - _url="$NEXUS_BASE/temurin/21/$OS-$ARCH.tar.gz" + _name="$OS-$ARCH.tar.gz" + _url="$NEXUS_BASE/temurin/21/$_name" + _sums_url="$_url.sha256.txt" fetch "$_url" "$_archive" + # Verify integrity BEFORE extracting — the JDK is the trust root that runs + # every downstream in-JVM SHA-256 check, so this gate is the critical one. + verify_artifact "$_archive" "$_url" "$_name" "${SONAR_JDK_SHA256:-}" "$_sums_url" # Extract into a staging dir, then move the (single) top-level JDK dir # contents into $JDK_DIR. Temurin tarballs nest one level (jdk-21.0.5+11/), From 559e0b4e38ae2c80ffc0f0a006ad43f27dcd55eb Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:02:42 +0000 Subject: [PATCH 06/24] ci: CVE/SBOM gate, dependabot, test+job timeouts, pin actions to SHAs (#4, #6, #16) --- .github/dependabot.yml | 36 +++++++ .github/workflows/ci.yml | 103 ++++++++++++++++++- .github/workflows/parity.yml | 13 +-- .github/workflows/sonar.yml | 10 +- pom.xml | 56 ++++++++++ src/test/resources/junit-platform.properties | 11 ++ 6 files changed, 215 insertions(+), 14 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 src/test/resources/junit-platform.properties diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..455dde1 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,36 @@ +# Dependabot keeps the Maven dependency tree and the GitHub Actions used in +# our workflows up to date. Weekly cadence keeps PR noise low for a project +# whose third-party surface (Jackson, picocli, SonarSource analyzers) moves +# slowly. Security updates from Dependabot complement the osv-scanner CVE +# gate in ci.yml: the gate blocks, Dependabot proposes the fix. +version: 2 +updates: + - package-ecosystem: maven + directory: "/" + schedule: + interval: weekly + day: monday + open-pull-requests-limit: 5 + labels: + - dependencies + commit-message: + prefix: "chore(deps)" + groups: + # Batch routine version bumps into one PR per ecosystem to cut churn; + # security updates still come through individually. + maven-minor-patch: + update-types: + - minor + - patch + + - package-ecosystem: github-actions + directory: "/" + schedule: + interval: weekly + day: monday + open-pull-requests-limit: 5 + labels: + - dependencies + - github-actions + commit-message: + prefix: "chore(ci)" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c11fe9..1cca48a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,19 +12,20 @@ on: jobs: test: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up JDK 21 - uses: actions/setup-java@v4 + uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1 with: distribution: temurin java-version: '21' cache: maven - name: Set up Node.js (JS/TS analyzer tests need a Node runtime) - uses: actions/setup-node@v4 + uses: actions/setup-node@cdca7365b2dadb8aad0a33bc7601856ffabcc48e # v4.4.0 with: node-version: '20' @@ -32,3 +33,99 @@ jobs: # `verify`, not `test`: the cli integration tests spawn the daemon and # need its shaded fat jar, which is only built at the package phase. run: mvn -B -ntp verify + + # Dependency CVE gate + SBOM. Runs independently of `test` so neither blocks + # the other. The SBOM is produced via the `-Psbom` Maven profile (off by + # default — a plain `mvn verify` never touches it), then scanned offline with + # osv-scanner against a cached vulnerability DB. Gate policy (security.md): + # HIGH/CRITICAL (CVSS base score >= 7.0) hard-fail the build; Medium/Low are + # surfaced in the logs but do NOT block (documented, not gated). + security: + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + contents: read + env: + # osv-scanner reads its local DB from here; we cache this directory so + # the NVD/OSV feed is fetched at most once per week, never per-run, and + # the scan itself runs with --offline-vulnerabilities (no live feed). + OSV_SCANNER_LOCAL_DB_CACHE_DIRECTORY: ${{ github.workspace }}/.osv-db + OSV_SCANNER_VERSION: v2.3.8 + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Set up JDK 21 + uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1 + with: + distribution: temurin + java-version: '21' + cache: maven + + - name: Generate CycloneDX SBOM + # `package`, because the SBOM aggregates the resolved dependency graph; + # -Psbom is the only thing that activates the cyclonedx plugin. Skip + # tests here — the `test` job already runs them; we only need the graph. + run: mvn -B -ntp -Psbom -DskipTests package + # Plugins/ jars are fetched as part of package; resolution stays inside + # Maven (settings.xml mirrors / Nexus / proxy apply), as elsewhere. + + - name: Cache OSV vulnerability database + uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 + with: + path: ${{ env.OSV_SCANNER_LOCAL_DB_CACHE_DIRECTORY }} + # Rotate weekly so the offline DB stays reasonably fresh without + # re-downloading on every run. + key: osv-db-${{ env.OSV_SCANNER_VERSION }}-${{ github.run_id }} + restore-keys: | + osv-db-${{ env.OSV_SCANNER_VERSION }}- + + - name: Install osv-scanner (pinned) + run: | + set -euo pipefail + mkdir -p "$RUNNER_TEMP/osv" + curl -fsSL -o "$RUNNER_TEMP/osv/osv-scanner" \ + "https://github.com/google/osv-scanner/releases/download/${OSV_SCANNER_VERSION}/osv-scanner_linux_amd64" + chmod +x "$RUNNER_TEMP/osv/osv-scanner" + echo "$RUNNER_TEMP/osv" >> "$GITHUB_PATH" + + - name: Scan dependencies (offline DB) and gate on HIGH/CRITICAL + run: | + set -uo pipefail + mkdir -p "$OSV_SCANNER_LOCAL_DB_CACHE_DIRECTORY" + # outputName uses ${project.version}; resolve the actual emitted file. + SBOM=$(ls target/*-bom.json | head -n1) + echo "Scanning SBOM: $SBOM" + # --offline-vulnerabilities: never contacts the live feed during scan. + # --download-offline-databases: refresh the cached DB if cache missed. + # osv-scanner exits 1 on ANY finding; we ignore its exit code and gate + # ourselves on CVSS severity from the JSON instead. + osv-scanner scan \ + --offline-vulnerabilities \ + --download-offline-databases \ + --format json \ + --sbom "$SBOM" > osv-results.json || true + # Gate: max_severity is a CVSS base-score string per vulnerability + # group. >= 7.0 == HIGH or CRITICAL (CVSS v3). Anything below is + # non-blocking per security.md. An unscored group (null or "") is + # coerced to 0 so a missing CVSS never crashes or trips the gate. + SEV='((.max_severity // "0") | if . == "" then "0" else . end | tonumber)' + HIGH=$(jq "[.results[]?.packages[]?.groups[]? | ${SEV} | select(. >= 7.0)] | length" osv-results.json) + echo "HIGH/CRITICAL findings (CVSS >= 7.0): ${HIGH}" + jq -r ".results[]?.packages[]? | .package as \$p | .groups[]? | ${SEV} as \$s | select(\$s >= 7.0) | \" BLOCK \(\$p.name)@\(\$p.version) CVSS=\(.max_severity) \(.ids | join(\",\"))\"" osv-results.json || true + if [ "${HIGH:-0}" -gt 0 ]; then + echo "::error::${HIGH} HIGH/CRITICAL dependency vulnerabilit(ies) found — failing per security policy." + exit 1 + fi + echo "No HIGH/CRITICAL dependency vulnerabilities. (Medium/Low, if any, are non-blocking.)" + + - name: Upload SBOM + scan results + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: sbom-and-cve-scan + path: | + target/*-bom.json + target/*-bom.xml + osv-results.json + if-no-files-found: warn diff --git a/.github/workflows/parity.yml b/.github/workflows/parity.yml index cc73b57..dcd6d1d 100644 --- a/.github/workflows/parity.yml +++ b/.github/workflows/parity.yml @@ -35,6 +35,7 @@ jobs: parity: name: Scan parity runs-on: ubuntu-latest + timeout-minutes: 45 # Skip when the token isn't reachable (fork PRs) — the standalone # self-scan workflow still gates fork PRs on our daemon's findings. if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }} @@ -45,23 +46,23 @@ jobs: MAVEN_OPTS: -Xmx2g steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Set up JDK 21 - uses: actions/setup-java@v4 + uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1 with: distribution: temurin java-version: '21' - name: Set up Node.js 20 - uses: actions/setup-node@v4 + uses: actions/setup-node@cdca7365b2dadb8aad0a33bc7601856ffabcc48e # v4.4.0 with: node-version: '20' - name: Cache Maven repository - uses: actions/cache@v4 + uses: actions/cache@v4 # TODO(pin): resolve SHA for actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} @@ -69,7 +70,7 @@ jobs: ${{ runner.os }}-maven- - name: Cache SonarQube Cloud packages - uses: actions/cache@v4 + uses: actions/cache@v4 # TODO(pin): resolve SHA for actions/cache@v4 with: path: ~/.sonar/cache key: ${{ runner.os }}-sonar @@ -195,7 +196,7 @@ jobs: - name: Upload scan + parity artifacts if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # TODO(pin): resolve SHA for actions/upload-artifact@v4 with: name: scan-parity-${{ github.run_id }} path: | diff --git a/.github/workflows/sonar.yml b/.github/workflows/sonar.yml index e521b74..fe49f1a 100644 --- a/.github/workflows/sonar.yml +++ b/.github/workflows/sonar.yml @@ -35,14 +35,14 @@ jobs: # fetch-depth: 0 keeps the full history available so we can switch to # `--diff`-style semantics later without re-checking out the repo. - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 # JDK 21 is the project's build/runtime target (required by # sonarlint-analysis-engine 11.x / LTA 2026.1). Temurin is the safe default. - name: Set up JDK 21 - uses: actions/setup-java@v4 + uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1 with: distribution: temurin java-version: '21' @@ -50,7 +50,7 @@ jobs: # The JS/TS analyzer plugin spawns Node at runtime to lint JS/TS sources, # so Node must be on PATH when the scan runs (not just at build time). - name: Set up Node.js 20 - uses: actions/setup-node@v4 + uses: actions/setup-node@cdca7365b2dadb8aad0a33bc7601856ffabcc48e # v4.4.0 with: node-version: '20' @@ -58,7 +58,7 @@ jobs: # dependency change invalidates cleanly; restore-keys lets a partial # cache hit still seed most of ~/.m2. - name: Cache Maven repository - uses: actions/cache@v4 + uses: actions/cache@v4 # TODO(pin): resolve SHA for actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} @@ -146,7 +146,7 @@ jobs: # cover a typical PR review cycle without pinning storage forever. - name: Upload scan JSON if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # TODO(pin): resolve SHA for actions/upload-artifact@v4 with: name: sonar-scan-${{ github.run_id }} path: .sonar-predictor/scan.json diff --git a/pom.xml b/pom.xml index 1fa3a28..021e6ca 100644 --- a/pom.xml +++ b/pom.xml @@ -192,6 +192,22 @@ maven-compiler-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + 900 + + + + + sbom + + + + org.cyclonedx + cyclonedx-maven-plugin + 2.9.1 + + + make-bom + package + makeAggregateBom + + + + sonar-predictor-${project.version}-bom + + all + application + 1.5 + + + + + + + 2026-01-01T00:00:00Z 2.17.2 5.10.2 From 208bf2235f3e796552a2c81c56df00d18fbab8f6 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:31:29 +0000 Subject: [PATCH 13/24] build: enforce 0.80 line-coverage floor via JaCoCo check (was docs-only) (#19) --- pom.xml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pom.xml b/pom.xml index ef10708..925b7fd 100644 --- a/pom.xml +++ b/pom.xml @@ -278,6 +278,32 @@ verify report + + + check + verify + check + + + + BUNDLE + + + LINE + COVEREDRATIO + 0.80 + + + + + + From 62e1360cd9c1963c2cdac955905b648681e03040 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:40:30 +0000 Subject: [PATCH 14/24] fix(cli): report the real build version in --version, version cmd, and SARIF (#15) --- .../sonarpredict/cli/SarifReporter.java | 2 +- .../sonarpredict/cli/SonarCommand.java | 6 +-- .../cli/SonarVersionProvider.java | 38 +++++++++++++++++++ .../sonarpredict/cli/CommandTest.java | 18 ++++++++- .../sonarpredict/cli/SarifReporterTest.java | 5 +++ 5 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarVersionProvider.java diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SarifReporter.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SarifReporter.java index dffd90c..695645c 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SarifReporter.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SarifReporter.java @@ -48,7 +48,7 @@ public String render(AnalyzeResponse response, RuleMetadataIndex index, ObjectNode driver = tool.putObject("driver"); driver.put("name", "sonar-predictor"); driver.put("informationUri", "https://github.com/sonarcli/sonar-predictor"); - driver.put("version", SonarCommand.VERSION); + driver.put("version", SonarVersionProvider.version()); // Distinct rule keys, in first-seen order, become driver.rules[]. Set ruleKeys = new LinkedHashSet<>(); diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarCommand.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarCommand.java index a006f88..713bb27 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarCommand.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarCommand.java @@ -35,7 +35,7 @@ @Command( name = "sonar", mixinStandardHelpOptions = true, - version = "sonar " + SonarCommand.VERSION, + versionProvider = SonarVersionProvider.class, description = { "Run the sonar analysis quality gate.", "", @@ -85,8 +85,6 @@ public final class SonarCommand implements Runnable { /** Exit code for a tool error — bad input, daemon unreachable. */ public static final int EXIT_TOOL_ERROR = 2; - static final String VERSION = "0.1.0-SNAPSHOT"; - @Option(names = "--format", description = "Output format: sarif, json, or text. Default: sarif.") private OutputFormat format = OutputFormat.SARIF; @@ -362,7 +360,7 @@ static final class VersionCommand implements Runnable { @Override public void run() { - parent.spec.commandLine().getOut().println("sonar " + VERSION); + parent.spec.commandLine().getOut().println("sonar " + SonarVersionProvider.version()); } } diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarVersionProvider.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarVersionProvider.java new file mode 100644 index 0000000..773db4c --- /dev/null +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SonarVersionProvider.java @@ -0,0 +1,38 @@ +package io.github.randomcodespace.sonarpredict.cli; + +import picocli.CommandLine.IVersionProvider; + +/** + * Supplies the CLI version at runtime for picocli's {@code --version} option and + * for every other surface that reports the tool version (the {@code version} + * subcommand and the SARIF {@code tool.driver.version}). + * + *

Resolved from the build, mirroring the daemon's own version resolution: the + * {@code Implementation-Version} of this class's package, populated from the + * Maven artifact version when the CLI runs from its packaged (shaded) JAR — the + * shade manifest sets {@code Implementation-Version} to {@code ${project.version}}, + * so the CLI and daemon report the same number. In a test/exploded-classes run no + * manifest is present, so a {@code dev} fallback is used and the value is never + * {@code null}. + * + *

This lives in the {@code cli} package — not borrowed from the daemon — to + * keep the cli↛daemon boundary intact; it merely replicates the same minimal + * manifest-read mechanism. + */ +public final class SonarVersionProvider implements IVersionProvider { + + private static final String FALLBACK = "0.3.0-dev"; + + /** The current CLI version, never {@code null}. */ + public static String version() { + Package pkg = SonarVersionProvider.class.getPackage(); + String version = pkg != null ? pkg.getImplementationVersion() : null; + return version != null && !version.isBlank() ? version : FALLBACK; + } + + /** picocli's {@code --version} contract: the lines to print. */ + @Override + public String[] getVersion() { + return new String[] {"sonar " + version()}; + } +} diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/CommandTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/CommandTest.java index 63c4206..364f705 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/cli/CommandTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/CommandTest.java @@ -125,12 +125,28 @@ private static Issue issue(String file, String ruleKey, String severity) { } @Test - @DisplayName("version prints the CLI version and exits 0") + @DisplayName("version prints the runtime CLI version and exits 0") void versionExitsZero() { Run run = run(rpc(), control(), "version"); assertEquals(0, run.exitCode(), "version must exit 0"); assertFalse(run.out().isBlank(), "version must print something"); + assertTrue(run.out().contains(SonarVersionProvider.version()), + "version must report the runtime build version, got: " + run.out()); + assertFalse(run.out().contains("0.1.0"), + "version must not report the stale 0.1.0 literal, got: " + run.out()); + } + + @Test + @DisplayName("--version reports the runtime CLI version (not the stale literal)") + void versionFlagReportsRuntimeVersion() { + Run run = run(rpc(), control(), "--version"); + + assertEquals(0, run.exitCode(), "--version must exit 0"); + assertTrue(run.out().contains(SonarVersionProvider.version()), + "--version must report the runtime build version, got: " + run.out()); + assertFalse(run.out().contains("0.1.0"), + "--version must not report the stale 0.1.0 literal, got: " + run.out()); } @Test diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/SarifReporterTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/SarifReporterTest.java index e1ed141..045def6 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/cli/SarifReporterTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/SarifReporterTest.java @@ -1,6 +1,7 @@ package io.github.randomcodespace.sonarpredict.cli; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -63,6 +64,10 @@ void sarifHasOneRunWithDriver() throws Exception { assertEquals("sonar-predictor", driver.get("name").asText(), "the driver name must be sonar-predictor"); assertNotNull(driver.get("rules"), "the driver must carry a 'rules' array"); + assertEquals(SonarVersionProvider.version(), driver.get("version").asText(), + "the driver version must be the runtime build version"); + assertNotEquals("0.1.0-SNAPSHOT", driver.get("version").asText(), + "the driver version must not report the stale 0.1.0 literal"); } @Test From 03d5463e97712396b25754cd22220f267ff09eb9 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 07:47:38 +0000 Subject: [PATCH 15/24] fix(daemon): reset EngineLog per scan to stop unbounded growth across the daemon's life (#9) --- .../sonarpredict/daemon/AnalysisService.java | 8 ++++ .../sonarpredict/daemon/EngineLog.java | 21 ++++++++++ .../sonarpredict/daemon/EngineLogTest.java | 42 +++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java index fa0f5b4..c8db8ba 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java @@ -291,6 +291,14 @@ private AnalyzeResponse analyzeLocked(AnalyzeRequest request) { Set.of(), Map.of()); + // Clear the engine log before posting. One EngineLog lives for the + // whole daemon; left alone its backing list grows for every log line of + // every scan across the daemon's ~50-minute life — an unbounded leak. + // Resetting here bounds it to this single analysis. Safe: every analysis + // holds analysisLock and the prior scan's await() returned before the + // lock was released, so no engine worker thread is appending right now. + engineLog.reset(); + // Snapshot the engine log before posting: every analysis runs holding // analysisLock, so the messages appended between here and the result // belong to exactly this analysis. They are scanned below for a diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/EngineLog.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/EngineLog.java index ae154eb..287dd26 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/EngineLog.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/EngineLog.java @@ -72,6 +72,27 @@ public void log(String formattedMessage, Level level) { } } + /** + * Discards every message collected so far, scoping this target to the lines + * received from now on. + * + *

A single {@code EngineLog} lives for the whole daemon and receives a + * line for every engine log message of every scan. Without this reset the + * backing list grows unbounded across the daemon's lifetime — hundreds of + * scans over its ~50-minute life — a memory leak. {@code AnalysisService} + * calls it at the start of each analysis (holding its analysis lock, before + * the engine worker thread is posted), so the list is bounded to one scan's + * lines. + * + *

{@code clear()} on the backing {@link CopyOnWriteArrayList} is atomic + * and publishes through the same volatile array as {@link #log}, so the + * reset cannot race or tear against an {@link #log add} from the engine + * worker thread. + */ + public void reset() { + messages.clear(); + } + /** Messages received by this instance, in arrival order. */ public List messages() { return List.copyOf(messages); diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/EngineLogTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/EngineLogTest.java index 8c99ba3..9307731 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/EngineLogTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/EngineLogTest.java @@ -1,9 +1,11 @@ package io.github.randomcodespace.sonarpredict.daemon; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.List; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.sonarsource.sonarlint.core.commons.log.LogOutput; @@ -27,4 +29,44 @@ void engineLog_collectsMessages() { assertFalse(log.messages().isEmpty()); assertTrue(log.messages().contains("hello engine")); } + + @Test + @DisplayName("reset discards earlier messages so a single scan still sees its own lines") + void reset_thenLog_keepsCurrentScanLines() { + EngineLog log = new EngineLog(); + log.log("Error executing sensor: 'JavaScript/TypeScript analysis'", + LogOutput.Level.ERROR); + + // Start of a scan: discard whatever a prior scan left behind. + log.reset(); + assertTrue(log.messages().isEmpty()); + + // The current scan's lines are still fully captured (equivalence: a + // single scan that logs a warning surfaces exactly that warning). + log.log("Error executing sensor: 'CSS Rules'", LogOutput.Level.ERROR); + + assertEquals( + List.of("Error executing sensor: 'CSS Rules'"), + log.messages()); + } + + @Test + @DisplayName("messages() holds only the last scan's lines across many reset scans (leak guard)") + void reset_acrossManyScans_doesNotAccumulate() { + EngineLog log = new EngineLog(); + + // Three simulated scans, each resetting at its start and adding two + // lines. Pre-fix the backing list grew to 6 lines (the daemon-lifetime + // leak); with reset() it stays bounded to one scan's two lines. + for (int scan = 0; scan < 3; scan++) { + log.reset(); + log.log("scan " + scan + " line A", LogOutput.Level.INFO); + log.log("scan " + scan + " line B", LogOutput.Level.INFO); + } + + assertEquals( + List.of("scan 2 line A", "scan 2 line B"), + log.messages(), + "messages() must hold only the last scan's lines, not the accumulation"); + } } From 03be9a96510e95869b2f255a9c3643625cf0916b Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 08:00:07 +0000 Subject: [PATCH 16/24] feat(cli): capture daemon child output to daemon.log and surface the tail on startup failure (#3) --- .../sonarpredict/cli/DaemonLauncher.java | 78 +++++++++++++++++-- .../sonarpredict/daemon/Daemon.java | 10 ++- .../sonarpredict/cli/DaemonLauncherTest.java | 45 +++++++++++ 3 files changed, 125 insertions(+), 8 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java index 894c464..cc2db37 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java @@ -95,6 +95,9 @@ public final class DaemonLauncher { private static final String DAEMON_JAR_PREFIX = "sonar-predictor-daemon-"; + /** Number of trailing {@code daemon.log} lines surfaced on a failed start. */ + private static final int LOG_TAIL_LINES = 20; + private final SocketPaths paths; private final Duration startupTimeout; @@ -200,8 +203,17 @@ private Process spawn() { // The daemon resolves SocketPaths from its environment; force it onto // the same runtime directory this launcher expects. builder.environment().put("XDG_RUNTIME_DIR", paths.socket().getParent().toString()); - builder.redirectOutput(ProcessBuilder.Redirect.DISCARD); - builder.redirectError(ProcessBuilder.Redirect.DISCARD); + // Capture the detached child's stdout+stderr to a log file in the same + // runtime directory. A file redirect (not PIPE) is mandatory: the child + // is long-lived and detached, so there is no drain thread to consume a + // pipe — an unconsumed PIPE would eventually block the daemon. On the + // failure path awaitSocket() reads this log's tail to surface the + // child's real stack trace instead of a bare exit code. The daemon + // removes the log on stop (see Daemon.start's onStop), so a clean + // start/stop cycle leaves the runtime directory just as before. + Path log = logFile(); + builder.redirectOutput(ProcessBuilder.Redirect.to(log.toFile())); + builder.redirectErrorStream(true); try { return builder.start(); } catch (IOException e) { @@ -209,6 +221,11 @@ private Process spawn() { } } + /** The daemon's startup log, beside the socket in the runtime directory. */ + private Path logFile() { + return paths.socket().getParent().resolve("daemon.log"); + } + /** * Builds the JVM command line that launches the daemon. * @@ -303,9 +320,21 @@ private void awaitSocket(Process process) { return; } if (!process.isAlive() && !isDaemonRunning()) { - throw new IllegalStateException( - "daemon process exited (code " + process.exitValue() - + ") before its socket began accepting connections"); + int code = process.exitValue(); + String message = "daemon process exited (code " + code + + ") before its socket began accepting connections"; + if (code == 0) { + // Clean exit with no socket: the child detected a live + // daemon and bowed out (lost the start race). Not a crash — + // throw the bare message, no log tail (the log would only + // hold the benign "another daemon already owns the socket" + // notice, never a failure). + throw new IllegalStateException(message); + } + // Genuine crash: the child JVM exited non-zero before binding. + // Surface the captured log tail so an air-gapped user sees the + // real stack trace instead of a bare exit code. + throw failureWithDiagnostics(message); } try { Thread.sleep(20); @@ -314,10 +343,47 @@ private void awaitSocket(Process process) { throw new IllegalStateException("interrupted while waiting for the daemon", e); } } - throw new IllegalStateException( + throw failureWithDiagnostics( "daemon socket did not start accepting within " + startupTimeout); } + /** + * Builds an {@link IllegalStateException} for a genuine startup failure, + * appending the tail of the captured {@code daemon.log} when available so + * the child's real error is visible. The diagnostics path is best-effort + * and never throws: a missing or unreadable log just yields the bare + * {@code message}. When {@code SONAR_DEBUG} is set the same detail is also + * echoed to {@code stderr} for visibility in non-fatal flows. + */ + private IllegalStateException failureWithDiagnostics(String message) { + String tail = readLogTail(); + String detail = tail.isEmpty() + ? message + : message + System.lineSeparator() + + "----- daemon.log (tail) -----" + System.lineSeparator() + + tail; + if (!tail.isEmpty() && System.getenv("SONAR_DEBUG") != null) { + System.err.println(detail); + } + return new IllegalStateException(detail); + } + + /** Last {@value #LOG_TAIL_LINES} lines of {@code daemon.log}, or "" if absent. */ + private String readLogTail() { + Path log = logFile(); + try { + if (!Files.isReadable(log)) { + return ""; + } + List lines = Files.readAllLines(log); + int from = Math.max(0, lines.size() - LOG_TAIL_LINES); + return String.join(System.lineSeparator(), lines.subList(from, lines.size())); + } catch (IOException | RuntimeException unreadable) { + // Diagnostics must never throw — fall back to no tail. + return ""; + } + } + private static String javaExecutable() { String configured = System.getProperty(JAVA_EXE_PROPERTY); if (configured != null && !configured.isBlank()) { diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/Daemon.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/Daemon.java index 8997bfd..6181844 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/Daemon.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/Daemon.java @@ -112,8 +112,9 @@ public static Daemon start(SocketPaths paths, Duration idleTimeout) { // shutdown hook — routes through DaemonServer.stop(), which runs // this onStop callback before releasing awaitStopped(). The // callback closes the warm engine (deleting its work directory), - // removes the pidfile, and releases the startup lock, so no exit - // path can leave any of them behind or strand the lock held. + // removes the pidfile and the launcher's startup log, and releases + // the startup lock, so no exit path can leave any of them behind or + // strand the lock held. server.setOnStop(() -> { engine.close(); try { @@ -122,6 +123,11 @@ public static Daemon start(SocketPaths paths, Duration idleTimeout) { // Best effort — a stale pidfile is detected and overwritten // on the next start(). } + // Remove the launcher-written startup log so a clean start/stop + // leaves the runtime directory exactly as it was found. The + // name is a literal: the daemon module must not depend on the + // cli module's DaemonLauncher — both ends agree on "daemon.log". + deleteQuietly(paths.socket().getParent().resolve("daemon.log")); releaseLock(startupLock, lockChannel); }); diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncherTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncherTest.java index eab7a66..be20d2f 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncherTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncherTest.java @@ -3,6 +3,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import java.net.UnixDomainSocketAddress; @@ -242,6 +243,50 @@ void daemonJarPropertyHonoredForBundledLayout(@TempDir Path dir) throws Exceptio } } + @Test + @DisplayName("a child that crashes before binding surfaces the captured daemon.log tail") + void startFailureSurfacesChildLogTail(@TempDir Path dir) throws Exception { + // Drive a deterministic startup failure: point sonar.daemon.jar at a + // file that is a regular file (so spawn()'s isRegularFile guard passes) + // but is NOT a runnable jar, so the child JVM exits non-zero writing + // "Invalid or corrupt jarfile" to stderr — captured into daemon.log. + Path notAJar = Files.writeString(dir.resolve("not-a.jar"), "this is not a jar\n"); + Path workDir = Files.createDirectories(dir.resolve("work")); + Files.createDirectories(workDir.resolve("plugins")); + + SocketPaths paths = SocketPaths.resolve(Map.of("XDG_RUNTIME_DIR", dir.toString())); + DaemonLauncher launcher = new DaemonLauncher(paths, Duration.ofSeconds(30)); + + String prevJar = System.getProperty(DaemonLauncher.DAEMON_JAR_PROPERTY); + String prevWorkDir = System.getProperty(DaemonLauncher.DAEMON_WORKDIR_PROPERTY); + System.setProperty(DaemonLauncher.DAEMON_JAR_PROPERTY, notAJar.toString()); + System.setProperty(DaemonLauncher.DAEMON_WORKDIR_PROPERTY, workDir.toString()); + try { + IllegalStateException failure = + assertThrows(IllegalStateException.class, launcher::start); + + assertTrue(failure.getMessage().contains("daemon.log"), + "the failure must name the captured log section, got: " + + failure.getMessage()); + assertTrue(failure.getMessage().toLowerCase(java.util.Locale.ROOT) + .contains("jar"), + "the failure must include the child's captured output (the JVM's " + + "corrupt-jarfile error), not just a bare exit code, got: " + + failure.getMessage()); + } finally { + restoreProperty(DaemonLauncher.DAEMON_JAR_PROPERTY, prevJar); + restoreProperty(DaemonLauncher.DAEMON_WORKDIR_PROPERTY, prevWorkDir); + } + } + + private static void restoreProperty(String key, String previous) { + if (previous == null) { + System.clearProperty(key); + } else { + System.setProperty(key, previous); + } + } + private static long readPid(SocketPaths paths) throws Exception { return Long.parseLong(Files.readString(paths.pidFile()).trim()); } From 2a875acc23818263c866b6e1834ff5439d1d6cad Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 09:08:16 +0000 Subject: [PATCH 17/24] perf(daemon): memoize the serialized rule catalog (output-identical) (#8 stopgap) --- .../daemon/RequestDispatcher.java | 18 ++++++-- .../sonarpredict/daemon/RuleCatalog.java | 30 +++++++++++++ .../daemon/RequestDispatcherTest.java | 44 +++++++++++++++++++ 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RequestDispatcher.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RequestDispatcher.java index bffc1d3..12d986c 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RequestDispatcher.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RequestDispatcher.java @@ -88,12 +88,18 @@ private AnalyzeResponse analyze(WireMessage request) { * rule (and fails if the key is unknown); a {@code null} or blank payload * returns the entire catalog as a {@code List}, which is how * the CLI builds its {@code RuleMetadataIndex} in one round trip. + * + *

The full-catalog ({@code null}-key) branch returns the catalog's + * memoized JSON tree ({@link RuleCatalog#allAsJsonNode()}) rather than + * rebuilding and re-serializing the {@code List} on every + * request; the resulting wire bytes are identical (the catalog is immutable + * for the daemon's life). The single-key branch is unchanged. */ private Object ruleMetadata(WireMessage request) { JsonNode payload = request.payload(); if (payload == null || payload.isNull() || (payload.isTextual() && payload.asText().isBlank())) { - return ruleCatalog.all(); + return ruleCatalog.allAsJsonNode(); } if (!payload.isTextual()) { throw new IllegalArgumentException( @@ -133,8 +139,14 @@ private static T read(JsonNode payload, Class type) { } private static WireMessage response(WireMessage request, Object payload) { - return new WireMessage( - request.id(), request.method(), Json.mapper().valueToTree(payload)); + // A handler that already produced a serialized JSON tree (the memoized + // full rule catalog) is used as-is: re-running valueToTree on it would + // deep-copy the tree (TokenBuffer round-trip) on every request, undoing + // the cache. The bytes are identical either way; this just skips the copy. + JsonNode tree = (payload instanceof JsonNode node) + ? node + : Json.mapper().valueToTree(payload); + return new WireMessage(request.id(), request.method(), tree); } private static WireMessage error(WireMessage request, String message) { diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java index d3aa145..e02423a 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java @@ -71,8 +71,24 @@ public final class RuleCatalog { private final Map rulesByKey; + /** + * The full catalog ({@link #all()}) pre-serialized to a JSON tree once at + * construction. The catalog is immutable for the life of this instance + * (plugins load once), so {@code RULE_METADATA} full-catalog requests can + * reuse this node instead of re-sorting and re-serializing every rule's + * multi-KB HTML on each request. + * + *

Built eagerly so it is fully published by the time any thread can call + * {@link #allAsJsonNode()} — no lazy double-checked init, no data race. It is + * read-only after construction (no method mutates it); the only consumer is + * the protocol serializer, which never writes to it, so sharing the instance + * across socket threads is safe. + */ + private final JsonNode allAsJsonNode; + private RuleCatalog(Map rulesByKey) { this.rulesByKey = Map.copyOf(rulesByKey); + this.allAsJsonNode = Json.mapper().valueToTree(all()); } /** @@ -242,6 +258,20 @@ public java.util.List all() { .toList(); } + /** + * The full catalog ({@link #all()}) as a pre-serialized, memoized JSON tree. + * + *

Byte-equivalent to serializing {@link #all()} on demand, but built once + * at construction so repeated full-catalog {@code RULE_METADATA} requests do + * not re-serialize every rule. The returned node is shared and must be + * treated as read-only; callers hand it straight to the serializer. + * + * @return the memoized full-catalog JSON tree (an array of {@code RuleMetadata}) + */ + public JsonNode allAsJsonNode() { + return allAsJsonNode; + } + /** {@code true} if the catalog holds no rules. */ public boolean isEmpty() { return rulesByKey.isEmpty(); diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/RequestDispatcherTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/RequestDispatcherTest.java index 9b7019f..e7cfb44 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/RequestDispatcherTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/RequestDispatcherTest.java @@ -1,8 +1,10 @@ package io.github.randomcodespace.sonarpredict.daemon; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.file.Path; @@ -10,6 +12,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.TextNode; import org.junit.jupiter.api.AfterAll; @@ -95,6 +98,47 @@ void ruleMetadata_returnsMetadata() { assertEquals("java", md.language()); } + @Test + @DisplayName("RULE_METADATA full catalog is byte-identical across repeated requests (cached)") + void ruleMetadata_fullCatalog_isByteIdenticalAcrossRequests() throws Exception { + RequestDispatcher dispatcher = dispatcher(new AtomicBoolean()); + + // Null payload => full-catalog branch. Issue it twice against one dispatcher. + WireMessage first = dispatcher.dispatch( + new WireMessage("rm-all-1", Method.RULE_METADATA, null)); + WireMessage second = dispatcher.dispatch( + new WireMessage("rm-all-2", Method.RULE_METADATA, null)); + + assertEquals(Method.RULE_METADATA, first.method()); + assertTrue(first.payload().isArray(), "full catalog must serialize as a JSON array"); + assertTrue(first.payload().size() > 1000, + "expected the full analyzer catalog (>1000 rules), got: " + first.payload().size()); + assertTrue(second.payload().isArray()); + + // Byte-identity is the load-bearing guarantee: serialize just the payload + // (ids differ by design) and compare the exact bytes the codec would emit. + byte[] firstBytes = Json.mapper().writeValueAsBytes(first.payload()); + byte[] secondBytes = Json.mapper().writeValueAsBytes(second.payload()); + assertArrayEquals(firstBytes, secondBytes, + "repeated full-catalog responses must be byte-identical"); + assertTrue(firstBytes.length > 0, "full-catalog payload must be non-empty"); + + // The catalog data must actually be present (not an empty array of stubs). + List rules = List.of( + Json.mapper().convertValue(first.payload(), RuleMetadata[].class)); + assertTrue(rules.stream().anyMatch(r -> "java:S1118".equals(r.ruleKey())), + "full catalog must contain java:S1118"); + + // The cache is actually used: the dispatcher returns the catalog's single + // memoized node, so both responses share its identity and equal a fresh + // build of the same catalog (proving the memoized copy == freshly-built). + JsonNode cached = service.ruleCatalog().allAsJsonNode(); + assertSame(cached, second.payload(), + "full-catalog response must return the memoized catalog node"); + assertEquals(Json.mapper().valueToTree(service.ruleCatalog().all()), cached, + "memoized catalog node must equal a freshly-serialized catalog"); + } + @Test @DisplayName("SHUTDOWN returns an ack and signals shutdown via the callback") void shutdown_acksAndSignals() { From 1eba0ae3358239429f81ec95c5d137b509303e43 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 09:20:20 +0000 Subject: [PATCH 18/24] refactor(daemon): consolidate repo->language mappings into one LanguageMap (#12) --- .../sonarpredict/daemon/AnalysisService.java | 14 +- .../sonarpredict/daemon/LanguageDetector.java | 18 +- .../sonarpredict/daemon/LanguageMap.java | 204 ++++++++++++++++++ .../sonarpredict/daemon/RuleCatalog.java | 23 +- .../sonarpredict/daemon/SonarWayProfiles.java | 46 +--- .../sonarpredict/daemon/LanguageMapTest.java | 201 +++++++++++++++++ 6 files changed, 411 insertions(+), 95 deletions(-) create mode 100644 src/main/java/io/github/randomcodespace/sonarpredict/daemon/LanguageMap.java create mode 100644 src/test/java/io/github/randomcodespace/sonarpredict/daemon/LanguageMapTest.java diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java index c8db8ba..5c03c9c 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java @@ -599,18 +599,6 @@ static List sensorFailureWarnings(List engineLogMessage return warnings; } - /** - * Repository-key → SonarLanguage-key fallbacks, for profile rules whose - * key is not in the rule catalog. The catalog is consulted first; this map - * only covers the SonarSource analyzer repos whose name differs from the - * language key. - */ - private static final Map REPO_TO_LANGUAGE_KEY = Map.of( - "python", SonarLanguage.PYTHON.getSonarLanguageKey(), - "javascript", SonarLanguage.JS.getSonarLanguageKey(), - "typescript", SonarLanguage.TS.getSonarLanguageKey(), - "Web", SonarLanguage.HTML.getSonarLanguageKey()); - /** * Builds the active rules from an imported SonarQube quality profile. * Each profile rule becomes an {@link ActiveRule} carrying the analyzer's @@ -657,7 +645,7 @@ private String languageKeyFor(String ruleKey) { return null; } String repo = ruleKey.substring(0, colon); - return REPO_TO_LANGUAGE_KEY.getOrDefault(repo, repo); + return LanguageMap.protocolLanguageKey(repo); } private static Path createWorkDir() { diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/LanguageDetector.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/LanguageDetector.java index a0531b9..41d90b9 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/LanguageDetector.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/LanguageDetector.java @@ -17,22 +17,6 @@ */ public final class LanguageDetector { - /** v1 language set, in priority order — first registration wins on a shared suffix. */ - private static final SonarLanguage[] SUPPORTED = { - SonarLanguage.JAVA, - SonarLanguage.PYTHON, - SonarLanguage.JS, - SonarLanguage.TS, - SonarLanguage.PHP, - SonarLanguage.KOTLIN, - SonarLanguage.GO, - SonarLanguage.RUBY, - SonarLanguage.SCALA, - SonarLanguage.HTML, - SonarLanguage.XML, - SonarLanguage.CSS, - }; - /** Lower-cased extension (no leading dot) -> language. */ private static final Map BY_EXTENSION = buildExtensionMap(); @@ -41,7 +25,7 @@ private LanguageDetector() { private static Map buildExtensionMap() { Map map = new LinkedHashMap<>(); - for (SonarLanguage language : SUPPORTED) { + for (SonarLanguage language : LanguageMap.detectionOrder()) { for (String suffix : language.getDefaultFileSuffixes()) { String ext = normalizeExtension(suffix); if (!ext.isEmpty()) { diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/LanguageMap.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/LanguageMap.java new file mode 100644 index 0000000..f764cfb --- /dev/null +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/LanguageMap.java @@ -0,0 +1,204 @@ +package io.github.randomcodespace.sonarpredict.daemon; + +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.sonarsource.sonarlint.core.commons.api.SonarLanguage; + +/** + * The single, authoritative table of analyzer rule-repository facts. + * + *

Why this exists. The same repo-dir → language relationships used to + * be encoded in four separate places that had to be edited in lockstep: + *

    + *
  • {@code RuleCatalog} — the indexed analyzer repo set and the + * repo → {@code RuleMetadata.language} (protocol key) override map;
  • + *
  • {@code SonarWayProfiles} — the repo → served {@link SonarLanguage}s, + * the {@code Web} split-out, and the TypeScript engine-repo override;
  • + *
  • {@code AnalysisService} — the profile-rule repo → protocol-key fallback;
  • + *
  • {@code LanguageDetector} — the ordered v1 language set.
  • + *
+ * Consolidating them here removes the drift hazard: each call site now reads the + * exact view it needs from this one table. This is a pure structural dedup; every + * accessor reproduces its former call site's lookup byte-for-byte. + * + *

The {@code } segment is the rule-repository directory name in a plugin + * JAR resource path ({@code org/sonar/l10n//rules//...}); it is also + * the rule-key prefix the engine emits. An {@link Entry#protocolLanguageKey} is the + * {@code RuleMetadata.language} / SonarLanguage-key the protocol surfaces. An + * entry's {@link Entry#servedLanguages} are the {@link SonarLanguage}s whose files + * the engine analyzes with that repository's rules, each mapped to the engine rule + * repository its rules activate under (equal to the repo-dir except TypeScript, + * whose rules read from the {@code javascript} resource but activate under + * {@code typescript}). + */ +final class LanguageMap { + + /** + * One analyzer rule repository. + * + * @param repo the repo-dir / rule-key prefix (e.g. {@code "javascript"}, {@code "Web"}) + * @param protocolLanguageKey the protocol/{@code RuleMetadata.language} key (e.g. {@code "js"}, {@code "web"}) + * @param servedLanguages served {@link SonarLanguage} → the engine rule repository its + * rules activate under (the engine-repo, == {@code repo} unless overridden) + */ + record Entry(String repo, String protocolLanguageKey, Map servedLanguages) { + Entry { + servedLanguages = Map.copyOf(servedLanguages); + } + } + + /** + * The consolidated repo table. Iteration order is irrelevant to every current + * consumer (the four former maps were unordered {@code Map.of}/{@code Set.of}); + * it is listed here repo-by-repo for readability. + */ + private static final List ENTRIES = List.of( + new Entry("java", "java", Map.of(SonarLanguage.JAVA, "java")), + new Entry("python", "py", Map.of(SonarLanguage.PYTHON, "python")), + // The javascript analyzer drives both .js and .ts files; TS rules read + // from this resource but activate under the `typescript` engine repo. + new Entry("javascript", "js", + Map.of(SonarLanguage.JS, "javascript", SonarLanguage.TS, "typescript")), + // typescript is an indexed analyzer repo with its own protocol key, but + // ships no Sonar_way_profile.json resource: it serves no profile language. + new Entry("typescript", "ts", Map.of()), + new Entry("css", "css", Map.of(SonarLanguage.CSS, "css")), + new Entry("php", "php", Map.of(SonarLanguage.PHP, "php")), + new Entry("kotlin", "kotlin", Map.of(SonarLanguage.KOTLIN, "kotlin")), + new Entry("go", "go", Map.of(SonarLanguage.GO, "go")), + new Entry("ruby", "ruby", Map.of(SonarLanguage.RUBY, "ruby")), + new Entry("scala", "scala", Map.of(SonarLanguage.SCALA, "scala")), + // The HTML analyzer's repo is `Web` (capitalized), protocol key `web`. + new Entry("Web", "web", Map.of(SonarLanguage.HTML, "Web")), + new Entry("xml", "xml", Map.of(SonarLanguage.XML, "xml"))); + + private static final Map BY_REPO = indexByRepo(); + + /** + * The v1 language set in priority order — first registration wins on a + * shared file suffix in {@code LanguageDetector}. This ordering is its own fact + * (it is not derivable from the per-repo served sets), so it is listed + * explicitly; it is the single authoritative copy nonetheless. + */ + private static final List DETECTION_ORDER = List.of( + SonarLanguage.JAVA, + SonarLanguage.PYTHON, + SonarLanguage.JS, + SonarLanguage.TS, + SonarLanguage.PHP, + SonarLanguage.KOTLIN, + SonarLanguage.GO, + SonarLanguage.RUBY, + SonarLanguage.SCALA, + SonarLanguage.HTML, + SonarLanguage.XML, + SonarLanguage.CSS); + + private static final Set ANALYZER_REPOS = indexRepoNames(); + + private LanguageMap() { + } + + private static Map indexByRepo() { + Map map = new LinkedHashMap<>(); + for (Entry e : ENTRIES) { + map.put(e.repo(), e); + } + return Map.copyOf(map); + } + + private static Set indexRepoNames() { + Set repos = new LinkedHashSet<>(); + for (Entry e : ENTRIES) { + repos.add(e.repo()); + } + return Set.copyOf(repos); + } + + /** + * The SonarSource analyzer rule-repository directory names (the {@code } + * segments that are indexed; also the rule-key prefixes the engine emits). + * + *

Replaces {@code RuleCatalog.ANALYZER_REPOS}. + * + * @return the indexed analyzer repo-dir names + */ + static Set analyzerRepos() { + return ANALYZER_REPOS; + } + + /** + * The protocol / {@code RuleMetadata.language} key for {@code repo}, falling + * back to {@code repo} itself when the repo is not in the table. + * + *

Reproduces both {@code RuleCatalog.REPO_TO_LANGUAGE.getOrDefault(repo, repo)} + * and {@code AnalysisService.REPO_TO_LANGUAGE_KEY.getOrDefault(repo, repo)} — + * the two were byte-identical (the four explicit overrides + * python→py, javascript→js, typescript→ts, Web→web; every + * other repo → itself). + * + * @param repo the rule-repository directory / rule-key prefix + * @return the protocol language key + */ + static String protocolLanguageKey(String repo) { + Entry e = BY_REPO.get(repo); + return e != null ? e.protocolLanguageKey() : repo; + } + + /** + * The {@link SonarLanguage}s whose files the engine analyzes with {@code repo}'s + * rules, or an empty set if {@code repo} ships no Sonar way profile. + * + *

Reproduces {@code SonarWayProfiles.languagesFor(repo)} — the union of + * the former {@code REPO_TO_LANGUAGES} and the {@code WEB_REPO} split-out, with + * the {@code typescript} repo (and any unknown repo) resolving to {@code {}}. + * + * @param repo the rule-repository directory + * @return the served languages, or an empty set + */ + static Set servedLanguages(String repo) { + Entry e = BY_REPO.get(repo); + return e != null ? e.servedLanguages().keySet() : Set.of(); + } + + /** + * The engine rule-key repository prefix for {@code language}'s rules read from + * {@code repo}'s resource, falling back to {@code repo} when the language has no + * override. + * + *

Reproduces {@code ENGINE_REPO_OVERRIDE.getOrDefault(language, repo)} in + * {@code SonarWayProfiles}: only {@link SonarLanguage#TS} overrides + * ({@code javascript} resource → {@code typescript} engine repo); every + * other language's engine repo equals the resource directory it came from. + * + * @param language the served analyzer language + * @param repo the resource directory the rules were read from + * @return the engine rule-key repository prefix + */ + static String engineRepo(SonarLanguage language, String repo) { + Entry e = BY_REPO.get(repo); + if (e != null) { + String engineRepo = e.servedLanguages().get(language); + if (engineRepo != null) { + return engineRepo; + } + } + return repo; + } + + /** + * The v1 language set in detection priority order — first registration + * wins on a shared file suffix. + * + *

Replaces {@code LanguageDetector.SUPPORTED}. + * + * @return the ordered supported languages + */ + static List detectionOrder() { + return DETECTION_ORDER; + } +} diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java index e02423a..b4c9878 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java @@ -9,7 +9,6 @@ import java.util.HashMap; import java.util.Locale; import java.util.Map; -import java.util.Set; import java.util.jar.JarEntry; import java.util.jar.JarFile; import java.util.regex.Matcher; @@ -44,24 +43,6 @@ */ public final class RuleCatalog { - /** - * SonarSource analyzer rule-repository directory names (the {@code } - * segment, which is also the rule-key prefix the engine emits). - */ - private static final Set ANALYZER_REPOS = Set.of( - "java", "python", "javascript", "typescript", "css", - "php", "kotlin", "go", "ruby", "scala", "Web", "xml"); - - /** - * Maps a rule-repository directory to the {@code RuleMetadata.language} - * value (the SonarLanguage key). Repos absent here use the repo name. - */ - private static final Map REPO_TO_LANGUAGE = Map.of( - "python", "py", - "javascript", "js", - "typescript", "ts", - "Web", "web"); - /** Matches {@code org/sonar/l10n//rules//.json}. */ private static final Pattern RULE_JSON = Pattern.compile( "^org/sonar/l10n/[^/]+/rules/([^/]+)/([^/]+)\\.json$"); @@ -140,7 +121,7 @@ private static void indexJar(Path jarPath, Map rules) { } String repo = m.group(1); String sqKey = m.group(2); - if (!ANALYZER_REPOS.contains(repo)) { + if (!LanguageMap.analyzerRepos().contains(repo)) { continue; } RuleMetadata md = readRule(jar, entry, repo, sqKey); @@ -189,7 +170,7 @@ private static RuleMetadata readRule( } String ruleKey = repo + ":" + sqKey; String name = text(json, "title", sqKey); - String language = REPO_TO_LANGUAGE.getOrDefault(repo, repo); + String language = LanguageMap.protocolLanguageKey(repo); String severity = normalizeSeverity(text(json, "defaultSeverity", null)); String type = normalizeType(text(json, "type", null)); String descriptionHtml = readHtml(jar, jsonEntry.getName()); diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/SonarWayProfiles.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/SonarWayProfiles.java index a872781..90f8b19 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/SonarWayProfiles.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/SonarWayProfiles.java @@ -55,40 +55,6 @@ public final class SonarWayProfiles { private static final Pattern PROFILE_JSON = Pattern.compile( "^org/sonar/l10n/[^/]+/rules/([^/]+)/Sonar_way_profile\\.json$"); - /** - * Rule-repository directory → the {@link SonarLanguage}s whose files the - * engine analyzes with that repository's rules. {@code javascript} serves - * both JS and TS; every other repo serves a single language. - */ - private static final Map> REPO_TO_LANGUAGES = Map.of( - "java", Set.of(SonarLanguage.JAVA), - "python", Set.of(SonarLanguage.PYTHON), - "javascript", Set.of(SonarLanguage.JS, SonarLanguage.TS), - "css", Set.of(SonarLanguage.CSS), - "php", Set.of(SonarLanguage.PHP), - "kotlin", Set.of(SonarLanguage.KOTLIN), - "go", Set.of(SonarLanguage.GO), - "ruby", Set.of(SonarLanguage.RUBY), - "scala", Set.of(SonarLanguage.SCALA), - "xml", Set.of(SonarLanguage.XML)); - - /** {@code Web} is the HTML analyzer's repo; kept separate so the map above stays ≤10 entries. */ - private static final Map> WEB_REPO = - Map.of("Web", Set.of(SonarLanguage.HTML)); - - /** - * Engine rule-key repository prefix for a language, when it differs from - * the profile-resource directory the rules were read from. - * - *

Only {@link SonarLanguage#TS} needs this: its rules live in the - * {@code javascript} profile resource, but the JavaScript analyzer's - * TypeScript sensor activates them from the {@code typescript} rule - * repository. Every other language's engine repository equals its - * resource directory, so it is absent here. - */ - private static final Map ENGINE_REPO_OVERRIDE = - Map.of(SonarLanguage.TS, "typescript"); - private final Map> ruleKeysByLanguage; private SonarWayProfiles(Map> ruleKeysByLanguage) { @@ -143,7 +109,7 @@ private static void indexJar(Path jarPath, Map> byLa continue; } String repo = m.group(1); - Set languages = languagesFor(repo); + Set languages = LanguageMap.servedLanguages(repo); if (languages.isEmpty()) { continue; } @@ -152,7 +118,7 @@ private static void indexJar(Path jarPath, Map> byLa // The engine rule-key prefix is the resource-directory repo, // unless the language overrides it (TypeScript: rules read // from the `javascript` resource, activated under `typescript`). - String engineRepo = ENGINE_REPO_OVERRIDE.getOrDefault(language, repo); + String engineRepo = LanguageMap.engineRepo(language, repo); List ruleKeys = prefix(bareKeys, engineRepo); byLanguage.merge(language, ruleKeys, SonarWayProfiles::mergeDistinct); } @@ -162,14 +128,6 @@ private static void indexJar(Path jarPath, Map> byLa } } - private static Set languagesFor(String repo) { - Set languages = REPO_TO_LANGUAGES.get(repo); - if (languages != null) { - return languages; - } - return WEB_REPO.getOrDefault(repo, Set.of()); - } - private static List mergeDistinct(List existing, List added) { List merged = new ArrayList<>(existing); for (String key : added) { diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/LanguageMapTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/LanguageMapTest.java new file mode 100644 index 0000000..c3411a7 --- /dev/null +++ b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/LanguageMapTest.java @@ -0,0 +1,201 @@ +package io.github.randomcodespace.sonarpredict.daemon; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; +import java.util.Set; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.sonarsource.sonarlint.core.commons.api.SonarLanguage; + +/** + * Equivalence oracle for the structural-dedup refactor (#12). + * + *

Before consolidation the same repo-dir -> language facts were encoded in four + * places ({@code RuleCatalog.ANALYZER_REPOS}/{@code REPO_TO_LANGUAGE}, + * {@code SonarWayProfiles.REPO_TO_LANGUAGES}/{@code WEB_REPO}/{@code ENGINE_REPO_OVERRIDE}, + * {@code AnalysisService.REPO_TO_LANGUAGE_KEY}, {@code LanguageDetector.SUPPORTED}). + * This test pins every one of those mappings as a literal expectation derived from + * the pre-refactor sources, and asserts {@link LanguageMap} reproduces each one + * byte-for-byte. It guards the dedup even where behaviour tests do not reach. + */ +class LanguageMapTest { + + // --- RuleCatalog.ANALYZER_REPOS (the indexed analyzer rule-repo dir names) ----------- + + @Test + @DisplayName("analyzerRepos reproduces RuleCatalog.ANALYZER_REPOS exactly") + void analyzerRepos_matchesRuleCatalogSet() { + assertEquals( + Set.of("java", "python", "javascript", "typescript", "css", + "php", "kotlin", "go", "ruby", "scala", "Web", "xml"), + LanguageMap.analyzerRepos()); + } + + // --- RuleCatalog.REPO_TO_LANGUAGE (protocol language key, getOrDefault(repo, repo)) --- + // Pinned as the exact getOrDefault semantics: explicit override for four repos, + // otherwise the repo name itself. + + @Test + @DisplayName("protocolLanguageKey reproduces RuleCatalog.REPO_TO_LANGUAGE getOrDefault") + void protocolLanguageKey_matchesRuleCatalogMap() { + // explicit overrides + assertEquals("py", LanguageMap.protocolLanguageKey("python")); + assertEquals("js", LanguageMap.protocolLanguageKey("javascript")); + assertEquals("ts", LanguageMap.protocolLanguageKey("typescript")); + assertEquals("web", LanguageMap.protocolLanguageKey("Web")); + // getOrDefault(repo, repo) for every other analyzer repo + assertEquals("java", LanguageMap.protocolLanguageKey("java")); + assertEquals("css", LanguageMap.protocolLanguageKey("css")); + assertEquals("php", LanguageMap.protocolLanguageKey("php")); + assertEquals("kotlin", LanguageMap.protocolLanguageKey("kotlin")); + assertEquals("go", LanguageMap.protocolLanguageKey("go")); + assertEquals("ruby", LanguageMap.protocolLanguageKey("ruby")); + assertEquals("scala", LanguageMap.protocolLanguageKey("scala")); + assertEquals("xml", LanguageMap.protocolLanguageKey("xml")); + // an unknown repo falls through to the repo name itself + assertEquals("pmd", LanguageMap.protocolLanguageKey("pmd")); + assertEquals("eslint", LanguageMap.protocolLanguageKey("eslint")); + } + + // --- AnalysisService.REPO_TO_LANGUAGE_KEY (the profile-rule fallback) ----------------- + // Pre-refactor this was a SEPARATE map with the SAME four entries, values derived + // from SonarLanguage.getSonarLanguageKey(). Pin that it is byte-identical to the + // RuleCatalog protocol key AND equal to the enum-derived key. + + @Test + @DisplayName("protocolLanguageKey reproduces AnalysisService.REPO_TO_LANGUAGE_KEY exactly") + void protocolLanguageKey_matchesAnalysisServiceFallback() { + assertEquals(SonarLanguage.PYTHON.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("python")); + assertEquals(SonarLanguage.JS.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("javascript")); + assertEquals(SonarLanguage.TS.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("typescript")); + assertEquals(SonarLanguage.HTML.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("Web")); + // getOrDefault(repo, repo) for an arbitrary repo (the profile-rule path can + // hand any repository prefix; it must fall through to the prefix itself). + assertEquals("somerepo", LanguageMap.protocolLanguageKey("somerepo")); + } + + // --- SonarWayProfiles.REPO_TO_LANGUAGES + WEB_REPO (languagesFor) --------------------- + + @Test + @DisplayName("servedLanguages reproduces SonarWayProfiles.languagesFor exactly") + void servedLanguages_matchesSonarWayProfiles() { + assertEquals(Set.of(SonarLanguage.JAVA), LanguageMap.servedLanguages("java")); + assertEquals(Set.of(SonarLanguage.PYTHON), LanguageMap.servedLanguages("python")); + assertEquals(Set.of(SonarLanguage.JS, SonarLanguage.TS), + LanguageMap.servedLanguages("javascript")); + assertEquals(Set.of(SonarLanguage.CSS), LanguageMap.servedLanguages("css")); + assertEquals(Set.of(SonarLanguage.PHP), LanguageMap.servedLanguages("php")); + assertEquals(Set.of(SonarLanguage.KOTLIN), LanguageMap.servedLanguages("kotlin")); + assertEquals(Set.of(SonarLanguage.GO), LanguageMap.servedLanguages("go")); + assertEquals(Set.of(SonarLanguage.RUBY), LanguageMap.servedLanguages("ruby")); + assertEquals(Set.of(SonarLanguage.SCALA), LanguageMap.servedLanguages("scala")); + assertEquals(Set.of(SonarLanguage.XML), LanguageMap.servedLanguages("xml")); + // WEB_REPO split-out + assertEquals(Set.of(SonarLanguage.HTML), LanguageMap.servedLanguages("Web")); + } + + @Test + @DisplayName("typescript repo serves no profile languages (TS is served via javascript)") + void servedLanguages_typescriptRepoIsEmpty() { + // typescript is an ANALYZER_REPO and has a protocol key, but it ships no + // Sonar_way_profile.json resource of its own: languagesFor("typescript") == {}. + assertEquals(Set.of(), LanguageMap.servedLanguages("typescript")); + } + + @Test + @DisplayName("an unknown repo serves no profile languages") + void servedLanguages_unknownRepoIsEmpty() { + assertEquals(Set.of(), LanguageMap.servedLanguages("pmd")); + assertEquals(Set.of(), LanguageMap.servedLanguages("nonexistent")); + } + + // --- SonarWayProfiles.ENGINE_REPO_OVERRIDE (engineRepo getOrDefault(language, repo)) -- + + @Test + @DisplayName("engineRepo reproduces ENGINE_REPO_OVERRIDE.getOrDefault(language, repo)") + void engineRepo_matchesSonarWayProfilesOverride() { + // The only override: TS rules read from the javascript resource activate + // under the typescript engine repo. + assertEquals("typescript", LanguageMap.engineRepo(SonarLanguage.TS, "javascript")); + // JS (served from the same javascript resource) keeps the resource repo. + assertEquals("javascript", LanguageMap.engineRepo(SonarLanguage.JS, "javascript")); + // Every other language: engine repo == the resource directory it came from. + assertEquals("java", LanguageMap.engineRepo(SonarLanguage.JAVA, "java")); + assertEquals("python", LanguageMap.engineRepo(SonarLanguage.PYTHON, "python")); + assertEquals("css", LanguageMap.engineRepo(SonarLanguage.CSS, "css")); + assertEquals("Web", LanguageMap.engineRepo(SonarLanguage.HTML, "Web")); + assertEquals("xml", LanguageMap.engineRepo(SonarLanguage.XML, "xml")); + // getOrDefault falls back to the supplied repo for a non-overridden language + assertEquals("anyrepo", LanguageMap.engineRepo(SonarLanguage.GO, "anyrepo")); + } + + // --- LanguageDetector.SUPPORTED (ordered priority list) ------------------------------ + + @Test + @DisplayName("detectionOrder reproduces LanguageDetector.SUPPORTED exactly (order matters)") + void detectionOrder_matchesLanguageDetectorArray() { + assertEquals( + List.of( + SonarLanguage.JAVA, + SonarLanguage.PYTHON, + SonarLanguage.JS, + SonarLanguage.TS, + SonarLanguage.PHP, + SonarLanguage.KOTLIN, + SonarLanguage.GO, + SonarLanguage.RUBY, + SonarLanguage.SCALA, + SonarLanguage.HTML, + SonarLanguage.XML, + SonarLanguage.CSS), + LanguageMap.detectionOrder()); + } + + // --- Cross-source consistency: the dedup itself --------------------------------------- + + @Test + @DisplayName("every served language's protocol key equals its repo's protocol key") + void servedLanguageProtocolKeysAreConsistent() { + // For each analyzer repo that serves languages, every served language's own + // getSonarLanguageKey() must agree with the repo's protocol key for the + // single-language repos (the only repos AnalysisService/RuleCatalog key on). + // javascript serves two languages (js, ts) so is asserted separately above. + assertEquals(SonarLanguage.JAVA.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("java")); + assertEquals(SonarLanguage.PYTHON.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("python")); + assertEquals(SonarLanguage.CSS.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("css")); + assertEquals(SonarLanguage.PHP.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("php")); + assertEquals(SonarLanguage.KOTLIN.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("kotlin")); + assertEquals(SonarLanguage.GO.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("go")); + assertEquals(SonarLanguage.RUBY.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("ruby")); + assertEquals(SonarLanguage.SCALA.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("scala")); + assertEquals(SonarLanguage.XML.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("xml")); + assertEquals(SonarLanguage.HTML.getSonarLanguageKey(), + LanguageMap.protocolLanguageKey("Web")); + } + + @Test + @DisplayName("analyzerRepos is exactly the set of repo-dirs in the table") + void analyzerReposCoversTableKeys() { + // The indexed-repo set and the table's repo-dir keys must be the same set: + // this is the invariant the dedup enforces (one table, no drift). + assertTrue(LanguageMap.analyzerRepos().contains("typescript"), + "typescript is an analyzer repo even though it serves no profile"); + assertEquals(12, LanguageMap.analyzerRepos().size()); + } +} From d58d8b0bde06cc8495d4316eb49e140e17eafc1e Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 09:30:37 +0000 Subject: [PATCH 19/24] refactor: drop dead daemon/target probe, fix SARIF informationUri, extract PluginsDir.forEachJar (#20, #23) --- .../sonarpredict/cli/DaemonLauncher.java | 20 ++++------ .../sonarpredict/cli/SarifReporter.java | 2 +- .../sonarpredict/daemon/PluginsDir.java | 40 +++++++++++++++++++ .../sonarpredict/daemon/RuleCatalog.java | 19 +-------- .../sonarpredict/daemon/SonarWayProfiles.java | 19 +-------- .../sonarpredict/cli/SarifReporterTest.java | 15 +++++++ .../sonarpredict/daemon/PluginsDirTest.java | 36 +++++++++++++++++ 7 files changed, 102 insertions(+), 49 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java index cc2db37..58286e5 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/DaemonLauncher.java @@ -24,7 +24,7 @@ * {@code sonar.daemon.jar} system property; that is the supported override and * the mechanism Plan 7's {@code setup} command will use to point at an * installed runtime. With no property set it falls back to a dev default — the - * newest {@code daemon/target/sonar-predictor-daemon-*.jar} (the shaded jar, + * newest {@code target/sonar-predictor-daemon-*.jar} (the shaded jar, * never the {@code original-} prefixed unshaded one), located relative to the * working directory or its ancestors. * @@ -117,7 +117,7 @@ public DaemonLauncher(SocketPaths paths, Duration startupTimeout) { /** * Resolves the daemon fat jar: the {@code sonar.daemon.jar} property if set, - * otherwise the dev-default shaded jar under {@code daemon/target/}. + * otherwise the dev-default shaded jar under {@code target/}. * * @return the resolved jar path * @throws IllegalStateException if no jar can be located @@ -398,16 +398,12 @@ private static Path findDevDefaultJar() { Path dir = Path.of("").toAbsolutePath(); while (dir != null) { // Single-module layout: target/sonar-predictor-daemon-*.jar at the - // project root. Legacy multi-module: daemon/target/... — try both - // so a checkout of either generation still resolves. - for (Path candidate : new Path[] { - dir.resolve("target"), - dir.resolve("daemon").resolve("target") }) { - if (Files.isDirectory(candidate)) { - Path jar = newestDaemonJar(candidate); - if (jar != null) { - return jar; - } + // project root. + Path candidate = dir.resolve("target"); + if (Files.isDirectory(candidate)) { + Path jar = newestDaemonJar(candidate); + if (jar != null) { + return jar; } } dir = dir.getParent(); diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SarifReporter.java b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SarifReporter.java index 695645c..76138dd 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/cli/SarifReporter.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/cli/SarifReporter.java @@ -47,7 +47,7 @@ public String render(AnalyzeResponse response, RuleMetadataIndex index, ObjectNode tool = run.putObject("tool"); ObjectNode driver = tool.putObject("driver"); driver.put("name", "sonar-predictor"); - driver.put("informationUri", "https://github.com/sonarcli/sonar-predictor"); + driver.put("informationUri", "https://github.com/RandomCodeSpace/sonar-predict"); driver.put("version", SonarVersionProvider.version()); // Distinct rule keys, in first-seen order, become driver.rules[]. diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/PluginsDir.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/PluginsDir.java index 740a73a..2698319 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/PluginsDir.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/PluginsDir.java @@ -1,8 +1,14 @@ package io.github.randomcodespace.sonarpredict.daemon; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; import java.nio.file.Path; +import java.util.List; import java.util.Map; +import java.util.function.Consumer; import java.util.function.Function; +import java.util.stream.Stream; /** * Resolves the directory the daemon loads its vendored analyzer plugin jars @@ -54,4 +60,38 @@ static Path resolve(Map env, Function property) } return DEFAULT; } + + /** + * Applies {@code action} to every {@code *.jar} in {@code pluginsDir}, in + * directory-iteration order. + * + *

This is the shared jar-scan loop used by the in-memory plugin readers + * ({@code RuleCatalog}, {@code SonarWayProfiles}): list the directory, + * keep entries whose file name ends in {@code .jar}, and hand each to the + * caller. The directory must hold at least one plugin jar — an empty scan + * is a configuration error, not a silently-empty result. + * + * @param pluginsDir directory holding the vendored analyzer plugin JARs + * @param action invoked once per discovered {@code *.jar} + * @throws IllegalStateException if the directory holds no {@code *.jar} + * @throws UncheckedIOException if the directory cannot be listed + */ + static void forEachJar(Path pluginsDir, Consumer action) { + List jars; + try (Stream entries = Files.list(pluginsDir)) { + jars = entries + .filter(p -> p.getFileName().toString().endsWith(".jar")) + .toList(); + } catch (IOException e) { + throw new UncheckedIOException( + "could not list plugins directory: " + pluginsDir.toAbsolutePath(), e); + } + if (jars.isEmpty()) { + throw new IllegalStateException( + "no analyzer plugin JARs in " + pluginsDir.toAbsolutePath()); + } + for (Path jar : jars) { + action.accept(jar); + } + } } diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java index b4c9878..8104d81 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/RuleCatalog.java @@ -4,7 +4,6 @@ import java.io.InputStream; import java.io.UncheckedIOException; import java.nio.charset.StandardCharsets; -import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; import java.util.Locale; @@ -13,7 +12,6 @@ import java.util.jar.JarFile; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Stream; import com.fasterxml.jackson.databind.JsonNode; @@ -81,22 +79,7 @@ private RuleCatalog(Map rulesByKey) { */ public static RuleCatalog fromPluginsDir(Path pluginsDir) { Map rules = new HashMap<>(); - int jarCount = 0; - try (Stream entries = Files.list(pluginsDir)) { - for (Path jar : entries - .filter(p -> p.getFileName().toString().endsWith(".jar")) - .toList()) { - jarCount++; - indexJar(jar, rules); - } - } catch (IOException e) { - throw new UncheckedIOException( - "could not list plugins directory: " + pluginsDir.toAbsolutePath(), e); - } - if (jarCount == 0) { - throw new IllegalStateException( - "no analyzer plugin JARs in " + pluginsDir.toAbsolutePath()); - } + PluginsDir.forEachJar(pluginsDir, jar -> indexJar(jar, rules)); addTypeScriptAliases(rules); return new RuleCatalog(rules); } diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/SonarWayProfiles.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/SonarWayProfiles.java index 90f8b19..592a368 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/SonarWayProfiles.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/SonarWayProfiles.java @@ -3,7 +3,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; -import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.EnumMap; @@ -14,7 +13,6 @@ import java.util.jar.JarFile; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Stream; import com.fasterxml.jackson.databind.JsonNode; @@ -71,22 +69,7 @@ private SonarWayProfiles(Map> ruleKeysByLanguage) { */ public static SonarWayProfiles load(Path pluginsDir) { Map> byLanguage = new EnumMap<>(SonarLanguage.class); - int jarCount = 0; - try (Stream entries = Files.list(pluginsDir)) { - for (Path jar : entries - .filter(p -> p.getFileName().toString().endsWith(".jar")) - .toList()) { - jarCount++; - indexJar(jar, byLanguage); - } - } catch (IOException e) { - throw new UncheckedIOException( - "could not list plugins directory: " + pluginsDir.toAbsolutePath(), e); - } - if (jarCount == 0) { - throw new IllegalStateException( - "no analyzer plugin JARs in " + pluginsDir.toAbsolutePath()); - } + PluginsDir.forEachJar(pluginsDir, jar -> indexJar(jar, byLanguage)); return new SonarWayProfiles(byLanguage); } diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/cli/SarifReporterTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/cli/SarifReporterTest.java index 045def6..b7a7516 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/cli/SarifReporterTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/cli/SarifReporterTest.java @@ -1,6 +1,7 @@ package io.github.randomcodespace.sonarpredict.cli; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -70,6 +71,20 @@ void sarifHasOneRunWithDriver() throws Exception { "the driver version must not report the stale 0.1.0 literal"); } + @Test + @DisplayName("the driver informationUri points at the current project repository") + void sarifDriverInformationUriIsCurrent() throws Exception { + String sarif = new SarifReporter().render(WITH_ISSUES); + + JsonNode driver = Json.mapper().readTree(sarif) + .get("runs").get(0).get("tool").get("driver"); + assertEquals("https://github.com/RandomCodeSpace/sonar-predict", + driver.get("informationUri").asText(), + "the informationUri must point at the repackaged project's repository"); + assertFalse(driver.get("informationUri").asText().contains("sonarcli"), + "the informationUri must not reference the pre-repackage sonarcli org"); + } + @Test @DisplayName("each result carries ruleId, mapped level, message and a physical location") void sarifResultsAreWellFormed() throws Exception { diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/PluginsDirTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/PluginsDirTest.java index 404c479..4081ac2 100644 --- a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/PluginsDirTest.java +++ b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/PluginsDirTest.java @@ -1,12 +1,18 @@ package io.github.randomcodespace.sonarpredict.daemon; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; /** * Verifies that the daemon's analyzer-plugin directory is configurable. The @@ -52,4 +58,34 @@ void propertyWinsOverEnvironment() { assertEquals(Path.of("/opt/plugins"), resolved, "the system property must take precedence over the env var"); } + + @Test + @DisplayName("forEachJar visits every *.jar and skips non-jar entries") + void forEachJar_visitsOnlyJars(@TempDir Path dir) throws Exception { + Files.createFile(dir.resolve("a-plugin.jar")); + Files.createFile(dir.resolve("b-plugin.jar")); + Files.createFile(dir.resolve("notes.txt")); + Files.createDirectory(dir.resolve("sub.jar.d")); + + List visited = new ArrayList<>(); + PluginsDir.forEachJar(dir, jar -> visited.add(jar.getFileName().toString())); + + assertEquals(2, visited.size(), "only the two *.jar files must be visited"); + assertTrue(visited.contains("a-plugin.jar") && visited.contains("b-plugin.jar"), + "both plugin jars must be visited, got: " + visited); + } + + @Test + @DisplayName("forEachJar throws IllegalStateException naming the directory when no jars are present") + void forEachJar_emptyDir_throws(@TempDir Path dir) throws Exception { + Files.createFile(dir.resolve("readme.md")); + + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> PluginsDir.forEachJar(dir, jar -> { + throw new AssertionError("the action must not run when no jars exist"); + })); + assertTrue(ex.getMessage().contains("no analyzer plugin JARs in") + && ex.getMessage().contains(dir.toAbsolutePath().toString()), + "the empty-dir error must name the directory, got: " + ex.getMessage()); + } } From de659a5270d48f7423dfd3eb2e65018fa749f2ac Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 09:38:27 +0000 Subject: [PATCH 20/24] refactor(daemon): split analyzeLocked into focused steps; lift BaseDirGuard with its own tests (#23) --- .../sonarpredict/daemon/AnalysisService.java | 102 +++++++------ .../sonarpredict/daemon/BaseDirGuard.java | 69 +++++++++ .../sonarpredict/daemon/BaseDirGuardTest.java | 136 ++++++++++++++++++ 3 files changed, 255 insertions(+), 52 deletions(-) create mode 100644 src/main/java/io/github/randomcodespace/sonarpredict/daemon/BaseDirGuard.java create mode 100644 src/test/java/io/github/randomcodespace/sonarpredict/daemon/BaseDirGuardTest.java diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java index 5c03c9c..9d8d45f 100644 --- a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/AnalysisService.java @@ -223,10 +223,32 @@ private static boolean matchesAnyGlob(String relativePath, List globs) { private AnalyzeResponse analyzeLocked(AnalyzeRequest request) { Path baseDir = Path.of(request.baseDir()).toAbsolutePath().normalize(); - List inputFiles = new ArrayList<>(); List warnings = new ArrayList<>(); Set presentLanguages = new java.util.HashSet<>(); + List inputFiles = + collectInputFiles(request, baseDir, warnings, presentLanguages); + + AnalysisConfiguration analysisConfig = + buildAnalysisConfig(request, baseDir, inputFiles, presentLanguages, warnings); + + return runEngine(analysisConfig, baseDir, warnings); + } + + /** + * Detects, validates, and classifies each requested file, returning the + * engine input files. Recognized-but-unloaded languages, unsupported file + * types, and base-directory escapes are skipped, each appending a + * {@link AnalysisWarning} to {@code warnings}; every accepted file's + * language is added to {@code presentLanguages}. Both collectors are + * mutated in place in request-file order. + */ + private List collectInputFiles( + AnalyzeRequest request, + Path baseDir, + List warnings, + Set presentLanguages) { + List inputFiles = new ArrayList<>(); for (String relative : request.files()) { Optional language = LanguageDetector.detect(relative); if (language.isEmpty()) { @@ -246,7 +268,7 @@ private AnalyzeResponse analyzeLocked(AnalyzeRequest request) { continue; } Path file = baseDir.resolve(relative).toAbsolutePath().normalize(); - if (!isWithinBaseDir(relative, file, baseDir)) { + if (!BaseDirGuard.isWithinBaseDir(relative, file, baseDir)) { // A direct socket client could send an absolute path, a '..' // escape, or any name resolving outside baseDir to read files // the daemon was never asked to analyze. Reject and warn — @@ -261,13 +283,27 @@ private AnalyzeResponse analyzeLocked(AnalyzeRequest request) { inputFiles.add(new FileInputFile(file, baseDir, language.get(), isTest)); presentLanguages.add(language.get()); } + return inputFiles; + } + /** + * Resolves the active rules — from the request's imported profile when one + * is given, otherwise the per-language Sonar way defaults (which may append + * "no rule set" warnings to {@code warnings}) — and assembles the engine + * {@link AnalysisConfiguration} for {@code inputFiles}. + */ + private AnalysisConfiguration buildAnalysisConfig( + AnalyzeRequest request, + Path baseDir, + List inputFiles, + Set presentLanguages, + List warnings) { List activeRules = request.profileRef() != null ? profileRulesFrom(request.profileRef()) : resolveActiveRules( sonarWayProfiles, ruleParameterDefaults, presentLanguages, warnings); - AnalysisConfiguration analysisConfig = AnalysisConfiguration.builder() + return AnalysisConfiguration.builder() .setBaseDir(baseDir) .addInputFiles(inputFiles) .addActiveRules(activeRules) @@ -275,7 +311,18 @@ private AnalyzeResponse analyzeLocked(AnalyzeRequest request) { .putExtraProperty("sonar.java.binaries", baseDir.toString()) .putExtraProperty("sonar.java.libraries", "") .build(); + } + /** + * Posts the analysis to the warm engine scheduler, awaits the result, maps + * the raw issues to protocol DTOs, and appends the failed-file and + * swallowed-sensor-crash warnings to {@code warnings} before returning the + * response. Runs holding {@link #analysisLock}. + */ + private AnalyzeResponse runEngine( + AnalysisConfiguration analysisConfig, + Path baseDir, + List warnings) { List rawIssues = new ArrayList<>(); AnalyzeCommand command = new AnalyzeCommand( "daemon-module", @@ -333,55 +380,6 @@ private AnalyzeResponse analyzeLocked(AnalyzeRequest request) { return new AnalyzeResponse(List.copyOf(issues), List.copyOf(warnings)); } - /** - * Whether a request file resolves to a real path inside {@code baseDir}. - * - *

Three guards, all required: the supplied {@code relative} name must - * not be an absolute path, must contain no {@code ..} segment, and the - * file's real path — symlinks resolved — must lie under the real - * {@code baseDir}. The first two reject the obvious traversal payloads - * before any filesystem touch; the third is the backstop. - * - *

The containment check uses {@link Path#toRealPath()}, not a lexical - * {@link Path#startsWith(Path)} on {@code normalize()}-d paths. - * {@code normalize()} only collapses {@code .} and {@code ..} textually — - * it does not follow symbolic links. A symlink inside the project tree - * pointing outside {@code baseDir} therefore passes a lexical - * check yet reads an arbitrary target file; resolving real paths closes - * that escape. {@code toRealPath()} throwing — a non-existent file, a - * broken symlink, or a missing {@code baseDir} — is treated as "not - * contained": the file is rejected and the caller warns and skips it, - * rather than the request crashing. - * - * @param relative the raw file name from the request - * @param resolved the absolute, normalized path it resolved to - * @param baseDir the absolute, normalized analysis base directory - * @return {@code true} only if the file is safely contained in {@code baseDir} - */ - private static boolean isWithinBaseDir(String relative, Path resolved, Path baseDir) { - Path rawRelative = Path.of(relative); - if (rawRelative.isAbsolute()) { - return false; - } - for (Path segment : rawRelative) { - if ("..".equals(segment.toString())) { - return false; - } - } - try { - // Resolve symlinks on both sides: a symlinked component could - // otherwise point the file outside the tree while still passing - // a purely lexical startsWith() check. - Path realBase = baseDir.toRealPath(); - Path realResolved = resolved.toRealPath(); - return realResolved.startsWith(realBase); - } catch (IOException e) { - // A non-existent file, a broken symlink, or a missing baseDir: - // cannot prove containment, so reject — the caller skips and warns. - return false; - } - } - /** * The analyzer languages this service can actually analyze, as * protocol language keys — derived from the plugins that truly loaded, not diff --git a/src/main/java/io/github/randomcodespace/sonarpredict/daemon/BaseDirGuard.java b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/BaseDirGuard.java new file mode 100644 index 0000000..3cfc156 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/sonarpredict/daemon/BaseDirGuard.java @@ -0,0 +1,69 @@ +package io.github.randomcodespace.sonarpredict.daemon; + +import java.io.IOException; +import java.nio.file.Path; + +/** + * Path-traversal defense for the analysis core: decides whether a request file + * resolves to a real path safely contained within the analysis base directory. + * + *

A direct socket client could send an absolute path, a {@code ..} escape, + * or a symlinked name resolving outside {@code baseDir} to read files the + * daemon was never asked to analyze. This guard is the single place that + * decision is made, so the accept/reject rule is testable in isolation. + */ +final class BaseDirGuard { + + private BaseDirGuard() { + // utility class + } + + /** + * Whether a request file resolves to a real path inside {@code baseDir}. + * + *

Three guards, all required: the supplied {@code relative} name must + * not be an absolute path, must contain no {@code ..} segment, and the + * file's real path — symlinks resolved — must lie under the real + * {@code baseDir}. The first two reject the obvious traversal payloads + * before any filesystem touch; the third is the backstop. + * + *

The containment check uses {@link Path#toRealPath()}, not a lexical + * {@link Path#startsWith(Path)} on {@code normalize()}-d paths. + * {@code normalize()} only collapses {@code .} and {@code ..} textually — + * it does not follow symbolic links. A symlink inside the project tree + * pointing outside {@code baseDir} therefore passes a lexical + * check yet reads an arbitrary target file; resolving real paths closes + * that escape. {@code toRealPath()} throwing — a non-existent file, a + * broken symlink, or a missing {@code baseDir} — is treated as "not + * contained": the file is rejected and the caller warns and skips it, + * rather than the request crashing. + * + * @param relative the raw file name from the request + * @param resolved the absolute, normalized path it resolved to + * @param baseDir the absolute, normalized analysis base directory + * @return {@code true} only if the file is safely contained in {@code baseDir} + */ + static boolean isWithinBaseDir(String relative, Path resolved, Path baseDir) { + Path rawRelative = Path.of(relative); + if (rawRelative.isAbsolute()) { + return false; + } + for (Path segment : rawRelative) { + if ("..".equals(segment.toString())) { + return false; + } + } + try { + // Resolve symlinks on both sides: a symlinked component could + // otherwise point the file outside the tree while still passing + // a purely lexical startsWith() check. + Path realBase = baseDir.toRealPath(); + Path realResolved = resolved.toRealPath(); + return realResolved.startsWith(realBase); + } catch (IOException e) { + // A non-existent file, a broken symlink, or a missing baseDir: + // cannot prove containment, so reject — the caller skips and warns. + return false; + } + } +} diff --git a/src/test/java/io/github/randomcodespace/sonarpredict/daemon/BaseDirGuardTest.java b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/BaseDirGuardTest.java new file mode 100644 index 0000000..4fb0930 --- /dev/null +++ b/src/test/java/io/github/randomcodespace/sonarpredict/daemon/BaseDirGuardTest.java @@ -0,0 +1,136 @@ +package io.github.randomcodespace.sonarpredict.daemon; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.file.Files; +import java.nio.file.Path; + +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Focused unit tests for {@link BaseDirGuard}, the path-traversal defense + * lifted out of {@code AnalysisService}. The decisions asserted here mirror + * the accept/reject behavior the analysis core relied on before the lift. + */ +class BaseDirGuardTest { + + /** Resolves a request file the way {@code AnalysisService} does before guarding. */ + private static Path resolve(Path baseDir, String relative) { + return baseDir.resolve(relative).toAbsolutePath().normalize(); + } + + @Test + @DisplayName("a real file inside the base directory is accepted") + void inBaseDir_accepted(@TempDir Path tmp) throws Exception { + Path baseDir = Files.createDirectory(tmp.resolve("project")); + Files.writeString(baseDir.resolve("Main.java"), "class Main {}"); + + assertTrue(BaseDirGuard.isWithinBaseDir( + "Main.java", resolve(baseDir, "Main.java"), baseDir), + "a real file under baseDir must be accepted"); + } + + @Test + @DisplayName("a real file in a nested subdirectory of the base directory is accepted") + void nestedInBaseDir_accepted(@TempDir Path tmp) throws Exception { + Path baseDir = Files.createDirectory(tmp.resolve("project")); + Path nested = Files.createDirectories(baseDir.resolve("src/main")); + Files.writeString(nested.resolve("Main.java"), "class Main {}"); + + String relative = "src/main/Main.java"; + assertTrue(BaseDirGuard.isWithinBaseDir( + relative, resolve(baseDir, relative), baseDir), + "a real nested file under baseDir must be accepted"); + } + + @Test + @DisplayName("a '..' segment escaping the base directory is rejected") + void parentTraversal_rejected(@TempDir Path tmp) throws Exception { + Path baseDir = Files.createDirectory(tmp.resolve("project")); + // A secret the daemon must never read, a sibling of baseDir. + Files.writeString(tmp.resolve("secret.java"), "class Secret {}"); + + String relative = "../secret.java"; + assertFalse(BaseDirGuard.isWithinBaseDir( + relative, resolve(baseDir, relative), baseDir), + "a '..' path escaping baseDir must be rejected"); + } + + @Test + @DisplayName("an absolute request path is rejected") + void absolutePath_rejected(@TempDir Path tmp) throws Exception { + Path baseDir = Files.createDirectory(tmp.resolve("project")); + // An absolute path that genuinely exists, so only the absoluteness — + // not a failed toRealPath() — can be what rejects it. + Path outside = tmp.resolve("hostname"); + Files.writeString(outside, "host"); + + String absolute = outside.toAbsolutePath().toString(); + assertFalse(BaseDirGuard.isWithinBaseDir( + absolute, resolve(baseDir, absolute), baseDir), + "an absolute request path must be rejected"); + } + + @Test + @DisplayName("a non-existent file is rejected (real path cannot be proven)") + void nonExistentFile_rejected(@TempDir Path tmp) throws Exception { + Path baseDir = Files.createDirectory(tmp.resolve("project")); + + // ghost.java does not exist: toRealPath() throws, which the guard + // treats as "not contained" rather than letting the request crash. + String relative = "ghost.java"; + assertFalse(BaseDirGuard.isWithinBaseDir( + relative, resolve(baseDir, relative), baseDir), + "a non-existent file must be rejected, not crash"); + } + + @Test + @DisplayName("a symlink under the base directory pointing outside the tree is rejected") + void symlinkEscaping_rejected(@TempDir Path tmp) throws Exception { + Path baseDir = Files.createDirectory(tmp.resolve("project")); + Path outsideSecret = tmp.resolve("secret.java"); + Files.writeString(outsideSecret, "class Secret {}"); + + // A symlink whose name has no '..' and is not absolute, so it passes + // the lexical guards — but resolves to a file outside baseDir. + Path escapeLink = baseDir.resolve("escape.java"); + try { + Files.createSymbolicLink(escapeLink, outsideSecret); + } catch (java.io.IOException | UnsupportedOperationException e) { + Assumptions.assumeTrue(false, + "filesystem does not support symbolic links; skipping: " + e); + return; + } + + String relative = "escape.java"; + assertFalse(BaseDirGuard.isWithinBaseDir( + relative, resolve(baseDir, relative), baseDir), + "a symlink escaping baseDir must be rejected even though it is " + + "lexically clean"); + } + + @Test + @DisplayName("a symlink under the base directory resolving back inside the tree is accepted") + void symlinkInside_accepted(@TempDir Path tmp) throws Exception { + Path baseDir = Files.createDirectory(tmp.resolve("project")); + Files.writeString(baseDir.resolve("Main.java"), "class Main {}"); + + Path link = baseDir.resolve("alias.java"); + try { + Files.createSymbolicLink(link, baseDir.resolve("Main.java")); + } catch (java.io.IOException | UnsupportedOperationException e) { + Assumptions.assumeTrue(false, + "filesystem does not support symbolic links; skipping: " + e); + return; + } + + String relative = "alias.java"; + assertTrue(BaseDirGuard.isWithinBaseDir( + relative, resolve(baseDir, relative), baseDir), + "a symlink resolving to a real path inside baseDir must be accepted"); + } +} From 29e11330f5dac0c7638ea47eace993a4d07bc1be Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 09:59:08 +0000 Subject: [PATCH 21/24] ci(fix): warm offline-build repo with a full verify so Surefire's JUnit provider is cached (#13) --- .github/workflows/ci.yml | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9286e22..03a08cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -193,9 +193,10 @@ jobs: # plugins/ analyzer copies over the network, then proves `mvn -o clean # verify` succeeds with NO network access. dependency:go-offline alone is # insufficient: the maven-dependency-plugin `copy` executions fetch the 10 - # analyzer jars (and the host jar is built locally), so a normal - # `-DskipTests package` is run first to populate ~/.m2 and plugins/ before - # the offline verify. + # analyzer jars (and the host jar is built locally), and Surefire resolves + # its JUnit-platform provider only when tests actually run — so a full + # `verify` (tests included) is run first to populate ~/.m2 and plugins/ + # before the offline verify. offline-build: runs-on: ubuntu-latest timeout-minutes: 30 @@ -215,15 +216,19 @@ jobs: with: node-version: '20' - - name: Warm local repo (resolve plugins, deps, analyzer copies) + - name: Warm local repo (resolve plugins, deps, analyzer copies, test providers) run: | set -euo pipefail # Resolve the plugin/dependency graph... mvn -B -ntp dependency:go-offline - # ...then a full package to populate ~/.m2 with the analyzer jars - # pulled by the dependency:copy executions and to build the local - # host/shade jars the assembly needs. - mvn -B -ntp -DskipTests package + # ...then a FULL verify (tests included) to populate ~/.m2 with the + # analyzer jars pulled by the dependency:copy executions, the local + # host/shade jars the assembly needs, AND Surefire's lazily-resolved + # JUnit-platform provider. dependency:go-offline does NOT fetch that + # provider and -DskipTests never triggers it, so without running the + # tests here the offline verify below fails resolving + # surefire-junit-platform. + mvn -B -ntp verify - name: Offline verify (no network) run: | From 84c554067de89f750094e146741052ade91cfe8e Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 10:05:22 +0000 Subject: [PATCH 22/24] ci(fix): CVE gate now feeds osv a recognized .cdx.json via -L and fails closed (was silently scanning nothing) (#4) --- .github/workflows/ci.yml | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03a08cc..73e1596 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -95,23 +95,51 @@ jobs: mkdir -p "$OSV_SCANNER_LOCAL_DB_CACHE_DIRECTORY" # outputName uses ${project.version}; resolve the actual emitted file. SBOM=$(ls target/*-bom.json | head -n1) - echo "Scanning SBOM: $SBOM" + if [ -z "${SBOM:-}" ] || [ ! -f "$SBOM" ]; then + echo "::error::CycloneDX SBOM not found under target/*-bom.json — cannot run the CVE gate." + exit 1 + fi + # osv-scanner detects SBOM format from the FILENAME; '-bom.json' + # is not a recognized CycloneDX name (it silently parses nothing), so + # copy to a recognized '.cdx.json' name before scanning. + SBOM_CDX="$RUNNER_TEMP/bom.cdx.json" + cp "$SBOM" "$SBOM_CDX" + echo "Scanning SBOM: $SBOM (as $SBOM_CDX)" # --offline-vulnerabilities: never contacts the live feed during scan. # --download-offline-databases: refresh the cached DB if cache missed. - # osv-scanner exits 1 on ANY finding; we ignore its exit code and gate - # ourselves on CVSS severity from the JSON instead. + # -L: lockfile/SBOM input (replaces the deprecated --sbom). + set +e osv-scanner scan \ --offline-vulnerabilities \ --download-offline-databases \ --format json \ - --sbom "$SBOM" > osv-results.json || true + -L "$SBOM_CDX" > osv-results.json 2> osv-stderr.txt + OSV_EXIT=$? + set -e + cat osv-stderr.txt || true + # FAIL CLOSED: osv-scanner exits 0 (no vulns) or 1 (vulns found) on a + # real scan; any other exit code, a parse error, or a missing results + # object means NOTHING was scanned — the gate must fail, never pass on + # a non-scan (the bug this replaces let a non-scan pass green). + if [ "$OSV_EXIT" != "0" ] && [ "$OSV_EXIT" != "1" ]; then + echo "::error::osv-scanner did not complete a scan (exit $OSV_EXIT) — failing closed." + exit 1 + fi + if grep -qiE "Failed to parse|invalid SBOM" osv-stderr.txt; then + echo "::error::osv-scanner could not ingest the SBOM — failing closed." + exit 1 + fi + if ! jq -e 'has("results")' osv-results.json >/dev/null 2>&1; then + echo "::error::osv-scanner produced no results object — failing closed." + exit 1 + fi # Gate: max_severity is a CVSS base-score string per vulnerability # group. >= 7.0 == HIGH or CRITICAL (CVSS v3). Anything below is # non-blocking per security.md. An unscored group (null or "") is # coerced to 0 so a missing CVSS never crashes or trips the gate. SEV='((.max_severity // "0") | if . == "" then "0" else . end | tonumber)' HIGH=$(jq "[.results[]?.packages[]?.groups[]? | ${SEV} | select(. >= 7.0)] | length" osv-results.json) - echo "HIGH/CRITICAL findings (CVSS >= 7.0): ${HIGH}" + echo "HIGH/CRITICAL findings (CVSS >= 7.0): ${HIGH:-0}" jq -r ".results[]?.packages[]? | .package as \$p | .groups[]? | ${SEV} as \$s | select(\$s >= 7.0) | \" BLOCK \(\$p.name)@\(\$p.version) CVSS=\(.max_severity) \(.ids | join(\",\"))\"" osv-results.json || true if [ "${HIGH:-0}" -gt 0 ]; then echo "::error::${HIGH} HIGH/CRITICAL dependency vulnerabilit(ies) found — failing per security policy." From 62068979b4813d465e517e073a82d385693f735c Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 10:17:50 +0000 Subject: [PATCH 23/24] ci(fix): enforce HTTPS on osv-scanner download to clear SonarCloud hotspot S6506 (#4) --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 73e1596..3d14dd3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,7 +84,9 @@ jobs: run: | set -euo pipefail mkdir -p "$RUNNER_TEMP/osv" - curl -fsSL -o "$RUNNER_TEMP/osv/osv-scanner" \ + # --proto '=https' --tlsv1.2: enforce HTTPS through redirects too, so a + # downgrade to http can never serve the binary (satisfies S6506). + curl --proto '=https' --tlsv1.2 -fsSL -o "$RUNNER_TEMP/osv/osv-scanner" \ "https://github.com/google/osv-scanner/releases/download/${OSV_SCANNER_VERSION}/osv-scanner_linux_amd64" chmod +x "$RUNNER_TEMP/osv/osv-scanner" echo "$RUNNER_TEMP/osv" >> "$GITHUB_PATH" From 18e11be2d8b31119cf12301e12e45730e34949c2 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 29 May 2026 10:37:42 +0000 Subject: [PATCH 24/24] ci(fix): set least-privilege workflow permissions (contents: read) to clear CodeQL S6504 (#16) --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d14dd3..6e45c2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,11 @@ on: pull_request: workflow_dispatch: +# Least privilege for GITHUB_TOKEN across every job (CodeQL S6504): these jobs +# only read the repo. The security job keeps its own explicit (identical) block. +permissions: + contents: read + jobs: test: runs-on: ubuntu-latest