diff --git a/crates/socket-patch-cli/src/commands/apply.rs b/crates/socket-patch-cli/src/commands/apply.rs index f6c5c568..2d8f456e 100644 --- a/crates/socket-patch-cli/src/commands/apply.rs +++ b/crates/socket-patch-cli/src/commands/apply.rs @@ -602,15 +602,20 @@ async fn apply_patches_inner( let mut results: Vec = Vec::new(); let mut has_errors = false; - // Group pypi PURLs by base (for variant matching with qualifiers) - let mut pypi_qualified_groups: HashMap> = HashMap::new(); - if let Some(pypi_purls) = partitioned.get(&Ecosystem::Pypi) { - for purl in pypi_purls { - let base = strip_purl_qualifiers(purl).to_string(); - pypi_qualified_groups - .entry(base) - .or_default() - .push(purl.clone()); + // Group release-variant PURLs by base. PyPI (`?artifact_id=`), + // RubyGems (`?platform=`), and Maven (`?classifier=&ext=`) carry + // qualifiers distinguishing releases of one `package@version`; the + // crawler emits the base PURL, so we match the manifest's qualified + // variants against it here. 
+ let mut variant_qualified_groups: HashMap> = HashMap::new(); + for (eco, purls) in &partitioned { + if eco.supports_release_variants() { + for purl in purls { + variant_qualified_groups + .entry(strip_purl_qualifiers(purl).to_string()) + .or_default() + .push(purl.clone()); + } } } @@ -618,13 +623,13 @@ async fn apply_patches_inner( let mut matched_manifest_purls: HashSet = HashSet::new(); for (purl, pkg_path) in &all_packages { - if Ecosystem::from_purl(purl) == Some(Ecosystem::Pypi) { + if Ecosystem::from_purl(purl).is_some_and(|e| e.supports_release_variants()) { let base_purl = strip_purl_qualifiers(purl).to_string(); if applied_base_purls.contains(&base_purl) { continue; } - let variants = pypi_qualified_groups + let variants = variant_qualified_groups .get(&base_purl) .cloned() .unwrap_or_else(|| vec![base_purl.clone()]); @@ -636,7 +641,9 @@ async fn apply_patches_inner( None => continue, }; - // Check first file hash match (skip when --force) + // Check first file hash match (skip when --force). A + // mismatch means this variant's distribution isn't the + // one on disk, so skip it. if !args.force { if let Some((file_name, file_info)) = patch.files.iter().next() { let verify = verify_file_patch(pkg_path, file_name, file_info).await; @@ -664,16 +671,21 @@ async fn apply_patches_inner( if result.success { applied = true; - applied_base_purls.insert(base_purl.clone()); results.push(result); matched_manifest_purls.insert(variant_purl.clone()); - break; + // No `break`: every variant that reaches this point is + // applied. Without `--force`, only variants whose first + // file hash matches the disk get here (one for PyPI/gem; + // each coexisting classifier jar for Maven). With `--force` + // that check is skipped, so every manifest variant is tried. 
} else { results.push(result); } } - if !applied { + if applied { + applied_base_purls.insert(base_purl.clone()); + } else { has_errors = true; if !args.common.silent && !args.common.json { eprintln!("Failed to patch {base_purl}: no matching variant found"); diff --git a/crates/socket-patch-cli/src/commands/get.rs b/crates/socket-patch-cli/src/commands/get.rs index 940f98b2..849ac88c 100644 --- a/crates/socket-patch-cli/src/commands/get.rs +++ b/crates/socket-patch-cli/src/commands/get.rs @@ -11,7 +11,7 @@ use socket_patch_core::manifest::operations::{read_manifest, write_manifest}; use socket_patch_core::manifest::schema::{ PatchFileInfo, PatchManifest, PatchRecord, VulnerabilityInfo, }; -use socket_patch_core::patch::apply::select_installed_variant; +use socket_patch_core::patch::apply::select_installed_variants; use socket_patch_core::utils::fuzzy_match::fuzzy_match_packages; use socket_patch_core::utils::purl::{is_purl, strip_purl_qualifiers}; use socket_patch_core::utils::telemetry::{track_patch_fetch_failed, track_patch_fetched}; @@ -329,11 +329,12 @@ pub struct GetArgs { #[arg(long = "one-off", env = "SOCKET_ONE_OFF", default_value_t = false)] pub one_off: bool, - /// Download patches for every release/distribution (artifact_id) of - /// a matched package, not just the one matching the locally- - /// installed distribution. Only affects PyPI today — the only - /// ecosystem with per-release artifact_id variants. Off by default: - /// only the patch for the installed dist is fetched. + /// Download patches for every release/distribution variant of a + /// matched package, not just the one(s) matching the locally- + /// installed distribution. Affects ecosystems with per-release + /// variants — PyPI (wheel/sdist via `artifact_id`), RubyGems + /// (`platform`), and Maven (`classifier`). Off by default: only the + /// patch(es) for the installed dist are fetched. 
#[arg( long = "all-releases", env = "SOCKET_ALL_RELEASES", @@ -530,20 +531,18 @@ pub struct DownloadParams { pub all_releases: bool, } -/// Download and apply a set of selected patches. -/// -/// Used by both `get` and `scan` commands. Returns (exit_code, json_result). -/// Narrow a selection of patches down to the release variant matching -/// each locally-installed distribution. +/// Narrow a selection of patches down to the release variant(s) present +/// in each locally-installed distribution. /// -/// A PyPI `package@version` can resolve to several patch variants — one -/// per `?artifact_id=...` release (wheel/sdist). Only one distribution -/// is ever installed in a given environment, so only one variant can -/// apply. With `--all-releases` off (the default) we keep just the -/// variant whose first patched file's hash matches the on-disk package, -/// dropping the rest so they are never downloaded or written to the -/// manifest. Non-PyPI ecosystems never carry `artifact_id` qualifiers, -/// so they pass through untouched. +/// A release-variant ecosystem `package@version` can resolve to several +/// patch variants — one per qualified PURL: PyPI `?artifact_id=` +/// (wheel/sdist), RubyGems `?platform=`, Maven `?classifier=&ext=`. With +/// `--all-releases` off (the default) we keep only the variant(s) whose +/// first patched file's hash matches what's on disk, dropping the rest so +/// they are never downloaded or written to the manifest. PyPI/RubyGems +/// install one distribution per environment (≤1 kept); Maven classifier +/// jars coexist, so several may be kept. Ecosystems that ship one +/// artifact per version never carry qualifiers and pass through untouched. /// /// Fallbacks (keep all variants of the base, i.e. 
behave as broad): /// * the base package is not installed on disk (nothing to match @@ -560,14 +559,15 @@ async fn filter_to_installed_releases( api_client: &socket_patch_core::api::client::ApiClient, org: Option<&str>, ) -> (Vec, Vec) { - // Group the PyPI selections by their base PURL (qualifiers stripped). - // Anything that isn't PyPI, or whose base has a single variant, is - // kept verbatim and needs no installed-dist resolution. - let mut pypi_groups: HashMap> = HashMap::new(); + // Group release-variant ecosystem selections (PyPI / RubyGems / Maven) + // by their base PURL (qualifiers stripped). Anything that can't have + // release variants, or whose base has a single variant, is kept + // verbatim and needs no installed-dist resolution. + let mut variant_groups: HashMap> = HashMap::new(); let mut kept: Vec = Vec::new(); for sr in selected { - if Ecosystem::from_purl(&sr.purl) == Some(Ecosystem::Pypi) { - pypi_groups + if Ecosystem::from_purl(&sr.purl).is_some_and(|e| e.supports_release_variants()) { + variant_groups .entry(strip_purl_qualifiers(&sr.purl).to_string()) .or_default() .push(sr.clone()); @@ -578,10 +578,10 @@ async fn filter_to_installed_releases( let mut warnings: Vec = Vec::new(); - // Singleton PyPI bases have nothing to disambiguate — keep as-is. + // Singleton bases have nothing to disambiguate — keep as-is. // Collect the multi-variant bases that actually need resolution. let mut multi: Vec<(String, Vec)> = Vec::new(); - for (base, variants) in pypi_groups { + for (base, variants) in variant_groups { if variants.len() <= 1 { kept.extend(variants); } else { @@ -593,10 +593,11 @@ async fn filter_to_installed_releases( return (kept, warnings); } - // Discover the on-disk path for each multi-variant base. The pypi - // crawler is queried with base PURLs and the result is fanned back - // out to every qualified variant (all variants of one installed - // package resolve to the same path). 
+ // Discover the on-disk path for each multi-variant base. The crawler + // is queried with base PURLs and the result is fanned back out to + // every qualified variant. For PyPI/RubyGems all variants of one + // installed package resolve to the same dir; for Maven the variants + // share a version dir but target distinct jar files within it. let all_qualified: Vec = multi .iter() .flat_map(|(_, variants)| variants.iter().map(|s| s.purl.clone())) @@ -612,8 +613,8 @@ async fn filter_to_installed_releases( let paths = find_packages_for_rollback(&partitioned, &crawler_options, true).await; for (base, variants) in multi { - // Any variant's resolved path works — they all map to the single - // installed distribution. + // Any variant's resolved path works — they all map to the same + // installed package directory. let pkg_path = variants.iter().find_map(|s| paths.get(&s.purl)).cloned(); let Some(pkg_path) = pkg_path else { // Not installed: cannot determine the relevant release. Keep @@ -645,21 +646,23 @@ async fn filter_to_installed_releases( .map(|(purl, files)| (purl.as_str(), files)) .collect(); - match select_installed_variant(&pkg_path, &refs).await { - Some(idx) => { - let winner = candidates[idx].0.clone(); - kept.extend(variants.into_iter().filter(|s| s.purl == winner)); - } - None => { - // Installed, but no variant matches the on-disk bytes. - // Fall back to broad rather than silently dropping a - // package the user asked about. - warnings.push(format!( - "No release variant of {base} matches the installed distribution; keeping all {} variant(s).", - variants.len() - )); - kept.extend(variants); - } + // Keep every variant present on disk. PyPI/RubyGems install one + // distribution per env (≤1 match); Maven classifier jars coexist + // so several may match. + let matched = select_installed_variants(&pkg_path, &refs).await; + if matched.is_empty() { + // Installed, but no variant matches the on-disk bytes. 
Fall + // back to broad rather than silently dropping a package the + // user asked about. + warnings.push(format!( + "No release variant of {base} matches the installed distribution; keeping all {} variant(s).", + variants.len() + )); + kept.extend(variants); + } else { + let winners: std::collections::HashSet = + matched.iter().map(|&i| candidates[i].0.clone()).collect(); + kept.extend(variants.into_iter().filter(|s| winners.contains(&s.purl))); } } @@ -686,6 +689,9 @@ fn files_for_selection(patch: &PatchResponse) -> HashMap files } +/// Download and apply a set of selected patches. +/// +/// Used by both `get` and `scan` commands. Returns (exit_code, json_result). pub async fn download_and_apply_patches( selected: &[PatchSearchResult], params: &DownloadParams, diff --git a/crates/socket-patch-cli/src/commands/rollback.rs b/crates/socket-patch-cli/src/commands/rollback.rs index 1937b916..7401c92c 100644 --- a/crates/socket-patch-cli/src/commands/rollback.rs +++ b/crates/socket-patch-cli/src/commands/rollback.rs @@ -6,7 +6,7 @@ use socket_patch_core::api::client::get_api_client_with_overrides; use socket_patch_core::crawlers::CrawlerOptions; use socket_patch_core::manifest::operations::read_manifest; use socket_patch_core::manifest::schema::{PatchFileInfo, PatchManifest, PatchRecord}; -use socket_patch_core::patch::apply::select_installed_variant; +use socket_patch_core::patch::apply::select_installed_variants; use socket_patch_core::patch::rollback::{rollback_package_patch, RollbackResult, VerifyRollbackStatus}; use socket_patch_core::utils::purl::{purl_matches_identifier, strip_purl_qualifiers}; use socket_patch_core::utils::telemetry::{track_patch_rolled_back, track_patch_rollback_failed}; @@ -451,13 +451,14 @@ async fn rollback_patches_inner( return Ok((true, Vec::new())); } - // Group discovered packages by base PURL. 
A PyPI package@version may - // have several release variants (`?artifact_id=...`) in the manifest; - // `merge_pypi_qualified` resolves them all to the single on-disk - // distribution. Rolling back every variant against that one file - // would HashMismatch on the non-installed variants and report - // spurious failures, so — mirroring apply — we collapse each group to - // the single variant whose hashes match the installed bytes. + // Group discovered packages by base PURL. A release-variant + // `package@version` (PyPI/RubyGems/Maven) may have several variants + // in the manifest that `merge_qualified` resolves to the same + // installed package dir. Rolling back a variant that is *not* present + // on disk would HashMismatch and report a spurious failure, so — + // mirroring apply — we collapse each group to the variant(s) whose + // hashes actually match the installed bytes. PyPI/RubyGems yield one + // such variant; Maven's coexisting classifier jars may yield several. let mut groups: HashMap> = HashMap::new(); for (purl, pkg_path) in &all_packages { groups @@ -486,16 +487,20 @@ async fn rollback_patches_inner( .map(|p| (purl.as_str(), &p.files)) }) .collect(); - match select_installed_variant(pkg_path, &candidates).await { - Some(idx) => { - let winner = candidates[idx].0.to_string(); - entries.into_iter().filter(|(p, _)| **p == winner).collect() - } + let matched = select_installed_variants(pkg_path, &candidates).await; + if matched.is_empty() { // No variant matches the installed distribution (e.g. a // locally-modified file). Fall back to attempting every // variant so the per-file verification surfaces the // mismatch rather than silently skipping the package. 
- None => entries, + entries + } else { + let winners: HashSet = + matched.iter().map(|&i| candidates[i].0.to_string()).collect(); + entries + .into_iter() + .filter(|(p, _)| winners.contains(*p)) + .collect() } }; diff --git a/crates/socket-patch-cli/src/commands/scan.rs b/crates/socket-patch-cli/src/commands/scan.rs index d132e744..bf693d4b 100644 --- a/crates/socket-patch-cli/src/commands/scan.rs +++ b/crates/socket-patch-cli/src/commands/scan.rs @@ -249,11 +249,12 @@ pub struct ScanArgs { #[arg(long, default_value_t = false)] pub sync: bool, - /// Download patches for every release/distribution (artifact_id) of - /// a matched package, not just the one matching the locally- - /// installed distribution. Only affects PyPI today — the only - /// ecosystem with per-release artifact_id variants. Off by default: - /// narrow scans store only the patch for the installed dist, keeping + /// Download patches for every release/distribution variant of a + /// matched package, not just the one(s) matching the locally- + /// installed distribution. Affects ecosystems with per-release + /// variants — PyPI (wheel/sdist via `artifact_id`), RubyGems + /// (`platform`), and Maven (`classifier`). Off by default: narrow + /// scans store only the patch(es) for the installed dist, keeping /// `.socket/` small; `--all-releases` makes the manifest portable /// across environments (e.g. cross-platform CI caches). #[arg( diff --git a/crates/socket-patch-cli/src/ecosystem_dispatch.rs b/crates/socket-patch-cli/src/ecosystem_dispatch.rs index 02bc9562..f18da6a5 100644 --- a/crates/socket-patch-cli/src/ecosystem_dispatch.rs +++ b/crates/socket-patch-cli/src/ecosystem_dispatch.rs @@ -155,7 +155,7 @@ macro_rules! scan_ecosystem { }}; } -/// Signature shared by `merge_first_wins` and `merge_pypi_qualified`. +/// Signature shared by `merge_first_wins` and `merge_qualified`. 
/// `dispatch_find` swaps between them so the rollback path can fan one /// crawler result back out to every caller-supplied qualified PURL. type MergeFn = @@ -172,10 +172,13 @@ fn merge_first_wins( } } -/// Pypi rollback merge: the crawler is queried with base PURLs (no +/// Release-variant merge: the crawler is queried with base PURLs (no /// `?qualifiers`); fan the resulting paths back out to every qualified -/// caller-supplied PURL that strips to the same base. -fn merge_pypi_qualified( +/// caller-supplied PURL that strips to the same base. Used for the +/// release-variant ecosystems (PyPI / RubyGems / Maven) so a single +/// installed package directory is mapped to every manifest variant for +/// later hash-based selection. +fn merge_qualified( out: &mut HashMap, purls: &[String], packages: HashMap, @@ -191,7 +194,10 @@ fn merge_pypi_qualified( } } -fn dedup_pypi_purls(purls: &[String]) -> Vec { +/// Strip qualifiers and dedupe — the crawler only needs the base PURL of +/// a release-variant ecosystem; the variant is resolved later by hashing +/// the installed files. +fn dedup_qualified_purls(purls: &[String]) -> Vec { purls .iter() .map(|p| strip_purl_qualifiers(p).to_string()) @@ -207,14 +213,16 @@ fn passthrough_purls(purls: &[String]) -> Vec { /// Drive every enabled ecosystem's find-by-purls path, accumulating /// into one `purl -> path` map. /// -/// `pypi_merge` lets the rollback variant fan a single crawler result +/// `variant_merge` lets the rollback variant fan a single crawler result /// out to every caller-supplied qualified PURL; everything else just -/// inserts the crawler-returned PURL with first-wins semantics. +/// inserts the crawler-returned PURL with first-wins semantics. It is +/// applied to the release-variant ecosystems (PyPI / RubyGems / Maven), +/// which are also queried with deduped base PURLs. 
async fn dispatch_find( partitioned: &HashMap>, options: &CrawlerOptions, silent: bool, - pypi_merge: MergeFn, + variant_merge: MergeFn, ) -> HashMap { let mut out: HashMap = HashMap::new(); @@ -242,8 +250,8 @@ async fn dispatch_find( get_paths = get_site_packages_paths, using_label = "", err_label = "Python packages", - purls_override = dedup_pypi_purls, - on_match = pypi_merge, + purls_override = dedup_qualified_purls, + on_match = variant_merge, ); #[cfg(feature = "cargo")] @@ -271,8 +279,11 @@ async fn dispatch_find( get_paths = get_gem_paths, using_label = "ruby gem paths", err_label = "Ruby gems", - purls_override = passthrough_purls, - on_match = merge_first_wins, + // RubyGems has per-platform release variants (`?platform=`); the + // crawler emits the base PURL and the platform is resolved by + // hashing the installed files, same as PyPI. + purls_override = dedup_qualified_purls, + on_match = variant_merge, ); #[cfg(feature = "golang")] @@ -307,8 +318,12 @@ async fn dispatch_find( get_paths = get_maven_repo_paths, using_label = "Maven repository", err_label = "Maven packages", - purls_override = passthrough_purls, - on_match = merge_first_wins, + // Maven has per-classifier release variants + // (`?classifier=&ext=`) that coexist as distinct jars in + // one version dir; the crawler emits the base PURL and + // each variant is resolved by hashing its jar file. + purls_override = dedup_qualified_purls, + on_match = variant_merge, ); } } @@ -380,15 +395,16 @@ pub async fn find_packages_for_purls( dispatch_find(partitioned, options, silent, merge_first_wins).await } -/// Variant of `find_packages_for_purls` for rollback, which needs to remap -/// pypi qualified PURLs (with `?artifact_id=...`) to the base PURL found -/// by the crawler. 
+/// Variant of `find_packages_for_purls` for rollback and narrow-release +/// resolution, which needs to remap qualified PURLs (PyPI +/// `?artifact_id=`, RubyGems `?platform=`, Maven `?classifier=&ext=`) to +/// the base PURL found by the crawler. pub async fn find_packages_for_rollback( partitioned: &HashMap>, options: &CrawlerOptions, silent: bool, ) -> HashMap { - dispatch_find(partitioned, options, silent, merge_pypi_qualified).await + dispatch_find(partitioned, options, silent, merge_qualified).await } /// Crawl all enabled ecosystems and return all packages plus per-ecosystem counts. diff --git a/crates/socket-patch-cli/tests/in_process_gem_multi_platform.rs b/crates/socket-patch-cli/tests/in_process_gem_multi_platform.rs new file mode 100644 index 00000000..af2163b6 --- /dev/null +++ b/crates/socket-patch-cli/tests/in_process_gem_multi_platform.rs @@ -0,0 +1,356 @@ +//! Multi-platform (per-`platform`) RubyGems patching coverage. +//! +//! RubyGems ships platform-specific gems — `nokogiri-1.16.5-x86_64-linux`, +//! `nokogiri-1.16.5-arm64-darwin`, … — alongside the generic ruby gem. +//! Each is a distinct release with its own compiled files, distinguished +//! by a `?platform=` PURL qualifier. An environment installs exactly one, +//! so this mirrors PyPI's one-installed-variant model. +//! +//! Unlike the pypi test, this needs no `gem` binary: a platform gem on +//! disk is just `gems/<name>-<version>-<platform>/lib/<name>.rb`, so we +//! synthesize it under a `vendor/bundle/ruby/*/gems` tree (what the +//! crawler scans in local mode) and serve matching hashes via wiremock. +//! +//! Behaviors pinned: +//! * `scan` (narrow, default) stores only the installed platform's patch. +//! * `scan --all-releases` (broad) stores every platform variant; apply +//! still patches with the installed platform only. +//! * `remove <base-purl>` over a broad manifest removes ALL platform +//! variants and rolls back the file without spurious failure. +//! * `rollback` (no id) over a broad manifest exits 0. 
+ +use std::path::{Path, PathBuf}; + +use base64::Engine; +use serial_test::serial; +use sha2::{Digest, Sha256}; +use socket_patch_cli::commands::remove::{run as remove_run, RemoveArgs}; +use socket_patch_cli::commands::rollback::{run as rollback_run, RollbackArgs}; +use socket_patch_cli::commands::scan::{run as scan_run, ScanArgs}; +use wiremock::matchers::{method, path, path_regex}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +const ORG: &str = "test-org"; +const GEM_NAME: &str = "nokogiri"; +const GEM_VERSION: &str = "1.16.5"; + +const UUID_INSTALLED: &str = "11111111-1111-4111-8111-aaaaaaaaaaaa"; +const UUID_OTHER: &str = "22222222-2222-4222-8222-bbbbbbbbbbbb"; + +const PLATFORM_INSTALLED: &str = "x86_64-linux"; +const PLATFORM_OTHER: &str = "arm64-darwin"; + +const MARKER_INSTALLED: &[u8] = b"\n# SOCKET-GEM-INSTALLED-X86_64\n"; + +fn git_sha256(content: &[u8]) -> String { + let header = format!("blob {}\0", content.len()); + let mut hasher = Sha256::new(); + hasher.update(header.as_bytes()); + hasher.update(content); + hex::encode(hasher.finalize()) +} + +fn b64(bytes: &[u8]) -> String { + base64::engine::general_purpose::STANDARD.encode(bytes) +} + +fn base_purl() -> String { + format!("pkg:gem/{GEM_NAME}@{GEM_VERSION}") +} + +fn qualified(platform: &str) -> String { + format!("{}?platform={platform}", base_purl()) +} + +/// Create an installed platform gem under the cwd's vendor/bundle tree. +/// Returns the path to the patchable file (`lib/<name>.rb`). 
+fn install_platform_gem(cwd: &Path, platform: &str, contents: &[u8]) -> PathBuf { + let gems = cwd + .join("vendor") + .join("bundle") + .join("ruby") + .join("3.0.0") + .join("gems"); + let gem_dir = gems.join(format!("{GEM_NAME}-{GEM_VERSION}-{platform}")); + let lib = gem_dir.join("lib"); + std::fs::create_dir_all(&lib).expect("create gem lib dir"); + let file = lib.join(format!("{GEM_NAME}.rb")); + std::fs::write(&file, contents).expect("write gem file"); + file +} + +/// Stand up a wiremock advertising two platform variants for the base +/// PURL. Only the installed platform's `beforeHash` matches the on-disk +/// `lib/nokogiri.rb`. +async fn setup_mock( + server: &MockServer, + installed_before_hash: &str, + installed_after_hash: &str, + installed_before_bytes: &[u8], + installed_after_bytes: &[u8], +) { + let base = base_purl(); + + Mock::given(method("POST")) + .and(path(format!("/v0/orgs/{ORG}/patches/batch"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "packages": [{ + "purl": base, + "patches": [ + { "uuid": UUID_INSTALLED, "purl": qualified(PLATFORM_INSTALLED), + "tier": "free", "cveIds": [], "ghsaIds": [], + "severity": "high", "title": "linux gem" }, + { "uuid": UUID_OTHER, "purl": qualified(PLATFORM_OTHER), + "tier": "free", "cveIds": [], "ghsaIds": [], + "severity": "high", "title": "darwin gem" }, + ] + }], + "canAccessPaidPatches": false, + }))) + .mount(server) + .await; + + Mock::given(method("GET")) + .and(path_regex(format!("^/v0/orgs/{ORG}/patches/by-package/.+$"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "patches": [ + { "uuid": UUID_INSTALLED, "purl": qualified(PLATFORM_INSTALLED), + "publishedAt": "2024-01-01T00:00:00Z", "description": "linux gem", + "license": "MIT", "tier": "free", "vulnerabilities": {} }, + { "uuid": UUID_OTHER, "purl": qualified(PLATFORM_OTHER), + "publishedAt": "2024-01-01T00:00:00Z", "description": "darwin gem", + "license": "MIT", "tier": 
"free", "vulnerabilities": {} }, + ], + "canAccessPaidPatches": false, + }))) + .mount(server) + .await; + + // Installed (linux) variant: hashes match the on-disk file. + mount_view( + server, + UUID_INSTALLED, + &qualified(PLATFORM_INSTALLED), + installed_before_hash, + installed_after_hash, + installed_before_bytes, + installed_after_bytes, + ) + .await; + + // Other (darwin) variant: a different distribution's bytes, so its + // beforeHash never matches the installed linux gem. + let other_before = b"# nokogiri.rb from the arm64-darwin gem\n"; + let mut other_after = other_before.to_vec(); + other_after.extend_from_slice(b"\n# DARWIN-MARKER\n"); + mount_view( + server, + UUID_OTHER, + &qualified(PLATFORM_OTHER), + &git_sha256(other_before), + &git_sha256(&other_after), + other_before, + &other_after, + ) + .await; +} + +#[allow(clippy::too_many_arguments)] +async fn mount_view( + server: &MockServer, + uuid: &str, + purl: &str, + before_hash: &str, + after_hash: &str, + before_bytes: &[u8], + after_bytes: &[u8], +) { + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/{ORG}/patches/view/{uuid}"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "uuid": uuid, + "purl": purl, + "publishedAt": "2024-01-01T00:00:00Z", + "files": { + "lib/nokogiri.rb": { + "beforeHash": before_hash, + "afterHash": after_hash, + "blobContent": b64(after_bytes), + "beforeBlobContent": b64(before_bytes), + } + }, + "vulnerabilities": {}, + "description": "gem multi-platform fixture", + "license": "MIT", + "tier": "free", + }))) + .mount(server) + .await; +} + +fn scan_args(cwd: &Path, api_url: String, all_releases: bool) -> ScanArgs { + ScanArgs { + common: socket_patch_cli::args::GlobalArgs { + cwd: cwd.to_path_buf(), + org: Some(ORG.to_string()), + json: true, + yes: true, + global: false, + global_prefix: None, + api_url, + api_token: Some("fake".to_string()), + ecosystems: Some(vec!["gem".to_string()]), + download_mode: "diff".to_string(), + 
dry_run: false, + ..socket_patch_cli::args::GlobalArgs::default() + }, + batch_size: 100, + // apply (not sync) so the post-sync GC doesn't sweep beforeHash + // blobs the later rollback/remove needs offline. + apply: true, + prune: false, + sync: false, + all_releases, + } +} + +fn manifest_keys(cwd: &Path) -> Vec { + let path = cwd.join(".socket").join("manifest.json"); + let raw = std::fs::read_to_string(&path) + .unwrap_or_else(|_| panic!("manifest not found at {}", path.display())); + let v: serde_json::Value = serde_json::from_str(&raw).expect("manifest json"); + v["patches"] + .as_object() + .map(|m| m.keys().cloned().collect()) + .unwrap_or_default() +} + +fn file_has_marker(file: &Path, marker: &[u8]) -> bool { + let bytes = std::fs::read(file).expect("read file"); + bytes.windows(marker.len()).any(|w| w == marker) +} + +/// Install the linux gem, compute its hashes, stand up the mock. +async fn fixture(cwd: &Path) -> (PathBuf, MockServer) { + let original = b"module Nokogiri\n VERSION = '1.16.5'\nend\n".to_vec(); + let file = install_platform_gem(cwd, PLATFORM_INSTALLED, &original); + let before_hash = git_sha256(&original); + let mut patched = original.clone(); + patched.extend_from_slice(MARKER_INSTALLED); + let after_hash = git_sha256(&patched); + + let server = MockServer::start().await; + setup_mock(&server, &before_hash, &after_hash, &original, &patched).await; + (file, server) +} + +#[tokio::test] +#[serial] +async fn narrow_scan_keeps_only_installed_platform() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (gem_file, server) = fixture(tmp.path()).await; + + let code = scan_run(scan_args(tmp.path(), server.uri(), false)).await; + assert!(code == 0 || code == 1, "scan exit: {code}"); + + let keys = manifest_keys(tmp.path()); + assert_eq!( + keys, + vec![qualified(PLATFORM_INSTALLED)], + "narrow scan must store only the installed platform variant; got {keys:?}" + ); + assert!( + file_has_marker(&gem_file, MARKER_INSTALLED), + "installed 
platform gem should be patched" + ); +} + +#[tokio::test] +#[serial] +async fn broad_scan_keeps_all_platforms() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (gem_file, server) = fixture(tmp.path()).await; + + let code = scan_run(scan_args(tmp.path(), server.uri(), true)).await; + assert!(code == 0 || code == 1, "scan exit: {code}"); + + let mut keys = manifest_keys(tmp.path()); + keys.sort(); + let mut expected = vec![qualified(PLATFORM_INSTALLED), qualified(PLATFORM_OTHER)]; + expected.sort(); + assert_eq!(keys, expected, "broad scan must store every platform variant"); + + // Apply still patches only with the installed platform's variant. + assert!( + file_has_marker(&gem_file, MARKER_INSTALLED), + "broad apply should patch with the installed platform variant" + ); +} + +#[tokio::test] +#[serial] +async fn remove_base_purl_clears_all_platforms_and_rolls_back() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (gem_file, server) = fixture(tmp.path()).await; + + let _ = scan_run(scan_args(tmp.path(), server.uri(), true)).await; + assert_eq!(manifest_keys(tmp.path()).len(), 2); + assert!(file_has_marker(&gem_file, MARKER_INSTALLED)); + + let remove_args = RemoveArgs { + identifier: base_purl(), + common: socket_patch_cli::args::GlobalArgs { + cwd: tmp.path().to_path_buf(), + org: Some(ORG.to_string()), + api_url: server.uri(), + api_token: Some("fake".to_string()), + json: true, + yes: true, + ecosystems: Some(vec!["gem".to_string()]), + ..socket_patch_cli::args::GlobalArgs::default() + }, + skip_rollback: false, + }; + let code = remove_run(remove_args).await; + assert_eq!(code, 0, "remove base PURL should succeed (exit 0)"); + + assert!( + manifest_keys(tmp.path()).is_empty(), + "all platform variants should be removed from the manifest" + ); + assert!( + !file_has_marker(&gem_file, MARKER_INSTALLED), + "remove should roll the gem file back to its original bytes" + ); +} + +#[tokio::test] +#[serial] +async fn 
rollback_all_over_broad_manifest_succeeds() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (gem_file, server) = fixture(tmp.path()).await; + + let _ = scan_run(scan_args(tmp.path(), server.uri(), true)).await; + assert_eq!(manifest_keys(tmp.path()).len(), 2); + assert!(file_has_marker(&gem_file, MARKER_INSTALLED)); + + let rollback_args = RollbackArgs { + identifier: None, + common: socket_patch_cli::args::GlobalArgs { + cwd: tmp.path().to_path_buf(), + org: Some(ORG.to_string()), + api_url: server.uri(), + api_token: Some("fake".to_string()), + json: true, + ecosystems: Some(vec!["gem".to_string()]), + ..socket_patch_cli::args::GlobalArgs::default() + }, + one_off: false, + }; + let code = rollback_run(rollback_args).await; + assert_eq!(code, 0, "rollback-all over broad manifest should exit 0"); + assert!( + !file_has_marker(&gem_file, MARKER_INSTALLED), + "rollback should restore the original gem file" + ); +} diff --git a/crates/socket-patch-cli/tests/in_process_remote_ecosystems_apply.rs b/crates/socket-patch-cli/tests/in_process_remote_ecosystems_apply.rs index 24c6e238..16216e73 100644 --- a/crates/socket-patch-cli/tests/in_process_remote_ecosystems_apply.rs +++ b/crates/socket-patch-cli/tests/in_process_remote_ecosystems_apply.rs @@ -242,6 +242,122 @@ async fn maven_handcrafted_install_apply_patches_file() { std::env::remove_var("SOCKET_EXPERIMENTAL_MAVEN"); } +/// Maven is the one release-variant ecosystem where multiple variants +/// COEXIST on disk: a version dir can hold several classifier jars at +/// once (e.g. native `-linux-x86_64` / `-osx-x86_64`). This pins that +/// the (default, narrow) apply path keeps and patches *every* present +/// classifier variant — exercising the plural `select_installed_variants` +/// selector — rather than just the first. 
+#[cfg(feature = "maven")] +#[tokio::test] +#[serial] +async fn maven_multi_classifier_patches_every_present_jar() { + let tmp = tempfile::tempdir().expect("tempdir"); + let repo = tmp.path().join("m2-repo"); + let version_dir = repo.join("org/example/native-lib/1.0.0"); + std::fs::create_dir_all(&version_dir).unwrap(); + std::fs::write( + version_dir.join("native-lib-1.0.0.pom"), + "4.0.0org.examplenative-lib1.0.0", + ) + .unwrap(); + + // Two classifier jars coexist in the same version directory. + let jar_a = "native-lib-1.0.0-linux-x86_64.jar"; + let jar_b = "native-lib-1.0.0-osx-x86_64.jar"; + let orig_a = b"JAR-A original bytes\n"; + let orig_b = b"JAR-B original bytes\n"; + std::fs::write(version_dir.join(jar_a), orig_a).unwrap(); + std::fs::write(version_dir.join(jar_b), orig_b).unwrap(); + let mut patched_a = orig_a.to_vec(); + patched_a.extend_from_slice(b"\n# MARKER-A\n"); + let mut patched_b = orig_b.to_vec(); + patched_b.extend_from_slice(b"\n# MARKER-B\n"); + + std::env::set_var("MAVEN_REPO_LOCAL", &repo); + std::env::set_var("SOCKET_EXPERIMENTAL_MAVEN", "1"); + + let base = "pkg:maven/org.example/native-lib@1.0.0"; + let purl_a = format!("{base}?classifier=linux-x86_64&ext=jar"); + let purl_b = format!("{base}?classifier=osx-x86_64&ext=jar"); + let uuid_a = "aaaaaaaa-aaaa-4aaa-8aaa-aaaaaaaaaaaa"; + let uuid_b = "bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb"; + + let server = MockServer::start().await; + // Batch + by-package advertise both classifier variants for the base. 
+ Mock::given(method("POST")) + .and(path(format!("/v0/orgs/{ORG}/patches/batch"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "packages": [{ + "purl": base, + "patches": [ + { "uuid": uuid_a, "purl": purl_a, "tier": "free", "cveIds": [], + "ghsaIds": [], "severity": "medium", "title": "linux jar" }, + { "uuid": uuid_b, "purl": purl_b, "tier": "free", "cveIds": [], + "ghsaIds": [], "severity": "medium", "title": "osx jar" }, + ] + }], + "canAccessPaidPatches": false, + }))) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path_regex(format!("^/v0/orgs/{ORG}/patches/by-package/.+$"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "patches": [ + { "uuid": uuid_a, "purl": purl_a, "publishedAt": "2024-01-01T00:00:00Z", + "description": "linux", "license": "MIT", "tier": "free", "vulnerabilities": {} }, + { "uuid": uuid_b, "purl": purl_b, "publishedAt": "2024-01-01T00:00:00Z", + "description": "osx", "license": "MIT", "tier": "free", "vulnerabilities": {} }, + ], + "canAccessPaidPatches": false, + }))) + .mount(&server) + .await; + for (uuid, purl, jar, before, after) in [ + (uuid_a, &purl_a, jar_a, orig_a.to_vec(), patched_a.clone()), + (uuid_b, &purl_b, jar_b, orig_b.to_vec(), patched_b.clone()), + ] { + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/{ORG}/patches/view/{uuid}"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "uuid": uuid, + "purl": purl, + "publishedAt": "2024-01-01T00:00:00Z", + "files": { + jar: { + "beforeHash": git_sha256(&before), + "afterHash": git_sha256(&after), + "blobContent": base64::engine::general_purpose::STANDARD.encode(&after), + } + }, + "vulnerabilities": {}, + "description": "fixture", "license": "MIT", "tier": "free", + }))) + .mount(&server) + .await; + } + + let args = default_scan_args(tmp.path(), "maven", server.uri()); + let code = scan_run(args).await; + assert!(code == 0 || code == 1, "scan --sync 
exit: {code}"); + + // BOTH coexisting classifier jars must be patched. + let after_a = std::fs::read(version_dir.join(jar_a)).expect("read jar a"); + let after_b = std::fs::read(version_dir.join(jar_b)).expect("read jar b"); + assert!( + after_a.windows(b"# MARKER-A\n".len()).any(|w| w == b"# MARKER-A\n"), + "linux-x86_64 classifier jar was not patched" + ); + assert!( + after_b.windows(b"# MARKER-B\n".len()).any(|w| w == b"# MARKER-B\n"), + "osx-x86_64 classifier jar was not patched (plural selector must keep both)" + ); + + std::env::remove_var("MAVEN_REPO_LOCAL"); + std::env::remove_var("SOCKET_EXPERIMENTAL_MAVEN"); +} + // --------------------------------------------------------------------------- // composer // --------------------------------------------------------------------------- diff --git a/crates/socket-patch-core/src/crawlers/ruby_crawler.rs b/crates/socket-patch-core/src/crawlers/ruby_crawler.rs index c94abd28..048cbd09 100644 --- a/crates/socket-patch-core/src/crawlers/ruby_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/ruby_crawler.rs @@ -95,8 +95,10 @@ impl RubyCrawler { for purl in purls { if let Some((name, version)) = crate::utils::purl::parse_gem_purl(purl) { - let gem_dir = gem_path.join(format!("{name}-{version}")); - if self.verify_gem_at_path(&gem_dir).await { + // The purl is the base PURL (qualifiers stripped upstream). + // Resolve it to the installed gem dir, which may carry a + // `-` suffix for platform gems. + if let Some(gem_dir) = self.locate_gem_dir(gem_path, name, version).await { result.insert( purl.clone(), CrawledPackage { @@ -309,26 +311,62 @@ impl RubyCrawler { false } - /// Parse a gem directory name into (name, version). + /// Parse a gem directory name into its base `(name, version)`. /// - /// Gem directories follow the pattern `-`, where the - /// version is the last `-`-separated component that starts with a digit. + /// Gem directories follow `-` (ruby-platform gems) or + /// `--` (platform gems, e.g. 
/// Parse a gem directory name into its base `(name, version)`.
///
/// Gem directories follow `<name>-<version>` (ruby-platform gems) or
/// `<name>-<version>-<platform>` (platform gems, e.g.
/// `nokogiri-1.16.5-x86_64-linux`). A RubyGems version is dash-free
/// (prerelease dashes render as `.pre.`), so the version is the single
/// dash-free token right after the name/version boundary; anything after
/// that token is the platform suffix, which is dropped so the result maps
/// onto the base PURL.
///
/// Boundary selection: every `-` immediately followed by an ASCII digit is
/// a candidate.
///
/// * The first candidate whose token contains a `.` wins. This keeps gem
///   names that themselves contain a dash-digit segment intact
///   (`http-2-0.11.0` is gem `http-2` at `0.11.0`, not gem `http` at `2`),
///   while a numeric platform tail (`universal-darwin-21`) can never be
///   chosen over the dotted version that precedes it.
/// * If no candidate token is dotted (single-segment versions such as
///   `foo-1`), the first candidate is used.
///
/// Returns `None` when the directory name has no `-<digit>` boundary or
/// when the name part would be empty.
fn parse_dir_name_version(dir_name: &str) -> Option<(String, String)> {
    // First dash-digit candidate seen, kept in case nothing dotted turns up.
    let mut fallback: Option<(usize, &str)> = None;
    // First dash-digit candidate whose token looks like a dotted version.
    let mut dotted: Option<(usize, &str)> = None;

    for (i, _) in dir_name.match_indices('-') {
        let rest = &dir_name[i + 1..];
        if !rest.starts_with(|c: char| c.is_ascii_digit()) {
            continue;
        }
        // The candidate version is the leading dash-free token of `rest`.
        let token = rest.split('-').next().unwrap_or(rest);
        if token.contains('.') {
            dotted = Some((i, token));
            break;
        }
        if fallback.is_none() {
            fallback = Some((i, token));
        }
    }

    let (idx, version) = dotted.or(fallback)?;
    let name = &dir_name[..idx];
    // `version` starts with a digit by construction, so only the name can
    // be empty here (e.g. a directory called `-1.0`).
    if name.is_empty() {
        return None;
    }
    Some((name.to_string(), version.to_string()))
}
+ async fn locate_gem_dir( + &self, + gem_path: &Path, + name: &str, + version: &str, + ) -> Option { + let exact = gem_path.join(format!("{name}-{version}")); + if self.verify_gem_at_path(&exact).await { + return Some(exact); + } + let prefix = format!("{name}-{version}-"); + for entry in crate::utils::fs::list_dir_entries(gem_path).await { + let file_name = entry.file_name(); + let dir_name = file_name.to_string_lossy(); + if dir_name.starts_with(&prefix) { + let dir = gem_path.join(&*dir_name); + if self.verify_gem_at_path(&dir).await { + return Some(dir); + } + } + } + None + } } impl Default for RubyCrawler { @@ -383,6 +421,37 @@ mod tests { assert!(RubyCrawler::parse_dir_name_version("noversion").is_none()); } + #[test] + fn test_parse_gem_dir_name_platform_gems() { + // Platform gems append `-` to the base name-version; the + // platform must be stripped so the base PURL matches the manifest. + assert_eq!( + RubyCrawler::parse_dir_name_version("nokogiri-1.16.5-x86_64-linux"), + Some(("nokogiri".to_string(), "1.16.5".to_string())) + ); + assert_eq!( + RubyCrawler::parse_dir_name_version("nokogiri-1.16.5-arm64-darwin"), + Some(("nokogiri".to_string(), "1.16.5".to_string())) + ); + assert_eq!( + RubyCrawler::parse_dir_name_version("sassc-2.4.0-java"), + Some(("sassc".to_string(), "2.4.0".to_string())) + ); + // Platform with a trailing OS version number must not leak into + // the gem version (regression: a "last dash-digit" parser would + // split on `-21`). + assert_eq!( + RubyCrawler::parse_dir_name_version("nokogiri-1.16.5-universal-darwin-21"), + Some(("nokogiri".to_string(), "1.16.5".to_string())) + ); + // A name with an embedded version-like number resolves at the + // first dash-digit boundary. 
+ assert_eq!( + RubyCrawler::parse_dir_name_version("libv8-node-18.16.0.0-x86_64-linux"), + Some(("libv8-node".to_string(), "18.16.0.0".to_string())) + ); + } + #[tokio::test] async fn test_find_by_purls_gem() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/crawlers/types.rs b/crates/socket-patch-core/src/crawlers/types.rs index eedbd916..44925a6e 100644 --- a/crates/socket-patch-core/src/crawlers/types.rs +++ b/crates/socket-patch-core/src/crawlers/types.rs @@ -107,6 +107,30 @@ impl Ecosystem { } } + /// Whether this ecosystem can have multiple release/distribution + /// variants per `package@version`, each a distinct downloadable + /// artifact distinguished by a PURL qualifier: + /// + /// * PyPI — `?artifact_id=` (wheel / sdist), + /// * RubyGems — `?platform=` (e.g. `x86_64-linux`, `arm64-darwin`), + /// * Maven — `?classifier=&ext=` (e.g. native `-linux-x86_64` jars). + /// + /// Single-artifact ecosystems (npm, cargo, go, composer, nuget, deno) + /// return false: they ship exactly one tarball/zip per version, and + /// any platform split lives under separate package *names* rather + /// than as variants of one coordinate. Callers use this to decide + /// whether to dedupe qualified PURLs to a base and fan results back + /// out to every variant (release-variant ecosystems) or to match + /// PURLs 1:1 (everything else). + pub fn supports_release_variants(&self) -> bool { + match self { + Ecosystem::Pypi | Ecosystem::Gem => true, + #[cfg(feature = "maven")] + Ecosystem::Maven => true, + _ => false, + } + } + /// Human-readable name for user-facing messages. pub fn display_name(&self) -> &'static str { match self { @@ -258,6 +282,21 @@ mod tests { assert_eq!(Ecosystem::Cargo.display_name(), "cargo"); } + #[test] + fn test_supports_release_variants() { + // Multi-artifact ecosystems. 
+ assert!(Ecosystem::Pypi.supports_release_variants()); + assert!(Ecosystem::Gem.supports_release_variants()); + #[cfg(feature = "maven")] + assert!(Ecosystem::Maven.supports_release_variants()); + // Single-artifact ecosystems. + assert!(!Ecosystem::Npm.supports_release_variants()); + #[cfg(feature = "cargo")] + assert!(!Ecosystem::Cargo.supports_release_variants()); + #[cfg(feature = "nuget")] + assert!(!Ecosystem::Nuget.supports_release_variants()); + } + #[test] fn test_from_purl_gem() { assert_eq!( diff --git a/crates/socket-patch-core/src/patch/apply.rs b/crates/socket-patch-core/src/patch/apply.rs index 5249a834..4d3ec07c 100644 --- a/crates/socket-patch-core/src/patch/apply.rs +++ b/crates/socket-patch-core/src/patch/apply.rs @@ -225,36 +225,45 @@ pub async fn verify_file_patch( /// match. /// /// `variants` maps a variant key (typically a qualified PURL) to that -/// variant's patched files. Returns the index of the first variant whose -/// first patched file is in a [`VerifyStatus::Ready`] or +/// variant's patched files. Returns the indices of **every** variant +/// whose first patched file is in a [`VerifyStatus::Ready`] or /// [`VerifyStatus::AlreadyPatched`] state — i.e. its `beforeHash` (or -/// `afterHash`, if already applied) matches the installed bytes — or -/// `None` when no variant matches the installed distribution. +/// `afterHash`, if already applied) matches the installed bytes. /// /// A [`VerifyStatus::NotFound`] (a missing pre-existing file) or /// [`VerifyStatus::HashMismatch`] does **not** count as a match: those -/// signal the variant describes a *different* distribution than the one -/// on disk. A variant with no files (nothing to verify) is treated as a -/// match. Both the narrow download filter (scan/get) and the rollback -/// dedupe share this helper so release selection stays consistent. -pub async fn select_installed_variant( +/// signal the variant describes a distribution that is *not* present on +/// disk. 
A variant with no files (nothing to verify) is treated as a +/// match. +/// +/// Returning all matches (not just the first) is what lets ecosystems +/// whose variants *coexist* on disk work — e.g. Maven, where several +/// classifier jars (`foo-1.0.jar`, `foo-1.0-linux-x86_64.jar`) live in +/// one version directory and each maps to its own file. For PyPI and +/// RubyGems exactly one distribution is installed per environment, so +/// this naturally yields ≤1 index and their behavior is unchanged. The +/// narrow download filter (scan/get) and the rollback dedupe share this +/// helper so release selection stays consistent with apply. +pub async fn select_installed_variants( pkg_path: &Path, variants: &[(&str, &HashMap)], -) -> Option { +) -> Vec { + let mut matched = Vec::new(); for (idx, (_key, files)) in variants.iter().enumerate() { // No files to verify — nothing to disqualify the variant. let Some((file_name, file_info)) = files.iter().next() else { - return Some(idx); + matched.push(idx); + continue; }; let verify = verify_file_patch(pkg_path, file_name, file_info).await; if matches!( verify.status, VerifyStatus::Ready | VerifyStatus::AlreadyPatched ) { - return Some(idx); + matched.push(idx); } } - None + matched } /// Apply a patch to a single file.