NVIDIA · elezar · May 4, 2026 · May 4, 2026 · May 4, 2026 · Jun 3, 2026
@@ -40,6 +40,14 @@ template resource limits. Docker and Podman apply them as runtime limits.
 Kubernetes mirrors each limit into the matching request. VM accepts the fields
 but currently ignores them.
 
+GPU requests enter the driver layer through
+`SandboxSpec.resource_requirements.gpu`. This compact interim shape carries
+either a default GPU request (no count) or an explicit GPU count. Exact
+driver-native device selection is passed through the selected runtime's
+`driver_config` block; the gateway selects that block but does not interpret
+the nested driver schema. Drivers that support exact selection validate that
+the number of unique `gpu_device_ids` entries matches the portable GPU count.
+
 VM runtime state paths are derived only from driver-validated sandbox IDs
 matching `[A-Za-z0-9._-]{1,128}`. The gateway-owned VM driver socket uses a
 private `run/` directory plus Unix peer UID/PID checks. Standalone

@@ -1215,10 +1215,14 @@ enum SandboxCommands {
 
         /// Target a driver-specific GPU device. Docker and Podman use CDI device IDs
         /// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index.
-        /// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection.
-        #[arg(long, requires = "gpu")]
+        /// When omitted with --gpu, the driver uses its default GPU selection.
+        #[arg(long, conflicts_with = "gpu_count")]
         gpu_device: Option<String>,
 
+        /// Request a specific number of GPUs. Mutually exclusive with --gpu-device.
+        #[arg(long, value_parser = clap::value_parser!(u32).range(1..), conflicts_with = "gpu_device")]
+        gpu_count: Option<u32>,
+
         /// CPU limit for the sandbox (for example: 500m, 1, 2.5).
         #[arg(long)]
         cpu: Option<String>,
@@ -2539,6 +2543,7 @@ async fn main() -> Result<()> {
                     editor,
                     gpu,
                     gpu_device,
+                    gpu_count,
                     cpu,
                     memory,
                     providers,
@@ -2608,6 +2613,7 @@ async fn main() -> Result<()> {
                         keep,
                         gpu,
                         gpu_device.as_deref(),
+                        gpu_count,
                         cpu.as_deref(),
                         memory.as_deref(),
                         editor,
@@ -4287,6 +4293,78 @@ mod tests {
         }
     }
 
+    #[test]
+    fn sandbox_create_gpu_count_parses_without_gpu_flag() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu-count", "2"])
+            .expect("sandbox create --gpu-count should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, gpu_count, .. }),
+                ..
+            }) => {
+                assert!(!gpu);
+                assert_eq!(gpu_count, Some(2));
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_rejects_zero() {
+        let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu-count", "0"]);
+
+        assert!(
+            result.is_err(),
+            "sandbox create --gpu-count 0 should be rejected"
+        );
+    }
+
+    #[test]
+    fn sandbox_create_gpu_device_parses_without_gpu_flag() {
+        let cli = Cli::try_parse_from([
+            "openshell",
+            "sandbox",
+            "create",
+            "--gpu-device",
+            "nvidia.com/gpu=0",
+        ])
+        .expect("sandbox create --gpu-device should parse without --gpu");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command:
+                    Some(SandboxCommands::Create {
+                        gpu, gpu_device, ..
+                    }),
+                ..
+            }) => {
+                assert!(!gpu);
+                assert_eq!(gpu_device.as_deref(), Some("nvidia.com/gpu=0"));
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_conflicts_with_gpu_device() {
+        let result = Cli::try_parse_from([
+            "openshell",
+            "sandbox",
+            "create",
+            "--gpu",
+            "--gpu-device",
+            "nvidia.com/gpu=0",
+            "--gpu-count",
+            "2",
+        ]);
+
+        assert!(
+            result.is_err(),
+            "sandbox create should reject --gpu-count with --gpu-device"
+        );
+    }
+
     #[test]
     fn service_expose_accepts_positional_target_port_and_service() {
         let cli = Cli::try_parse_from([

@@ -39,17 +39,18 @@ use openshell_core::proto::{
     GetClusterInferenceRequest, GetDraftHistoryRequest, GetDraftPolicyRequest,
     GetGatewayConfigRequest, GetProviderProfileRequest, GetProviderRefreshStatusRequest,
     GetProviderRequest, GetSandboxConfigRequest, GetSandboxLogsRequest,
-    GetSandboxPolicyStatusRequest, GetSandboxRequest, GetServiceRequest, HealthRequest,
-    ImportProviderProfilesRequest, LintProviderProfilesRequest, ListProviderProfilesRequest,
-    ListProvidersRequest, ListSandboxPoliciesRequest, ListSandboxProvidersRequest,
-    ListSandboxesRequest, ListServicesRequest, PlatformEvent, PolicySource, PolicyStatus, Provider,
-    ProviderCredentialRefreshStatus, ProviderCredentialRefreshStrategy, ProviderProfile,
-    ProviderProfileDiagnostic, ProviderProfileImportItem, RejectDraftChunkRequest,
-    RevokeSshSessionRequest, RotateProviderCredentialRequest, Sandbox, SandboxPhase, SandboxPolicy,
-    SandboxSpec, SandboxTemplate, ServiceEndpointResponse, SetClusterInferenceRequest,
-    SettingScope, SettingValue, TcpForwardFrame, TcpForwardInit, TcpRelayTarget,
-    UpdateConfigRequest, UpdateProviderRequest, WatchSandboxRequest, exec_sandbox_event,
-    setting_value, tcp_forward_init,
+    GetSandboxPolicyStatusRequest, GetSandboxRequest, GetServiceRequest, GpuResourceRequirement,
+    HealthRequest, ImportProviderProfilesRequest, LintProviderProfilesRequest,
+    ListProviderProfilesRequest, ListProvidersRequest, ListSandboxPoliciesRequest,
+    ListSandboxProvidersRequest, ListSandboxesRequest, ListServicesRequest, PlatformEvent,
+    PolicySource, PolicyStatus, Provider, ProviderCredentialRefreshStatus,
+    ProviderCredentialRefreshStrategy, ProviderProfile, ProviderProfileDiagnostic,
+    ProviderProfileImportItem, RejectDraftChunkRequest, RevokeSshSessionRequest,
+    RotateProviderCredentialRequest, Sandbox, SandboxPhase, SandboxPolicy,
+    SandboxResourceRequirements, SandboxSpec, SandboxTemplate, ServiceEndpointResponse,
+    SetClusterInferenceRequest, SettingScope, SettingValue, TcpForwardFrame, TcpForwardInit,
+    TcpRelayTarget, UpdateConfigRequest, UpdateProviderRequest, WatchSandboxRequest,
+    exec_sandbox_event, setting_value, tcp_forward_init,
 };
 use openshell_core::settings::{self, SettingValueKind};
 use openshell_core::{ObjectId, ObjectName};
@@ -1679,6 +1680,7 @@ pub async fn sandbox_create(
     keep: bool,
     gpu: bool,
     gpu_device: Option<&str>,
+    gpu_count: Option<u32>,
     cpu: Option<&str>,
     memory: Option<&str>,
     editor: Option<Editor>,
@@ -1732,7 +1734,10 @@ pub async fn sandbox_create(
         }
         None => None,
     };
-    let requested_gpu = gpu || image.as_deref().is_some_and(image_requests_gpu);
+    let gpu_device_ids = gpu_device_ids_from_cli(gpu_device);
+    let effective_gpu_count = gpu_count_from_cli(gpu_count, &gpu_device_ids);
+    let requested_gpu =
+        gpu || effective_gpu_count.is_some() || image.as_deref().is_some_and(image_requests_gpu);
 
     let providers_v2_enabled = gateway_providers_v2_enabled(&mut client).await?;
     let inferred_types: Vec<String> = if providers_v2_enabled {
@@ -1750,11 +1755,13 @@ pub async fn sandbox_create(
 
     let policy = load_sandbox_policy(policy)?;
     let resource_limits = build_sandbox_resource_limits(cpu, memory)?;
+    let driver_config = gpu_driver_config_from_cli(&gpu_device_ids);
 
-    let template = if image.is_some() || resource_limits.is_some() {
+    let template = if image.is_some() || resource_limits.is_some() || driver_config.is_some() {
         Some(SandboxTemplate {
             image: image.unwrap_or_default(),
             resources: resource_limits,
+            driver_config,
             ..SandboxTemplate::default()
         })
     } else {
@@ -1763,8 +1770,10 @@ pub async fn sandbox_create(
 
     let request = CreateSandboxRequest {
         spec: Some(SandboxSpec {
-            gpu: requested_gpu,
-            gpu_device: gpu_device.unwrap_or_default().to_string(),
+            resource_requirements: resource_requirements_from_cli(
+                requested_gpu,
+                effective_gpu_count,
+            ),
             policy,
             providers: configured_providers,
             template,
@@ -2189,6 +2198,74 @@ pub async fn sandbox_create(
     }
 }
 
+fn resource_requirements_from_cli(
+    requested_gpu: bool,
+    gpu_count: Option<u32>,
+) -> Option<SandboxResourceRequirements> {
+    requested_gpu.then_some(SandboxResourceRequirements {
+        gpu: Some(GpuResourceRequirement { count: gpu_count }),
+    })
+}
+
+fn gpu_device_ids_from_cli(gpu_device: Option<&str>) -> Vec<String> {
+    gpu_device
+        .map(str::trim)
+        .filter(|device_id| !device_id.is_empty())
+        .map(|device_id| vec![device_id.to_string()])
+        .unwrap_or_default()
+}
+
+fn gpu_count_from_cli(gpu_count: Option<u32>, gpu_device_ids: &[String]) -> Option<u32> {
+    if gpu_device_ids.is_empty() {
+        gpu_count
+    } else {
+        u32::try_from(gpu_device_ids.len()).ok()
+    }
+}
+
+fn gpu_driver_config_from_cli(gpu_device_ids: &[String]) -> Option<prost_types::Struct> {
+    use prost_types::{ListValue, Struct, Value, value::Kind};
+
+    fn string_value(value: &str) -> Value {
+        Value {
+            kind: Some(Kind::StringValue(value.to_string())),
+        }
+    }
+
+    fn driver_block(gpu_device_ids: &[String]) -> Value {
+        Value {
+            kind: Some(Kind::StructValue(Struct {
+                fields: std::iter::once((
+                    "gpu_device_ids".to_string(),
+                    Value {
+                        kind: Some(Kind::ListValue(ListValue {
+                            values: gpu_device_ids
+                                .iter()
+                                .map(|device_id| string_value(device_id))
+                                .collect(),
+                        })),
+                    },
+                ))
+                .collect(),
+            })),
+        }
+    }
+
+    if gpu_device_ids.is_empty() {
+        return None;
+    }
+
+    Some(Struct {
+        fields: [
+            ("docker".to_string(), driver_block(gpu_device_ids)),
+            ("podman".to_string(), driver_block(gpu_device_ids)),
+            ("vm".to_string(), driver_block(gpu_device_ids)),
+        ]
+        .into_iter()
+        .collect(),
+    })
+}
+
 /// Resolved source for the `--from` flag on `sandbox create`.
 #[derive(Debug)]
 enum ResolvedSource {
@@ -7438,14 +7515,15 @@ mod tests {
         dockerfile_sources_supported_for_gateway, format_endpoint, format_gateway_select_header,
         format_gateway_select_items, format_provider_attachment_table, gateway_add,
         gateway_auth_label, gateway_env_override_warning, gateway_select_with, gateway_type_label,
-        git_sync_files, http_health_check, image_requests_gpu, import_local_package_mtls_bundle,
+        git_sync_files, gpu_count_from_cli, gpu_device_ids_from_cli, gpu_driver_config_from_cli,
+        http_health_check, image_requests_gpu, import_local_package_mtls_bundle,
         inferred_provider_type, package_managed_tls_dirs, parse_cli_setting_value,
         parse_credential_expiry_cli_value, parse_credential_expiry_pairs, parse_credential_pairs,
         plaintext_gateway_is_remote, progress_step_from_metadata,
         provider_profile_allows_refresh_bootstrap, provisioning_timeout_message,
         ready_false_condition_message, refresh_status_header, refresh_status_row, resolve_from,
-        sandbox_should_persist, sandbox_upload_plan, service_expose_status_error,
-        service_url_for_gateway,
+        resource_requirements_from_cli, sandbox_should_persist, sandbox_upload_plan,
+        service_expose_status_error, service_url_for_gateway,
     };
     use crate::TEST_ENV_LOCK;
     use hyper::StatusCode;
@@ -7924,6 +8002,67 @@ mod tests {
         }
     }
 
+    #[test]
+    fn gpu_device_ids_from_cli_trims_gpu_device() {
+        assert_eq!(
+            gpu_device_ids_from_cli(Some(" nvidia.com/gpu=0 ")),
+            vec!["nvidia.com/gpu=0".to_string()]
+        );
+    }
+
+    #[test]
+    fn gpu_device_ids_from_cli_omits_empty_device() {
+        assert!(gpu_device_ids_from_cli(Some(" ")).is_empty());
+        assert!(gpu_device_ids_from_cli(None).is_empty());
+    }
+
+    #[test]
+    fn gpu_count_from_cli_uses_gpu_device_id_count() {
+        let device_ids = gpu_device_ids_from_cli(Some("nvidia.com/gpu=0"));
+
+        assert_eq!(gpu_count_from_cli(None, &device_ids), Some(1));
+        assert_eq!(gpu_count_from_cli(Some(2), &device_ids), Some(1));
+    }
+
+    #[test]
+    fn resource_requirements_from_cli_uses_presence_for_default_gpu() {
+        let requirements = resource_requirements_from_cli(true, None)
+            .expect("resource requirements should be present");
+        let gpu = requirements.gpu.expect("GPU requirement should be present");
+
+        assert_eq!(gpu.count, None);
+    }
+
+    #[test]
+    fn gpu_driver_config_from_cli_maps_gpu_device_to_driver_blocks() {
+        let device_ids = gpu_device_ids_from_cli(Some("nvidia.com/gpu=0"));
+        let config =
+            gpu_driver_config_from_cli(&device_ids).expect("driver config should be present");
+
+        assert!(config.fields.contains_key("docker"));
+        assert!(config.fields.contains_key("podman"));
+        assert!(config.fields.contains_key("vm"));
+    }
+
+    #[test]
+    fn resource_requirements_from_cli_maps_gpu_count() {
+        let requirements =
+            resource_requirements_from_cli(true, Some(2)).expect("requirements should exist");
+        let gpu = requirements.gpu.expect("GPU requirement should be present");
+
+        assert_eq!(gpu.count, Some(2));
+    }
+
+    #[test]
+    fn gpu_driver_config_from_cli_omits_empty_device() {
+        assert!(gpu_driver_config_from_cli(&[]).is_none());
+    }
+
+    #[test]
+    fn resource_requirements_from_cli_omits_gpu_request_when_not_requested() {
+        assert!(resource_requirements_from_cli(false, None).is_none());
+    }
+
     #[test]
     fn resolve_from_classifies_existing_dockerfile_path() {
         let temp = tempfile::tempdir().expect("failed to create tempdir");