Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions mkdocs/docs/reference/dstack.yml/dev-environment.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,15 @@ The `dev-environment` configuration type allows running [dev environments](../..
* `~/.bashrc`, same as `~/.bashrc:~/.bashrc`
* `/opt/myorg`, same as `/opt/myorg/` and `/opt/myorg:/opt/myorg`
* `libs/patched_libibverbs.so.1:/lib/x86_64-linux-gnu/libibverbs.so.1`

### `backend_options`

Backend-specific options that only take effect for offers of the respective backend.

#### `backend_options[n][type=vastai]` { #backend_options-vastai data-toc-label="vastai" }

#SCHEMA# dstack._internal.core.backends.vastai.profile_options.VastAIProfileOptions
overrides:
show_root_heading: false
type:
required: true
12 changes: 12 additions & 0 deletions mkdocs/docs/reference/dstack.yml/fleet.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@ The `fleet` configuration type allows creating and updating fleets.
overrides:
show_root_heading: false

### `backend_options`

Backend-specific options that only take effect for offers of the respective backend.

#### `backend_options[n][type=vastai]` { #backend_options-vastai data-toc-label="vastai" }

#SCHEMA# dstack._internal.core.backends.vastai.profile_options.VastAIProfileOptions
overrides:
show_root_heading: false
type:
required: true

=== "SSH fleet"

## Root reference
Expand Down
12 changes: 12 additions & 0 deletions mkdocs/docs/reference/dstack.yml/service.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,15 @@ The `service` configuration type allows running [services](../../concepts/servic
* `~/.bashrc`, same as `~/.bashrc:~/.bashrc`
* `/opt/myorg`, same as `/opt/myorg/` and `/opt/myorg:/opt/myorg`
* `libs/patched_libibverbs.so.1:/lib/x86_64-linux-gnu/libibverbs.so.1`

### `backend_options`

Backend-specific options that only take effect for offers of the respective backend.

#### `backend_options[n][type=vastai]` { #backend_options-vastai data-toc-label="vastai" }

#SCHEMA# dstack._internal.core.backends.vastai.profile_options.VastAIProfileOptions
overrides:
show_root_heading: false
type:
required: true
12 changes: 12 additions & 0 deletions mkdocs/docs/reference/dstack.yml/task.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,15 @@ The `task` configuration type allows running [tasks](../../concepts/tasks.md).
* `~/.bashrc`, same as `~/.bashrc:~/.bashrc`
* `/opt/myorg`, same as `/opt/myorg/` and `/opt/myorg:/opt/myorg`
* `libs/patched_libibverbs.so.1:/lib/x86_64-linux-gnu/libibverbs.so.1`

### `backend_options`

Backend-specific options that only take effect for offers of the respective backend.

#### `backend_options[n][type=vastai]` { #backend_options-vastai data-toc-label="vastai" }

#SCHEMA# dstack._internal.core.backends.vastai.profile_options.VastAIProfileOptions
overrides:
show_root_heading: false
type:
required: true
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"python-multipart>=0.0.16",
"filelock",
"psutil",
"gpuhunt==0.1.22",
"gpuhunt==0.1.23",
"argcomplete>=3.5.0",
"ignore-python>=0.2.0",
"orjson",
Expand Down
23 changes: 23 additions & 0 deletions src/dstack/_internal/core/backends/base/profile_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from abc import ABC, abstractmethod
from typing import Generic, Optional, Sequence, Type, TypeVar

from dstack._internal.core.models.common import CoreModel

# Type of the concrete options class; subclasses parametrize the base with themselves
# so that `combine` takes and returns their own type.
T = TypeVar("T", bound="BackendProfileOptions")


class BackendProfileOptions(CoreModel, ABC, Generic[T]):
    # Abstract base for backend-specific profile options.
    # Documented with comments (not a docstring) so the model's `__doc__` stays unset.

    @abstractmethod
    def combine(self, other: T) -> T:
        """Return options that merge ``self`` with ``other``.

        Concrete backends decide how each individual option is merged.
        """


# The concrete options class the caller asks `get_backend_profile_options` for.
_OptionsT = TypeVar("_OptionsT", bound="BackendProfileOptions")


def get_backend_profile_options(
    options: Optional[Sequence[BackendProfileOptions]],
    options_type: Type[_OptionsT],
) -> Optional[_OptionsT]:
    """Return the first entry of ``options`` that is an instance of ``options_type``.

    Args:
        options: Backend options to search; may be ``None`` or empty.
        options_type: The options class to look for.

    Returns:
        The first matching entry, or ``None`` when ``options`` is ``None``/empty
        or contains no instance of ``options_type``.
    """
    # `or ()` turns a missing/empty sequence into "nothing to scan".
    for candidate in options or ():
        if isinstance(candidate, options_type):
            return candidate
    return None
5 changes: 5 additions & 0 deletions src/dstack/_internal/core/backends/profile_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from dstack._internal.core.backends.vastai.profile_options import VastAIProfileOptions

# TODO: when adding options for the first VM-based backend,
# implement the logic to check idle instances against backend options before reusing.
AnyBackendProfileOptions = VastAIProfileOptions
58 changes: 46 additions & 12 deletions src/dstack/_internal/core/backends/vastai/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import gpuhunt
from gpuhunt.providers.vastai import VastAIProvider
from typing_extensions import assert_never

from dstack._internal.core.backends.base.backend import Compute
from dstack._internal.core.backends.base.compute import (
Expand All @@ -10,8 +11,15 @@
get_docker_commands,
)
from dstack._internal.core.backends.base.offers import get_catalog_offers
from dstack._internal.core.backends.base.profile_options import get_backend_profile_options
from dstack._internal.core.backends.vastai.api_client import VastAIAPIClient
from dstack._internal.core.backends.vastai.models import VastAIConfig
from dstack._internal.core.backends.vastai.profile_options import (
VASTAI_DEFAULT_MIN_RELIABILITY,
VASTAI_DEFAULT_OFFER_ORDER,
VastAIOfferOrder,
VastAIProfileOptions,
)
from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
from dstack._internal.core.errors import ProvisioningError
from dstack._internal.core.models.backends.base import BackendType
Expand Down Expand Up @@ -40,31 +48,55 @@ def __init__(self, config: VastAIConfig):
super().__init__()
self.config = config
self.api_client = VastAIAPIClient(config.creds.api_key)
self.catalog = gpuhunt.Catalog(balance_resources=False, auto_reload=False)
self.catalog.add_provider(

def _make_catalog(self, options: VastAIProfileOptions) -> gpuhunt.Catalog:
filters = {
"direct_port_count": {"gte": 1},
"reliability2": {
"gte": options.min_reliability
if options.min_reliability is not None
else VASTAI_DEFAULT_MIN_RELIABILITY
},
"inet_down": {"gt": 128},
"verified": {"eq": True},
"cuda_max_good": {"gte": 12.8},
"compute_cap": {"gte": 600},
}
if options.min_score is not None:
filters["score"] = {"gte": options.min_score}
match options.offer_order or VASTAI_DEFAULT_OFFER_ORDER:
case VastAIOfferOrder.SCORE:
order = [("score", "desc")]
case VastAIOfferOrder.PRICE:
# NOTE: dph_base is only one of the price components,
# so we also sort by InstanceOffer.price later for accurate results.
order = [("dph_base", "asc")]
case other:
assert_never(other)
catalog = gpuhunt.Catalog(balance_resources=False, auto_reload=False)
catalog.add_provider(
VastAIProvider(
community_cloud=config.allow_community_cloud,
extra_filters={
"direct_port_count": {"gte": 1},
"reliability2": {"gte": 0.9},
"inet_down": {"gt": 128},
"verified": {"eq": True},
"cuda_max_good": {"gte": 12.8},
"compute_cap": {"gte": 600},
},
community_cloud=self.config.allow_community_cloud,
extra_filters=filters,
order=order,
)
)
return catalog

def get_offers_by_requirements(
self, requirements: Requirements
) -> List[InstanceOfferWithAvailability]:
vastai_options = (
get_backend_profile_options(requirements.backend_options, VastAIProfileOptions)
or VastAIProfileOptions()
)
offers = get_catalog_offers(
backend=BackendType.VASTAI,
locations=self.config.regions or None,
requirements=requirements,
# TODO(egor-s): spots currently not supported
extra_filter=lambda offer: not offer.instance.resources.spot,
catalog=self.catalog,
catalog=self._make_catalog(vastai_options),
)
offers = [
offer.with_availability(
Expand All @@ -73,6 +105,8 @@ def get_offers_by_requirements(
)
for offer in offers
]
if (vastai_options.offer_order or VASTAI_DEFAULT_OFFER_ORDER) == VastAIOfferOrder.PRICE:
offers = sorted(offers, key=lambda o: o.price)
return offers

def run_job(
Expand Down
63 changes: 63 additions & 0 deletions src/dstack/_internal/core/backends/vastai/profile_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from enum import Enum
from typing import Annotated, Literal, Optional

from pydantic import Field

from dstack._internal.core.backends.base.profile_options import BackendProfileOptions
from dstack._internal.utils.combine import get_max_optional, get_single_value_optional


class VastAIOfferOrder(str, Enum):
    # Supported values of the `offer_order` option.
    SCORE = "score"
    PRICE = "price"


# Fallback values for options that are left unset.
VASTAI_DEFAULT_OFFER_ORDER = VastAIOfferOrder.SCORE
VASTAI_DEFAULT_MIN_RELIABILITY = 0.9


class VastAIProfileOptions(BackendProfileOptions["VastAIProfileOptions"]):
    # Vast.ai-specific profile options. Every option except `type` defaults to None (unset).

    # Tag marking these options as the Vast.ai ones.
    type: Literal["vastai"] = "vastai"

    # Offer ordering; None means "not set".
    offer_order: Annotated[
        Optional[VastAIOfferOrder],
        Field(
            description=(
                "Controls the order in which offers are considered for provisioning."
                " Use `score` to prioritize the highest overall score first"
                " (the default order in the Vast.ai console),"
                " or `price` to prioritize the lowest-cost offers first."
                " Lower-cost offers are often less reliable,"
                " so consider applying stricter filters when using `price`."
                f" Defaults to `{VASTAI_DEFAULT_OFFER_ORDER.value}`"
            )
        ),
    ] = None

    # Reliability threshold, validated to lie within [0, 1].
    min_reliability: Annotated[
        Optional[float],
        Field(
            description=(
                "The minimum reliability threshold for offers, on a scale from `0` to `1`."
                f" Defaults to `{VASTAI_DEFAULT_MIN_RELIABILITY}`"
            ),
            ge=0,
            le=1,
        ),
    ] = None

    # Score threshold, validated to be non-negative.
    min_score: Annotated[
        Optional[int],
        Field(
            description=(
                "The minimum overall score required for offers to be considered."
                " The scoring scale varies and may require experimentation."
                " Starting with a value in the low hundreds is generally recommended"
            ),
            ge=0,
        ),
    ] = None

    def combine(self, other: "VastAIProfileOptions") -> "VastAIProfileOptions":
        """Return new options that merge ``self`` with ``other`` field by field.

        ``offer_order`` is merged with ``get_single_value_optional``; both
        thresholds are merged with ``get_max_optional``.
        """
        merged_order = get_single_value_optional(self.offer_order, other.offer_order)
        merged_reliability = get_max_optional(self.min_reliability, other.min_reliability)
        merged_score = get_max_optional(self.min_score, other.min_score)
        return VastAIProfileOptions(
            offer_order=merged_order,
            min_reliability=merged_reliability,
            min_score=merged_score,
        )
13 changes: 12 additions & 1 deletion src/dstack/_internal/core/compatibility/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
from dstack._internal.core.models.common import EntityReference
from typing import Optional

from dstack._internal.core.models.common import EntityReference, IncludeExcludeSetType
from dstack._internal.core.models.profiles import ProfileParams


def get_profile_excludes(profile: Optional[ProfileParams]) -> IncludeExcludeSetType:
    """Return the names of profile fields to exclude.

    Args:
        profile: The profile to inspect, or ``None``.

    Returns:
        ``{"backend_options"}`` when ``profile`` is given and its
        ``backend_options`` is ``None``; otherwise an empty set. A new set is
        returned on every call.
    """
    # NOTE(review): presumably this keeps an unset field out of requests sent to
    # servers that predate `backend_options` — confirm against the callers.
    if profile is not None and profile.backend_options is None:
        return {"backend_options"}
    return set()


def patch_profile_params(params: ProfileParams) -> None:
# If there are no project-prefixed fleets, replace all EntityReference with str
# for compatibility with pre-0.20.14 servers that don't support EntityReference.
Expand Down
14 changes: 8 additions & 6 deletions src/dstack/_internal/core/compatibility/fleets.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from typing import Optional

from dstack._internal.core.compatibility.common import patch_profile_params
from dstack._internal.core.models.common import (
IncludeExcludeDictType,
IncludeExcludeSetType,
)
from dstack._internal.core.compatibility.common import get_profile_excludes, patch_profile_params
from dstack._internal.core.models.common import IncludeExcludeDictType
from dstack._internal.core.models.fleets import ApplyFleetPlanInput, FleetSpec


Expand All @@ -24,6 +21,9 @@ def get_apply_plan_excludes(plan_input: ApplyFleetPlanInput) -> IncludeExcludeDi
current_resource = plan_input.current_resource
if current_resource is not None:
current_resource_excludes = {}
current_resource_spec_excludes = get_fleet_spec_excludes(current_resource.spec)
if current_resource_spec_excludes:
current_resource_excludes["spec"] = current_resource_spec_excludes
apply_plan_excludes["current_resource"] = current_resource_excludes
return {"plan": apply_plan_excludes}

Expand All @@ -44,9 +44,11 @@ def get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[IncludeExcludeDic
"""
spec_excludes: IncludeExcludeDictType = {}
configuration_excludes: IncludeExcludeDictType = {}
profile_excludes: IncludeExcludeSetType = set()
profile_excludes = get_profile_excludes(fleet_spec.profile)

spec_excludes["autocreated"] = True
if fleet_spec.configuration.backend_options is None:
configuration_excludes["backend_options"] = True

if configuration_excludes:
spec_excludes["configuration"] = configuration_excludes
Expand Down
10 changes: 8 additions & 2 deletions src/dstack/_internal/core/compatibility/runs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Optional

from dstack._internal.core.compatibility.common import patch_profile_params
from dstack._internal.core.compatibility.common import get_profile_excludes, patch_profile_params
from dstack._internal.core.models.common import (
EntityReference,
IncludeExcludeDictType,
Expand Down Expand Up @@ -82,7 +82,10 @@ def get_run_spec_excludes(run_spec: RunSpec) -> IncludeExcludeDictType:
"""
spec_excludes: IncludeExcludeDictType = {}
configuration_excludes: IncludeExcludeDictType = {}
profile_excludes: IncludeExcludeSetType = set()
profile_excludes = get_profile_excludes(run_spec.profile)

if run_spec.configuration.backend_options is None:
configuration_excludes["backend_options"] = True

if isinstance(run_spec.configuration, ServiceConfiguration):
if run_spec.configuration.probes:
Expand Down Expand Up @@ -148,6 +151,9 @@ def get_job_spec_excludes(job_specs: list[JobSpec]) -> IncludeExcludeDictType:
if all(all(p.until_ready == DEFAULT_PROBE_UNTIL_READY for p in s.probes) for s in job_specs):
probe_excludes["until_ready"] = True

if all(s.requirements.backend_options is None for s in job_specs):
spec_excludes["requirements"] = {"backend_options": True}

return spec_excludes


Expand Down
10 changes: 9 additions & 1 deletion src/dstack/_internal/core/models/fleets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pydantic import Field, root_validator, validator
from typing_extensions import Annotated, Literal

from dstack._internal.core.backends.profile_options import AnyBackendProfileOptions
from dstack._internal.core.models.backends.base import BackendType
from dstack._internal.core.models.common import (
ApplyAction,
Expand All @@ -22,6 +23,7 @@
ProfileRetry,
SpotPolicy,
parse_idle_duration,
validate_backend_options,
)
from dstack._internal.core.models.resources import ResourcesSpec
from dstack._internal.utils.common import list_enum_values_for_annotation
Expand Down Expand Up @@ -303,6 +305,10 @@ class BackendFleetConfiguraionProps(CoreModel):
)
),
] = None
backend_options: Annotated[
Optional[List[AnyBackendProfileOptions]],
Field(description="Backend-specific options, applied only to offers from that backend"),
] = None

@validator("nodes", pre=True)
def parse_nodes(cls, v: Optional[Union[dict, str]]) -> Optional[dict]:
Expand All @@ -317,8 +323,10 @@ def parse_nodes(cls, v: Optional[Union[dict, str]]) -> Optional[dict]:
_validate_idle_duration = validator("idle_duration", pre=True, allow_reuse=True)(
parse_idle_duration
)

_validate_tags = validator("tags", pre=True, allow_reuse=True)(tags_validator)
_validate_backend_options = validator("backend_options", allow_reuse=True)(
validate_backend_options
)


class BackendFleetConfigurationPropsConfig(CoreConfig):
Expand Down
Loading
Loading