From a53296d3784291c1f71cd8732bcb407143c031e0 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 12 Jun 2026 15:02:36 -0700 Subject: [PATCH 1/4] feat(platform): source infra config from the app-config file (PLT-475) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrates the non-networking platform.Config fields off environment variables and into the mounted app-config file (SEI_CONTROLLER_CONFIG -> FileConfig) that #397 established for state-sync syncers. - platform.FileConfig gains grouped infra sections: scheduling, storage, resources, snapshot, resultExport, genesis, images. - platform.Load resolves Config at startup file-wins-else-env: a non-empty file value wins, an absent one falls back to its historical SEI_* env var. An unset SEI_CONTROLLER_CONFIG reproduces the original all-env behavior. This is the transitional half of PLT-475 (env fallback); a follow-up drops the fallback + the env vars once the ConfigMap is verified populated. - platform.ReadFileConfig centralizes the file read/decode; the per-reconcile state-sync reader now shares it (removes the duplicate os.ReadFile/Unmarshal, preserves the fail-closed-vs-transient distinction). - main.go calls platform.Load instead of the inline os.Getenv block. - Config.Validate names both the file key and the env var per field. Deliberately NOT migrated: networking/gateway config (SEI_GATEWAY_*, SEI_P2P_ENDPOINT_DOMAIN, SEI_NLB_TARGET_TYPE) stays env-sourced, pending its removal from the controller in the GitOps networking move (PLT-451) — avoids migrate-then-delete churn. Infra fields are read once at startup (an infra change needs a restart); the stateSync section keeps its per-reconcile hot-reload. Schema documented in docs/controller-app-config.md; CLAUDE.md convention updated. 
Co-Authored-By: Claude Opus 4.8 --- CLAUDE.md | 2 +- cmd/main.go | 37 +----- docs/controller-app-config.md | 80 +++++++++++++ internal/controller/node/statesync.go | 17 +-- internal/platform/load.go | 103 ++++++++++++++++ internal/platform/load_test.go | 161 ++++++++++++++++++++++++++ internal/platform/platform.go | 131 +++++++++++++++------ 7 files changed, 447 insertions(+), 84 deletions(-) create mode 100644 docs/controller-app-config.md create mode 100644 internal/platform/load.go create mode 100644 internal/platform/load_test.go diff --git a/CLAUDE.md b/CLAUDE.md index 6095e9e..722674f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -119,6 +119,6 @@ make docker-push IMG= # Push container image - **Condition ownership** — The planner owns all condition management on the owning resource. It sets conditions when creating plans (e.g., `NodeUpdateInProgress=True`) and when observing terminal plans (e.g., `NodeUpdateInProgress=False`). The executor does not set conditions — it only mutates plan/task state and phase transitions. - **Single-patch model** — All status mutations (plan state, conditions, phase, currentImage) accumulate in-memory during a reconcile and are flushed in a single `Status().Patch()` at the end. Tasks mutate owned resources (StatefulSets, Services, PVCs); the executor mutates plan state in-memory; the reconciler flushes once. - **Resource generators** live in `internal/noderesource/` — pure functions that produce StatefulSets, Services, and PVCs from a SeiNode spec. Used by both the controller and plan tasks. -- **Platform config** is fully environment-driven — all fields in `platform.Config` must be set via env vars (no defaults). See `internal/platform/platform.go` for the full list. +- **Platform config** is resolved by `platform.Load` (`internal/platform/load.go`). 
Infra fields (scheduling, storage, resources, snapshot/genesis/result-export buckets, images) are read from the mounted app-config file (`SEI_CONTROLLER_CONFIG` → `platform.FileConfig`) when present, falling back to their historical env vars — PLT-475, transitional: the env fallback is removed in a follow-up once the ConfigMap is verified populated. Networking/gateway fields (`SEI_GATEWAY_*`, `SEI_P2P_ENDPOINT_DOMAIN`, `SEI_NLB_TARGET_TYPE`) stay env-sourced pending their removal from the controller in PLT-451. The file is read once at startup for infra fields (an infra change needs a restart); the `stateSync` section is re-read per reconcile (it hot-reloads). See `internal/platform/platform.go` for the field list and `docs/controller-app-config.md` for the file schema. - **Genesis resolution** is handled by the sidecar autonomously: embedded sei-config for well-known chains, S3 fallback at `{SEI_GENESIS_BUCKET}/{chainID}/genesis.json` for custom chains. - Config keys in seid's `config.toml` use **hyphens** (e.g., `persistent-peers`, `trust-height`), not underscores. 
diff --git a/cmd/main.go b/cmd/main.go index 84f8990..2c5c4ad 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -135,40 +135,11 @@ func main() { os.Exit(1) } - platformCfg := platform.Config{ - NodepoolName: os.Getenv("SEI_NODEPOOL_NAME"), - NodepoolArchive: os.Getenv("SEI_NODEPOOL_ARCHIVE"), - TolerationKey: os.Getenv("SEI_TOLERATION_KEY"), - ServiceAccount: os.Getenv("SEI_SERVICE_ACCOUNT"), - StorageClassPerf: os.Getenv("SEI_STORAGE_CLASS_PERF"), - StorageClassDefault: os.Getenv("SEI_STORAGE_CLASS_DEFAULT"), - StorageClassArchive: os.Getenv("SEI_STORAGE_CLASS_ARCHIVE"), - StorageSizeDefault: os.Getenv("SEI_STORAGE_SIZE_DEFAULT"), - StorageSizeArchive: os.Getenv("SEI_STORAGE_SIZE_ARCHIVE"), - ResourceCPUArchive: os.Getenv("SEI_RESOURCE_CPU_ARCHIVE"), - ResourceMemArchive: os.Getenv("SEI_RESOURCE_MEM_ARCHIVE"), - ResourceCPUDefault: os.Getenv("SEI_RESOURCE_CPU_DEFAULT"), - ResourceMemDefault: os.Getenv("SEI_RESOURCE_MEM_DEFAULT"), - SnapshotBucket: os.Getenv("SEI_SNAPSHOT_BUCKET"), - SnapshotRegion: os.Getenv("SEI_SNAPSHOT_REGION"), - ResultExportBucket: os.Getenv("SEI_RESULT_EXPORT_BUCKET"), - ResultExportRegion: os.Getenv("SEI_RESULT_EXPORT_REGION"), - ResultExportPrefix: os.Getenv("SEI_RESULT_EXPORT_PREFIX"), - GenesisBucket: os.Getenv("SEI_GENESIS_BUCKET"), - GenesisRegion: os.Getenv("SEI_GENESIS_REGION"), - GatewayName: os.Getenv("SEI_GATEWAY_NAME"), - GatewayNamespace: os.Getenv("SEI_GATEWAY_NAMESPACE"), - GatewayDomain: os.Getenv("SEI_GATEWAY_DOMAIN"), - GatewayPublicDomain: os.Getenv("SEI_GATEWAY_PUBLIC_DOMAIN"), - KubeRBACProxyImage: os.Getenv("SEI_KUBE_RBAC_PROXY_IMAGE"), - SidecarImage: os.Getenv("SEI_SIDECAR_IMAGE"), - CosmosExporterImage: os.Getenv("SEI_COSMOS_EXPORTER_IMAGE"), - - // The application-config file is opt-in; this may be empty. Points at a - // read-only mounted file (a GitOps-written ConfigMap volume). 
- ControllerConfigFile: os.Getenv("SEI_CONTROLLER_CONFIG"), + platformCfg, err := platform.Load() + if err != nil { + setupLog.Error(err, "Failed to load platform configuration") + os.Exit(1) } - if err := platformCfg.Validate(); err != nil { setupLog.Error(err, "Invalid platform configuration") os.Exit(1) diff --git a/docs/controller-app-config.md b/docs/controller-app-config.md new file mode 100644 index 0000000..cd2f8d0 --- /dev/null +++ b/docs/controller-app-config.md @@ -0,0 +1,80 @@ +# Controller app-config file + +The controller reads a single read-only application-config file, pointed at by +`SEI_CONTROLLER_CONFIG` and mounted as a directory (a GitOps-written ConfigMap, +typically `sei-controller-config`). It is decoded into `platform.FileConfig` +(`internal/platform/platform.go`). + +Two read paths, by design: + +- **Infra sections** (`scheduling`, `storage`, `resources`, `snapshot`, + `resultExport`, `genesis`, `images`) are resolved **once at startup** by + `platform.Load`. Changing them requires a controller restart. +- **`stateSync`** is re-read **per reconcile** so syncer changes hot-reload + without a restart (the directory mount swaps atomically). + +## Transitional env fallback (PLT-475) + +For each infra field, a non-empty file value wins; an absent one falls back to +its historical `SEI_*` env var. So an unset `SEI_CONTROLLER_CONFIG` reproduces +the original all-env behavior. The fallback is removed in a follow-up once the +ConfigMap is verified populated, after which the file is authoritative. + +Networking/gateway config (`SEI_GATEWAY_*`, `SEI_P2P_ENDPOINT_DOMAIN`, +`SEI_NLB_TARGET_TYPE`) is **not** in the file — it stays env-sourced pending its +removal from the controller in the GitOps networking move (PLT-451). + +## Schema + +```yaml +# State-sync canonical syncers, keyed by chain-id. Bare host:port (no scheme). +# Read per-reconcile; >=2 entries per chain or the node fails closed. 
+stateSync: + syncers: + pacific-1: + - rpc-1.example.net:26657 + - rpc-2.example.net:26657 + +# --- infra (read once at startup; env-var fallback during PLT-475) --- + +scheduling: + nodepoolName: sei-node # SEI_NODEPOOL_NAME + nodepoolArchive: sei-archive # SEI_NODEPOOL_ARCHIVE + tolerationKey: sei.io/workload # SEI_TOLERATION_KEY + serviceAccount: seid-node # SEI_SERVICE_ACCOUNT + +storage: + classPerf: gp3-10k-750 # SEI_STORAGE_CLASS_PERF + classDefault: gp3 # SEI_STORAGE_CLASS_DEFAULT + classArchive: gp3-archive # SEI_STORAGE_CLASS_ARCHIVE + sizeDefault: 2000Gi # SEI_STORAGE_SIZE_DEFAULT + sizeArchive: 40Ti # SEI_STORAGE_SIZE_ARCHIVE + +resources: + cpuArchive: "48" # SEI_RESOURCE_CPU_ARCHIVE + memArchive: 448Gi # SEI_RESOURCE_MEM_ARCHIVE + cpuDefault: "4" # SEI_RESOURCE_CPU_DEFAULT + memDefault: 32Gi # SEI_RESOURCE_MEM_DEFAULT + +snapshot: + bucket: sei-snapshots # SEI_SNAPSHOT_BUCKET + region: us-east-2 # SEI_SNAPSHOT_REGION + +resultExport: + bucket: sei-shadow-results # SEI_RESULT_EXPORT_BUCKET + region: us-east-2 # SEI_RESULT_EXPORT_REGION + prefix: shadow-results/ # SEI_RESULT_EXPORT_PREFIX + +genesis: + bucket: sei-k8s-genesis # SEI_GENESIS_BUCKET + region: us-east-2 # SEI_GENESIS_REGION + +images: + sidecar: ghcr.io/sei-protocol/seictl@sha256:... # SEI_SIDECAR_IMAGE + kubeRBACProxy: quay.io/brancz/kube-rbac-proxy:v0.19.1 # SEI_KUBE_RBAC_PROXY_IMAGE + cosmosExporter: ghcr.io/sei-protocol/sei-cosmos-exporter@sha256:... # SEI_COSMOS_EXPORTER_IMAGE +``` + +A present-but-unparseable file is a hard startup error — it never silently falls +back to env. Required fields missing from both sources fail `Config.Validate` +with a message naming the file key and the env var. 
diff --git a/internal/controller/node/statesync.go b/internal/controller/node/statesync.go index b87d3c7..a51cdfe 100644 --- a/internal/controller/node/statesync.go +++ b/internal/controller/node/statesync.go @@ -2,13 +2,11 @@ package node import ( "fmt" - "os" "slices" "strings" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/yaml" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" "github.com/sei-protocol/sei-k8s-controller/internal/platform" @@ -88,24 +86,11 @@ func (r *SeiNodeReconciler) reconcileStateSyncGate(node *seiv1alpha1.SeiNode) (b // on the directory mount), so re-reading picks up GitOps updates without a pod // restart. Never cache an open handle. func (r *SeiNodeReconciler) canonicalSyncers(chainID string) ([]string, error) { - path := strings.TrimSpace(r.Platform.ControllerConfigFile) - if path == "" { - return nil, nil - } - - raw, err := os.ReadFile(path) + cfg, err := platform.ReadFileConfig(r.Platform.ControllerConfigFile) if err != nil { - if os.IsNotExist(err) { - return nil, nil - } return nil, err } - var cfg platform.FileConfig - if err := yaml.Unmarshal(raw, &cfg); err != nil { - return nil, fmt.Errorf("parsing controller config file %q: %w", path, err) - } - // A YAML list entry may itself carry comma/whitespace-joined endpoints, so // route the joined value through the same splitter the ConfigMap source used. return parseSyncerList(strings.Join(cfg.StateSync.Syncers[chainID], "\n")), nil diff --git a/internal/platform/load.go b/internal/platform/load.go new file mode 100644 index 0000000..c277209 --- /dev/null +++ b/internal/platform/load.go @@ -0,0 +1,103 @@ +package platform + +import ( + "fmt" + "os" + "strings" + + "sigs.k8s.io/yaml" +) + +// envControllerConfig names the env var pointing at the read-only app-config +// file (a GitOps-written ConfigMap mounted as a directory). 
+const envControllerConfig = "SEI_CONTROLLER_CONFIG" + +// Load resolves the platform Config at startup. For each migrated infra field +// (PLT-475) a non-empty value in the app-config file wins; an absent one falls +// back to its historical env var, so an unset SEI_CONTROLLER_CONFIG yields the +// original all-env behavior. Networking/gateway fields and the config-file path +// itself are env-sourced. +// +// The file is read once here; infra changes therefore require a controller +// restart. The stateSync section is read per-reconcile elsewhere (it hot-reloads). +// Caller is expected to run Config.Validate after Load. +func Load() (Config, error) { + path := strings.TrimSpace(os.Getenv(envControllerConfig)) + file, err := ReadFileConfig(path) + if err != nil { + return Config{}, err + } + + return Config{ + NodepoolName: fileOrEnv(file.Scheduling.NodepoolName, "SEI_NODEPOOL_NAME"), + NodepoolArchive: fileOrEnv(file.Scheduling.NodepoolArchive, "SEI_NODEPOOL_ARCHIVE"), + TolerationKey: fileOrEnv(file.Scheduling.TolerationKey, "SEI_TOLERATION_KEY"), + ServiceAccount: fileOrEnv(file.Scheduling.ServiceAccount, "SEI_SERVICE_ACCOUNT"), + + StorageClassPerf: fileOrEnv(file.Storage.ClassPerf, "SEI_STORAGE_CLASS_PERF"), + StorageClassDefault: fileOrEnv(file.Storage.ClassDefault, "SEI_STORAGE_CLASS_DEFAULT"), + StorageClassArchive: fileOrEnv(file.Storage.ClassArchive, "SEI_STORAGE_CLASS_ARCHIVE"), + StorageSizeDefault: fileOrEnv(file.Storage.SizeDefault, "SEI_STORAGE_SIZE_DEFAULT"), + StorageSizeArchive: fileOrEnv(file.Storage.SizeArchive, "SEI_STORAGE_SIZE_ARCHIVE"), + + ResourceCPUArchive: fileOrEnv(file.Resources.CPUArchive, "SEI_RESOURCE_CPU_ARCHIVE"), + ResourceMemArchive: fileOrEnv(file.Resources.MemArchive, "SEI_RESOURCE_MEM_ARCHIVE"), + ResourceCPUDefault: fileOrEnv(file.Resources.CPUDefault, "SEI_RESOURCE_CPU_DEFAULT"), + ResourceMemDefault: fileOrEnv(file.Resources.MemDefault, "SEI_RESOURCE_MEM_DEFAULT"), + + SnapshotBucket: fileOrEnv(file.Snapshot.Bucket, 
"SEI_SNAPSHOT_BUCKET"), + SnapshotRegion: fileOrEnv(file.Snapshot.Region, "SEI_SNAPSHOT_REGION"), + + ResultExportBucket: fileOrEnv(file.ResultExport.Bucket, "SEI_RESULT_EXPORT_BUCKET"), + ResultExportRegion: fileOrEnv(file.ResultExport.Region, "SEI_RESULT_EXPORT_REGION"), + ResultExportPrefix: fileOrEnv(file.ResultExport.Prefix, "SEI_RESULT_EXPORT_PREFIX"), + + GenesisBucket: fileOrEnv(file.Genesis.Bucket, "SEI_GENESIS_BUCKET"), + GenesisRegion: fileOrEnv(file.Genesis.Region, "SEI_GENESIS_REGION"), + + SidecarImage: fileOrEnv(file.Images.Sidecar, "SEI_SIDECAR_IMAGE"), + KubeRBACProxyImage: fileOrEnv(file.Images.KubeRBACProxy, "SEI_KUBE_RBAC_PROXY_IMAGE"), + CosmosExporterImage: fileOrEnv(file.Images.CosmosExporter, "SEI_COSMOS_EXPORTER_IMAGE"), + + // Networking/gateway: env-only, pending removal in the GitOps networking + // move (PLT-451). Not migrated to the file to avoid migrate-then-delete. + GatewayName: os.Getenv("SEI_GATEWAY_NAME"), + GatewayNamespace: os.Getenv("SEI_GATEWAY_NAMESPACE"), + GatewayDomain: os.Getenv("SEI_GATEWAY_DOMAIN"), + GatewayPublicDomain: os.Getenv("SEI_GATEWAY_PUBLIC_DOMAIN"), + + ControllerConfigFile: path, + }, nil +} + +// ReadFileConfig reads and decodes the app-config file. An empty path or a +// missing file yields a zero FileConfig (the file is opt-in) — only a present +// file that can't be read or parsed is an error. It is the single read path for +// the config file, shared by Load (startup) and the per-reconcile state-sync +// reader. 
+func ReadFileConfig(path string) (FileConfig, error) { + if strings.TrimSpace(path) == "" { + return FileConfig{}, nil + } + raw, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return FileConfig{}, nil + } + return FileConfig{}, fmt.Errorf("reading controller config file %q: %w", path, err) + } + var cfg FileConfig + if err := yaml.Unmarshal(raw, &cfg); err != nil { + return FileConfig{}, fmt.Errorf("parsing controller config file %q: %w", path, err) + } + return cfg, nil +} + +// fileOrEnv returns the file value when non-empty, otherwise the named env var +// (the transitional PLT-475 fallback). +func fileOrEnv(fileVal, envVar string) string { + if strings.TrimSpace(fileVal) != "" { + return fileVal + } + return os.Getenv(envVar) +} diff --git a/internal/platform/load_test.go b/internal/platform/load_test.go new file mode 100644 index 0000000..c3f7ea0 --- /dev/null +++ b/internal/platform/load_test.go @@ -0,0 +1,161 @@ +package platform + +import ( + "os" + "path/filepath" + "testing" +) + +// setMigratedEnv sets every migrated infra env var to a recognizable "env-" +// prefixed value so a test can assert which source a resolved field came from. 
+func setMigratedEnv(t *testing.T) { + t.Helper() + for _, kv := range [][2]string{ + {"SEI_NODEPOOL_NAME", "env-nodepool"}, + {"SEI_NODEPOOL_ARCHIVE", "env-nodepool-archive"}, + {"SEI_TOLERATION_KEY", "env-toleration"}, + {"SEI_SERVICE_ACCOUNT", "env-sa"}, + {"SEI_STORAGE_CLASS_PERF", "env-perf"}, + {"SEI_STORAGE_CLASS_DEFAULT", "env-default"}, + {"SEI_STORAGE_CLASS_ARCHIVE", "env-archive"}, + {"SEI_STORAGE_SIZE_DEFAULT", "env-size-default"}, + {"SEI_STORAGE_SIZE_ARCHIVE", "env-size-archive"}, + {"SEI_RESOURCE_CPU_ARCHIVE", "env-cpu-archive"}, + {"SEI_RESOURCE_MEM_ARCHIVE", "env-mem-archive"}, + {"SEI_RESOURCE_CPU_DEFAULT", "env-cpu-default"}, + {"SEI_RESOURCE_MEM_DEFAULT", "env-mem-default"}, + {"SEI_SNAPSHOT_BUCKET", "env-snap-bucket"}, + {"SEI_SNAPSHOT_REGION", "env-snap-region"}, + {"SEI_RESULT_EXPORT_BUCKET", "env-export-bucket"}, + {"SEI_RESULT_EXPORT_REGION", "env-export-region"}, + {"SEI_RESULT_EXPORT_PREFIX", "env-export-prefix"}, + {"SEI_GENESIS_BUCKET", "env-genesis-bucket"}, + {"SEI_GENESIS_REGION", "env-genesis-region"}, + {"SEI_SIDECAR_IMAGE", "env-sidecar"}, + {"SEI_KUBE_RBAC_PROXY_IMAGE", "env-rbac-proxy"}, + {"SEI_COSMOS_EXPORTER_IMAGE", "env-cosmos-exporter"}, + {"SEI_GATEWAY_NAME", "env-gw-name"}, + {"SEI_GATEWAY_NAMESPACE", "env-gw-ns"}, + {"SEI_GATEWAY_DOMAIN", "env-gw-domain"}, + {"SEI_GATEWAY_PUBLIC_DOMAIN", "env-gw-public"}, + } { + t.Setenv(kv[0], kv[1]) + } +} + +func writeConfig(t *testing.T, body string) string { + t.Helper() + path := filepath.Join(t.TempDir(), "config.yaml") + if err := os.WriteFile(path, []byte(body), 0o600); err != nil { + t.Fatalf("write config: %v", err) + } + return path +} + +// No file configured: every infra field resolves from the environment. 
+func TestLoad_NoFile_AllEnv(t *testing.T) { + setMigratedEnv(t) + t.Setenv("SEI_CONTROLLER_CONFIG", "") + + cfg, err := Load() + if err != nil { + t.Fatalf("Load: %v", err) + } + if err := cfg.Validate(); err != nil { + t.Fatalf("Validate: %v", err) + } + if cfg.NodepoolName != "env-nodepool" || cfg.SnapshotBucket != "env-snap-bucket" || cfg.SidecarImage != "env-sidecar" { + t.Errorf("expected env-sourced values, got nodepool=%q snapshot=%q sidecar=%q", + cfg.NodepoolName, cfg.SnapshotBucket, cfg.SidecarImage) + } + if cfg.ControllerConfigFile != "" { + t.Errorf("ControllerConfigFile = %q, want empty", cfg.ControllerConfigFile) + } +} + +// A field present in the file wins; a field absent from the file falls back to +// its env var. Networking/gateway fields are always env-sourced. +func TestLoad_FileWinsEnvFallback(t *testing.T) { + setMigratedEnv(t) + path := writeConfig(t, ` +scheduling: + nodepoolName: file-nodepool + serviceAccount: file-sa +storage: + classPerf: file-perf +images: + sidecar: file-sidecar +`) + t.Setenv("SEI_CONTROLLER_CONFIG", path) + + cfg, err := Load() + if err != nil { + t.Fatalf("Load: %v", err) + } + + // File-sourced. + if cfg.NodepoolName != "file-nodepool" { + t.Errorf("NodepoolName = %q, want file-nodepool", cfg.NodepoolName) + } + if cfg.ServiceAccount != "file-sa" { + t.Errorf("ServiceAccount = %q, want file-sa", cfg.ServiceAccount) + } + if cfg.StorageClassPerf != "file-perf" { + t.Errorf("StorageClassPerf = %q, want file-perf", cfg.StorageClassPerf) + } + if cfg.SidecarImage != "file-sidecar" { + t.Errorf("SidecarImage = %q, want file-sidecar", cfg.SidecarImage) + } + + // Env fallback (absent from file). + if cfg.NodepoolArchive != "env-nodepool-archive" { + t.Errorf("NodepoolArchive = %q, want env fallback", cfg.NodepoolArchive) + } + if cfg.TolerationKey != "env-toleration" { + t.Errorf("TolerationKey = %q, want env fallback", cfg.TolerationKey) + } + + // Networking/gateway: always env, never file. 
+ if cfg.GatewayName != "env-gw-name" || cfg.GatewayDomain != "env-gw-domain" { + t.Errorf("gateway fields should be env-sourced, got name=%q domain=%q", cfg.GatewayName, cfg.GatewayDomain) + } + if cfg.ControllerConfigFile != path { + t.Errorf("ControllerConfigFile = %q, want %q", cfg.ControllerConfigFile, path) + } +} + +// A configured-but-missing file is not an error (the file is opt-in); resolution +// falls back to the environment. +func TestLoad_MissingFileFallsBackToEnv(t *testing.T) { + setMigratedEnv(t) + t.Setenv("SEI_CONTROLLER_CONFIG", filepath.Join(t.TempDir(), "absent.yaml")) + + cfg, err := Load() + if err != nil { + t.Fatalf("Load: %v", err) + } + if cfg.NodepoolName != "env-nodepool" { + t.Errorf("NodepoolName = %q, want env fallback", cfg.NodepoolName) + } +} + +// Malformed YAML is a hard error — a present-but-broken file must not silently +// fall back to env (that would mask an operator mistake). +func TestLoad_MalformedFile_Errors(t *testing.T) { + path := writeConfig(t, "scheduling: [not-a-map") + t.Setenv("SEI_CONTROLLER_CONFIG", path) + + if _, err := Load(); err == nil { + t.Fatal("expected error for malformed config file, got nil") + } +} + +func TestReadFileConfig_EmptyPath(t *testing.T) { + cfg, err := ReadFileConfig("") + if err != nil { + t.Fatalf("ReadFileConfig(\"\"): %v", err) + } + if cfg.StateSync.Syncers != nil || cfg.Scheduling.NodepoolName != "" { + t.Errorf("empty path should yield zero FileConfig, got %+v", cfg) + } +} diff --git a/internal/platform/platform.go b/internal/platform/platform.go index b97ac43..ec22241 100644 --- a/internal/platform/platform.go +++ b/internal/platform/platform.go @@ -21,13 +21,12 @@ const ( ) // Config holds infrastructure-level settings that vary per deployment -// environment. Fields are read from environment variables in main.go and are -// required unless documented otherwise — ControllerConfigFile is optional -// (state-sync is opt-in). See platformtest.Config() for test fixtures. 
-// -// Config is env-sourced infra; FileConfig (below) is the file-sourced -// application config. They are deliberately distinct: ControllerConfigFile is -// the path to the latter, not its contents. +// environment. It is resolved by Load: the infra fields are read from the +// app-config file (FileConfig) when present, falling back to their historical +// env vars (PLT-475, transitional); the networking/gateway fields and +// ControllerConfigFile are env-sourced. Fields are required unless documented +// otherwise — ControllerConfigFile is optional (state-sync is opt-in). See +// platformtest.Config() for test fixtures. type Config struct { NodepoolName string NodepoolArchive string @@ -79,8 +78,24 @@ type Config struct { } // FileConfig is the controller's file-sourced application config (SEI_CONTROLLER_CONFIG). +// +// The infra sections (scheduling, storage, resources, snapshot, resultExport, +// genesis, images) back the migration of platform.Config off environment +// variables (PLT-475). They are resolved once at startup by Load, file-wins +// over the historical env vars. The stateSync section is read per-reconcile (it +// hot-reloads); the infra sections are not (an infra change warrants a restart). +// +// Networking/gateway config is deliberately absent — it stays env-sourced +// pending its removal from the controller in the GitOps networking move (PLT-451). type FileConfig struct { - StateSync StateSyncConfig `json:"stateSync"` + StateSync StateSyncConfig `json:"stateSync"` + Scheduling SchedulingConfig `json:"scheduling"` + Storage StorageConfig `json:"storage"` + Resources ResourcesConfig `json:"resources"` + Snapshot BucketConfig `json:"snapshot"` + ResultExport ResultExportConfig `json:"resultExport"` + Genesis BucketConfig `json:"genesis"` + Images ImagesConfig `json:"images"` } // StateSyncConfig is the state-sync section of the application config. 
@@ -89,6 +104,51 @@ type StateSyncConfig struct { Syncers map[string][]string `json:"syncers"` } +// SchedulingConfig places node pods onto Karpenter pools and the seid service account. +type SchedulingConfig struct { + NodepoolName string `json:"nodepoolName"` + NodepoolArchive string `json:"nodepoolArchive"` + TolerationKey string `json:"tolerationKey"` + ServiceAccount string `json:"serviceAccount"` +} + +// StorageConfig holds the PVC storage classes and sizes for default and archive nodes. +type StorageConfig struct { + ClassPerf string `json:"classPerf"` + ClassDefault string `json:"classDefault"` + ClassArchive string `json:"classArchive"` + SizeDefault string `json:"sizeDefault"` + SizeArchive string `json:"sizeArchive"` +} + +// ResourcesConfig holds the CPU/memory requests for default and archive nodes. +type ResourcesConfig struct { + CPUArchive string `json:"cpuArchive"` + MemArchive string `json:"memArchive"` + CPUDefault string `json:"cpuDefault"` + MemDefault string `json:"memDefault"` +} + +// BucketConfig is an S3 bucket + region pair (snapshot, genesis). +type BucketConfig struct { + Bucket string `json:"bucket"` + Region string `json:"region"` +} + +// ResultExportConfig is the shadow-replay result-export bucket, region, and key prefix. +type ResultExportConfig struct { + Bucket string `json:"bucket"` + Region string `json:"region"` + Prefix string `json:"prefix"` +} + +// ImagesConfig holds the sidecar container images attached to every SeiNode pod. +type ImagesConfig struct { + Sidecar string `json:"sidecar"` + KubeRBACProxy string `json:"kubeRBACProxy"` + CosmosExporter string `json:"cosmosExporter"` +} + // NodepoolForMode returns the Karpenter NodePool name for the given // sei-config mode string. Archive nodes use a dedicated pool; all // other modes share the default pool. @@ -99,34 +159,37 @@ func (c Config) NodepoolForMode(mode string) string { return c.NodepoolName } -// Validate returns an error if required fields are missing. 
+// Validate returns an error if a required field is missing from both the +// app-config file and the environment. The source label names the file key and +// the env var for migrated fields (PLT-475) so the error points at either fix; +// networking/gateway fields name only their env var. func (c Config) Validate() error { required := map[string]string{ - "SEI_NODEPOOL_NAME": c.NodepoolName, - "SEI_TOLERATION_KEY": c.TolerationKey, - "SEI_SERVICE_ACCOUNT": c.ServiceAccount, - "SEI_STORAGE_CLASS_PERF": c.StorageClassPerf, - "SEI_STORAGE_CLASS_DEFAULT": c.StorageClassDefault, - "SEI_STORAGE_CLASS_ARCHIVE": c.StorageClassArchive, - "SEI_STORAGE_SIZE_DEFAULT": c.StorageSizeDefault, - "SEI_STORAGE_SIZE_ARCHIVE": c.StorageSizeArchive, - "SEI_NODEPOOL_ARCHIVE": c.NodepoolArchive, - "SEI_RESOURCE_CPU_ARCHIVE": c.ResourceCPUArchive, - "SEI_RESOURCE_MEM_ARCHIVE": c.ResourceMemArchive, - "SEI_RESOURCE_CPU_DEFAULT": c.ResourceCPUDefault, - "SEI_RESOURCE_MEM_DEFAULT": c.ResourceMemDefault, - "SEI_SNAPSHOT_BUCKET": c.SnapshotBucket, - "SEI_SNAPSHOT_REGION": c.SnapshotRegion, - "SEI_RESULT_EXPORT_BUCKET": c.ResultExportBucket, - "SEI_RESULT_EXPORT_REGION": c.ResultExportRegion, - "SEI_RESULT_EXPORT_PREFIX": c.ResultExportPrefix, - "SEI_GENESIS_BUCKET": c.GenesisBucket, - "SEI_GENESIS_REGION": c.GenesisRegion, - "SEI_GATEWAY_NAME": c.GatewayName, - "SEI_GATEWAY_NAMESPACE": c.GatewayNamespace, - "SEI_GATEWAY_DOMAIN": c.GatewayDomain, - "SEI_SIDECAR_IMAGE": c.SidecarImage, - "SEI_KUBE_RBAC_PROXY_IMAGE": c.KubeRBACProxyImage, + "scheduling.nodepoolName (or SEI_NODEPOOL_NAME)": c.NodepoolName, + "scheduling.nodepoolArchive (or SEI_NODEPOOL_ARCHIVE)": c.NodepoolArchive, + "scheduling.tolerationKey (or SEI_TOLERATION_KEY)": c.TolerationKey, + "scheduling.serviceAccount (or SEI_SERVICE_ACCOUNT)": c.ServiceAccount, + "storage.classPerf (or SEI_STORAGE_CLASS_PERF)": c.StorageClassPerf, + "storage.classDefault (or SEI_STORAGE_CLASS_DEFAULT)": c.StorageClassDefault, + "storage.classArchive 
(or SEI_STORAGE_CLASS_ARCHIVE)": c.StorageClassArchive, + "storage.sizeDefault (or SEI_STORAGE_SIZE_DEFAULT)": c.StorageSizeDefault, + "storage.sizeArchive (or SEI_STORAGE_SIZE_ARCHIVE)": c.StorageSizeArchive, + "resources.cpuArchive (or SEI_RESOURCE_CPU_ARCHIVE)": c.ResourceCPUArchive, + "resources.memArchive (or SEI_RESOURCE_MEM_ARCHIVE)": c.ResourceMemArchive, + "resources.cpuDefault (or SEI_RESOURCE_CPU_DEFAULT)": c.ResourceCPUDefault, + "resources.memDefault (or SEI_RESOURCE_MEM_DEFAULT)": c.ResourceMemDefault, + "snapshot.bucket (or SEI_SNAPSHOT_BUCKET)": c.SnapshotBucket, + "snapshot.region (or SEI_SNAPSHOT_REGION)": c.SnapshotRegion, + "resultExport.bucket (or SEI_RESULT_EXPORT_BUCKET)": c.ResultExportBucket, + "resultExport.region (or SEI_RESULT_EXPORT_REGION)": c.ResultExportRegion, + "resultExport.prefix (or SEI_RESULT_EXPORT_PREFIX)": c.ResultExportPrefix, + "genesis.bucket (or SEI_GENESIS_BUCKET)": c.GenesisBucket, + "genesis.region (or SEI_GENESIS_REGION)": c.GenesisRegion, + "images.sidecar (or SEI_SIDECAR_IMAGE)": c.SidecarImage, + "images.kubeRBACProxy (or SEI_KUBE_RBAC_PROXY_IMAGE)": c.KubeRBACProxyImage, + "SEI_GATEWAY_NAME": c.GatewayName, + "SEI_GATEWAY_NAMESPACE": c.GatewayNamespace, + "SEI_GATEWAY_DOMAIN": c.GatewayDomain, } for name, val := range required { if strings.TrimSpace(val) == "" { From fe0ac25b686bf065b013faf0fece3430ea20667d Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 12 Jun 2026 15:10:36 -0700 Subject: [PATCH 2/4] refactor(platform): address cross-review + trim ticket IDs from comments - Drop PLT ticket IDs from code comments except where they mark temporary scaffolding (the transitional env fallback; networking config pending its PLT-451 removal). - docs/controller-app-config.md: call out that infra-section edits need a pod restart (only stateSync hot-reloads), and note the absent sizePerf. 
Co-Authored-By: Claude Opus 4.8 --- docs/controller-app-config.md | 6 ++++-- internal/platform/load.go | 11 ++++++----- internal/platform/platform.go | 12 ++++++------ 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/docs/controller-app-config.md b/docs/controller-app-config.md index cd2f8d0..871ef52 100644 --- a/docs/controller-app-config.md +++ b/docs/controller-app-config.md @@ -9,7 +9,9 @@ Two read paths, by design: - **Infra sections** (`scheduling`, `storage`, `resources`, `snapshot`, `resultExport`, `genesis`, `images`) are resolved **once at startup** by - `platform.Load`. Changing them requires a controller restart. + `platform.Load`. Editing them in the live ConfigMap propagates to the mount + but has **no effect until the controller pod restarts** + (`kubectl rollout restart`) — only `stateSync` hot-reloads. - **`stateSync`** is re-read **per reconcile** so syncer changes hot-reload without a restart (the directory mount swaps atomically). @@ -43,7 +45,7 @@ scheduling: tolerationKey: sei.io/workload # SEI_TOLERATION_KEY serviceAccount: seid-node # SEI_SERVICE_ACCOUNT -storage: +storage: # note: no sizePerf — matches the historical env layout classPerf: gp3-10k-750 # SEI_STORAGE_CLASS_PERF classDefault: gp3 # SEI_STORAGE_CLASS_DEFAULT classArchive: gp3-archive # SEI_STORAGE_CLASS_ARCHIVE diff --git a/internal/platform/load.go b/internal/platform/load.go index c277209..62f24e4 100644 --- a/internal/platform/load.go +++ b/internal/platform/load.go @@ -12,10 +12,11 @@ import ( // file (a GitOps-written ConfigMap mounted as a directory). const envControllerConfig = "SEI_CONTROLLER_CONFIG" -// Load resolves the platform Config at startup. For each migrated infra field -// (PLT-475) a non-empty value in the app-config file wins; an absent one falls -// back to its historical env var, so an unset SEI_CONTROLLER_CONFIG yields the -// original all-env behavior. 
Networking/gateway fields and the config-file path +// Load resolves the platform Config at startup. A non-empty value in the +// app-config file wins; an absent infra field falls back to its historical env +// var, so an unset SEI_CONTROLLER_CONFIG yields the original all-env behavior. +// That env fallback is transitional — removed once the ConfigMap is populated +// everywhere (PLT-475). Networking/gateway fields and the config-file path // itself are env-sourced. // // The file is read once here; infra changes therefore require a controller @@ -94,7 +95,7 @@ func ReadFileConfig(path string) (FileConfig, error) { } // fileOrEnv returns the file value when non-empty, otherwise the named env var -// (the transitional PLT-475 fallback). +// (the transitional fallback). func fileOrEnv(fileVal, envVar string) string { if strings.TrimSpace(fileVal) != "" { return fileVal diff --git a/internal/platform/platform.go b/internal/platform/platform.go index ec22241..4dc9a32 100644 --- a/internal/platform/platform.go +++ b/internal/platform/platform.go @@ -80,10 +80,10 @@ type Config struct { // FileConfig is the controller's file-sourced application config (SEI_CONTROLLER_CONFIG). // // The infra sections (scheduling, storage, resources, snapshot, resultExport, -// genesis, images) back the migration of platform.Config off environment -// variables (PLT-475). They are resolved once at startup by Load, file-wins -// over the historical env vars. The stateSync section is read per-reconcile (it -// hot-reloads); the infra sections are not (an infra change warrants a restart). +// genesis, images) carry the infra config that was historically env-sourced. +// They are resolved once at startup by Load, the file value winning over the +// env fallback. The stateSync section is read per-reconcile (it hot-reloads); +// the infra sections are not (an infra change warrants a restart). 
// // Networking/gateway config is deliberately absent — it stays env-sourced // pending its removal from the controller in the GitOps networking move (PLT-451). @@ -161,8 +161,8 @@ func (c Config) NodepoolForMode(mode string) string { // Validate returns an error if a required field is missing from both the // app-config file and the environment. The source label names the file key and -// the env var for migrated fields (PLT-475) so the error points at either fix; -// networking/gateway fields name only their env var. +// the env var so the error points at either fix; networking/gateway fields name +// only their env var. func (c Config) Validate() error { required := map[string]string{ "scheduling.nodepoolName (or SEI_NODEPOOL_NAME)": c.NodepoolName, From f819e6a40ada7ab4c61b77cd6dca8f20f8e799dd Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 12 Jun 2026 15:17:30 -0700 Subject: [PATCH 3/4] test(platform): name repeated env-nodepool fixture literal (goconst) Co-Authored-By: Claude Opus 4.8 --- internal/platform/load_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/platform/load_test.go b/internal/platform/load_test.go index c3f7ea0..f1e6045 100644 --- a/internal/platform/load_test.go +++ b/internal/platform/load_test.go @@ -6,12 +6,15 @@ import ( "testing" ) +// envNodepool is asserted in multiple fallback cases, so it's named (goconst). +const envNodepool = "env-nodepool" + // setMigratedEnv sets every migrated infra env var to a recognizable "env-" // prefixed value so a test can assert which source a resolved field came from. 
func setMigratedEnv(t *testing.T) { t.Helper() for _, kv := range [][2]string{ - {"SEI_NODEPOOL_NAME", "env-nodepool"}, + {"SEI_NODEPOOL_NAME", envNodepool}, {"SEI_NODEPOOL_ARCHIVE", "env-nodepool-archive"}, {"SEI_TOLERATION_KEY", "env-toleration"}, {"SEI_SERVICE_ACCOUNT", "env-sa"}, @@ -64,7 +67,7 @@ func TestLoad_NoFile_AllEnv(t *testing.T) { if err := cfg.Validate(); err != nil { t.Fatalf("Validate: %v", err) } - if cfg.NodepoolName != "env-nodepool" || cfg.SnapshotBucket != "env-snap-bucket" || cfg.SidecarImage != "env-sidecar" { + if cfg.NodepoolName != envNodepool || cfg.SnapshotBucket != "env-snap-bucket" || cfg.SidecarImage != "env-sidecar" { t.Errorf("expected env-sourced values, got nodepool=%q snapshot=%q sidecar=%q", cfg.NodepoolName, cfg.SnapshotBucket, cfg.SidecarImage) } @@ -134,7 +137,7 @@ func TestLoad_MissingFileFallsBackToEnv(t *testing.T) { if err != nil { t.Fatalf("Load: %v", err) } - if cfg.NodepoolName != "env-nodepool" { + if cfg.NodepoolName != envNodepool { t.Errorf("NodepoolName = %q, want env fallback", cfg.NodepoolName) } } From 4c4452f93b403a622f908fee27d9ae3d79032795 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 12 Jun 2026 15:23:27 -0700 Subject: [PATCH 4/4] refactor(platform): centralize SEI_* env var names as constants Standardize the env-var contract: one const block in load.go is the single source of truth for every SEI_* name, referenced by Load, Config.Validate, and the tests instead of scattered string literals. Validate also moves from a map to an ordered slice keyed by those constants, so a missing-field error is deterministic. No behavior change. 
Co-Authored-By: Claude Opus 4.8 --- internal/platform/load.go | 99 +++++++++++++++++++++++----------- internal/platform/load_test.go | 62 ++++++++++----------- internal/platform/platform.go | 68 +++++++++++++---------- 3 files changed, 139 insertions(+), 90 deletions(-) diff --git a/internal/platform/load.go b/internal/platform/load.go index 62f24e4..06c2ad9 100644 --- a/internal/platform/load.go +++ b/internal/platform/load.go @@ -8,9 +8,48 @@ import ( "sigs.k8s.io/yaml" ) -// envControllerConfig names the env var pointing at the read-only app-config -// file (a GitOps-written ConfigMap mounted as a directory). -const envControllerConfig = "SEI_CONTROLLER_CONFIG" +// Environment-variable names. SEI_CONTROLLER_CONFIG points at the read-only +// app-config file (a GitOps-written ConfigMap mounted as a directory); the rest +// are the historical infra knobs Load falls back to when a field is absent from +// that file, except the SEI_GATEWAY_* names, which are always env-sourced (no file key). Single source of truth — referenced by Load and Config.Validate. +const ( + envControllerConfig = "SEI_CONTROLLER_CONFIG" + + envNodepoolName = "SEI_NODEPOOL_NAME" + envNodepoolArchive = "SEI_NODEPOOL_ARCHIVE" + envTolerationKey = "SEI_TOLERATION_KEY" + envServiceAccount = "SEI_SERVICE_ACCOUNT" + + envStorageClassPerf = "SEI_STORAGE_CLASS_PERF" + envStorageClassDefault = "SEI_STORAGE_CLASS_DEFAULT" + envStorageClassArchive = "SEI_STORAGE_CLASS_ARCHIVE" + envStorageSizeDefault = "SEI_STORAGE_SIZE_DEFAULT" + envStorageSizeArchive = "SEI_STORAGE_SIZE_ARCHIVE" + + envResourceCPUArchive = "SEI_RESOURCE_CPU_ARCHIVE" + envResourceMemArchive = "SEI_RESOURCE_MEM_ARCHIVE" + envResourceCPUDefault = "SEI_RESOURCE_CPU_DEFAULT" + envResourceMemDefault = "SEI_RESOURCE_MEM_DEFAULT" + + envSnapshotBucket = "SEI_SNAPSHOT_BUCKET" + envSnapshotRegion = "SEI_SNAPSHOT_REGION" + + envResultExportBucket = "SEI_RESULT_EXPORT_BUCKET" + envResultExportRegion = "SEI_RESULT_EXPORT_REGION" + envResultExportPrefix = "SEI_RESULT_EXPORT_PREFIX" + + envGenesisBucket =
"SEI_GENESIS_BUCKET" + envGenesisRegion = "SEI_GENESIS_REGION" + + envSidecarImage = "SEI_SIDECAR_IMAGE" + envKubeRBACProxyImage = "SEI_KUBE_RBAC_PROXY_IMAGE" + envCosmosExporterImage = "SEI_COSMOS_EXPORTER_IMAGE" + + envGatewayName = "SEI_GATEWAY_NAME" + envGatewayNamespace = "SEI_GATEWAY_NAMESPACE" + envGatewayDomain = "SEI_GATEWAY_DOMAIN" + envGatewayPublicDomain = "SEI_GATEWAY_PUBLIC_DOMAIN" +) // Load resolves the platform Config at startup. A non-empty value in the // app-config file wins; an absent infra field falls back to its historical env @@ -30,42 +69,42 @@ func Load() (Config, error) { } return Config{ - NodepoolName: fileOrEnv(file.Scheduling.NodepoolName, "SEI_NODEPOOL_NAME"), - NodepoolArchive: fileOrEnv(file.Scheduling.NodepoolArchive, "SEI_NODEPOOL_ARCHIVE"), - TolerationKey: fileOrEnv(file.Scheduling.TolerationKey, "SEI_TOLERATION_KEY"), - ServiceAccount: fileOrEnv(file.Scheduling.ServiceAccount, "SEI_SERVICE_ACCOUNT"), + NodepoolName: fileOrEnv(file.Scheduling.NodepoolName, envNodepoolName), + NodepoolArchive: fileOrEnv(file.Scheduling.NodepoolArchive, envNodepoolArchive), + TolerationKey: fileOrEnv(file.Scheduling.TolerationKey, envTolerationKey), + ServiceAccount: fileOrEnv(file.Scheduling.ServiceAccount, envServiceAccount), - StorageClassPerf: fileOrEnv(file.Storage.ClassPerf, "SEI_STORAGE_CLASS_PERF"), - StorageClassDefault: fileOrEnv(file.Storage.ClassDefault, "SEI_STORAGE_CLASS_DEFAULT"), - StorageClassArchive: fileOrEnv(file.Storage.ClassArchive, "SEI_STORAGE_CLASS_ARCHIVE"), - StorageSizeDefault: fileOrEnv(file.Storage.SizeDefault, "SEI_STORAGE_SIZE_DEFAULT"), - StorageSizeArchive: fileOrEnv(file.Storage.SizeArchive, "SEI_STORAGE_SIZE_ARCHIVE"), + StorageClassPerf: fileOrEnv(file.Storage.ClassPerf, envStorageClassPerf), + StorageClassDefault: fileOrEnv(file.Storage.ClassDefault, envStorageClassDefault), + StorageClassArchive: fileOrEnv(file.Storage.ClassArchive, envStorageClassArchive), + StorageSizeDefault: 
fileOrEnv(file.Storage.SizeDefault, envStorageSizeDefault), + StorageSizeArchive: fileOrEnv(file.Storage.SizeArchive, envStorageSizeArchive), - ResourceCPUArchive: fileOrEnv(file.Resources.CPUArchive, "SEI_RESOURCE_CPU_ARCHIVE"), - ResourceMemArchive: fileOrEnv(file.Resources.MemArchive, "SEI_RESOURCE_MEM_ARCHIVE"), - ResourceCPUDefault: fileOrEnv(file.Resources.CPUDefault, "SEI_RESOURCE_CPU_DEFAULT"), - ResourceMemDefault: fileOrEnv(file.Resources.MemDefault, "SEI_RESOURCE_MEM_DEFAULT"), + ResourceCPUArchive: fileOrEnv(file.Resources.CPUArchive, envResourceCPUArchive), + ResourceMemArchive: fileOrEnv(file.Resources.MemArchive, envResourceMemArchive), + ResourceCPUDefault: fileOrEnv(file.Resources.CPUDefault, envResourceCPUDefault), + ResourceMemDefault: fileOrEnv(file.Resources.MemDefault, envResourceMemDefault), - SnapshotBucket: fileOrEnv(file.Snapshot.Bucket, "SEI_SNAPSHOT_BUCKET"), - SnapshotRegion: fileOrEnv(file.Snapshot.Region, "SEI_SNAPSHOT_REGION"), + SnapshotBucket: fileOrEnv(file.Snapshot.Bucket, envSnapshotBucket), + SnapshotRegion: fileOrEnv(file.Snapshot.Region, envSnapshotRegion), - ResultExportBucket: fileOrEnv(file.ResultExport.Bucket, "SEI_RESULT_EXPORT_BUCKET"), - ResultExportRegion: fileOrEnv(file.ResultExport.Region, "SEI_RESULT_EXPORT_REGION"), - ResultExportPrefix: fileOrEnv(file.ResultExport.Prefix, "SEI_RESULT_EXPORT_PREFIX"), + ResultExportBucket: fileOrEnv(file.ResultExport.Bucket, envResultExportBucket), + ResultExportRegion: fileOrEnv(file.ResultExport.Region, envResultExportRegion), + ResultExportPrefix: fileOrEnv(file.ResultExport.Prefix, envResultExportPrefix), - GenesisBucket: fileOrEnv(file.Genesis.Bucket, "SEI_GENESIS_BUCKET"), - GenesisRegion: fileOrEnv(file.Genesis.Region, "SEI_GENESIS_REGION"), + GenesisBucket: fileOrEnv(file.Genesis.Bucket, envGenesisBucket), + GenesisRegion: fileOrEnv(file.Genesis.Region, envGenesisRegion), - SidecarImage: fileOrEnv(file.Images.Sidecar, "SEI_SIDECAR_IMAGE"), - KubeRBACProxyImage: 
fileOrEnv(file.Images.KubeRBACProxy, "SEI_KUBE_RBAC_PROXY_IMAGE"), - CosmosExporterImage: fileOrEnv(file.Images.CosmosExporter, "SEI_COSMOS_EXPORTER_IMAGE"), + SidecarImage: fileOrEnv(file.Images.Sidecar, envSidecarImage), + KubeRBACProxyImage: fileOrEnv(file.Images.KubeRBACProxy, envKubeRBACProxyImage), + CosmosExporterImage: fileOrEnv(file.Images.CosmosExporter, envCosmosExporterImage), // Networking/gateway: env-only, pending removal in the GitOps networking // move (PLT-451). Not migrated to the file to avoid migrate-then-delete. - GatewayName: os.Getenv("SEI_GATEWAY_NAME"), - GatewayNamespace: os.Getenv("SEI_GATEWAY_NAMESPACE"), - GatewayDomain: os.Getenv("SEI_GATEWAY_DOMAIN"), - GatewayPublicDomain: os.Getenv("SEI_GATEWAY_PUBLIC_DOMAIN"), + GatewayName: os.Getenv(envGatewayName), + GatewayNamespace: os.Getenv(envGatewayNamespace), + GatewayDomain: os.Getenv(envGatewayDomain), + GatewayPublicDomain: os.Getenv(envGatewayPublicDomain), ControllerConfigFile: path, }, nil diff --git a/internal/platform/load_test.go b/internal/platform/load_test.go index f1e6045..8b209e8 100644 --- a/internal/platform/load_test.go +++ b/internal/platform/load_test.go @@ -14,33 +14,33 @@ const envNodepool = "env-nodepool" func setMigratedEnv(t *testing.T) { t.Helper() for _, kv := range [][2]string{ - {"SEI_NODEPOOL_NAME", envNodepool}, - {"SEI_NODEPOOL_ARCHIVE", "env-nodepool-archive"}, - {"SEI_TOLERATION_KEY", "env-toleration"}, - {"SEI_SERVICE_ACCOUNT", "env-sa"}, - {"SEI_STORAGE_CLASS_PERF", "env-perf"}, - {"SEI_STORAGE_CLASS_DEFAULT", "env-default"}, - {"SEI_STORAGE_CLASS_ARCHIVE", "env-archive"}, - {"SEI_STORAGE_SIZE_DEFAULT", "env-size-default"}, - {"SEI_STORAGE_SIZE_ARCHIVE", "env-size-archive"}, - {"SEI_RESOURCE_CPU_ARCHIVE", "env-cpu-archive"}, - {"SEI_RESOURCE_MEM_ARCHIVE", "env-mem-archive"}, - {"SEI_RESOURCE_CPU_DEFAULT", "env-cpu-default"}, - {"SEI_RESOURCE_MEM_DEFAULT", "env-mem-default"}, - {"SEI_SNAPSHOT_BUCKET", "env-snap-bucket"}, - {"SEI_SNAPSHOT_REGION", 
"env-snap-region"}, - {"SEI_RESULT_EXPORT_BUCKET", "env-export-bucket"}, - {"SEI_RESULT_EXPORT_REGION", "env-export-region"}, - {"SEI_RESULT_EXPORT_PREFIX", "env-export-prefix"}, - {"SEI_GENESIS_BUCKET", "env-genesis-bucket"}, - {"SEI_GENESIS_REGION", "env-genesis-region"}, - {"SEI_SIDECAR_IMAGE", "env-sidecar"}, - {"SEI_KUBE_RBAC_PROXY_IMAGE", "env-rbac-proxy"}, - {"SEI_COSMOS_EXPORTER_IMAGE", "env-cosmos-exporter"}, - {"SEI_GATEWAY_NAME", "env-gw-name"}, - {"SEI_GATEWAY_NAMESPACE", "env-gw-ns"}, - {"SEI_GATEWAY_DOMAIN", "env-gw-domain"}, - {"SEI_GATEWAY_PUBLIC_DOMAIN", "env-gw-public"}, + {envNodepoolName, envNodepool}, + {envNodepoolArchive, "env-nodepool-archive"}, + {envTolerationKey, "env-toleration"}, + {envServiceAccount, "env-sa"}, + {envStorageClassPerf, "env-perf"}, + {envStorageClassDefault, "env-default"}, + {envStorageClassArchive, "env-archive"}, + {envStorageSizeDefault, "env-size-default"}, + {envStorageSizeArchive, "env-size-archive"}, + {envResourceCPUArchive, "env-cpu-archive"}, + {envResourceMemArchive, "env-mem-archive"}, + {envResourceCPUDefault, "env-cpu-default"}, + {envResourceMemDefault, "env-mem-default"}, + {envSnapshotBucket, "env-snap-bucket"}, + {envSnapshotRegion, "env-snap-region"}, + {envResultExportBucket, "env-export-bucket"}, + {envResultExportRegion, "env-export-region"}, + {envResultExportPrefix, "env-export-prefix"}, + {envGenesisBucket, "env-genesis-bucket"}, + {envGenesisRegion, "env-genesis-region"}, + {envSidecarImage, "env-sidecar"}, + {envKubeRBACProxyImage, "env-rbac-proxy"}, + {envCosmosExporterImage, "env-cosmos-exporter"}, + {envGatewayName, "env-gw-name"}, + {envGatewayNamespace, "env-gw-ns"}, + {envGatewayDomain, "env-gw-domain"}, + {envGatewayPublicDomain, "env-gw-public"}, } { t.Setenv(kv[0], kv[1]) } @@ -58,7 +58,7 @@ func writeConfig(t *testing.T, body string) string { // No file configured: every infra field resolves from the environment. 
func TestLoad_NoFile_AllEnv(t *testing.T) { setMigratedEnv(t) - t.Setenv("SEI_CONTROLLER_CONFIG", "") + t.Setenv(envControllerConfig, "") cfg, err := Load() if err != nil { @@ -89,7 +89,7 @@ storage: images: sidecar: file-sidecar `) - t.Setenv("SEI_CONTROLLER_CONFIG", path) + t.Setenv(envControllerConfig, path) cfg, err := Load() if err != nil { @@ -131,7 +131,7 @@ images: // falls back to the environment. func TestLoad_MissingFileFallsBackToEnv(t *testing.T) { setMigratedEnv(t) - t.Setenv("SEI_CONTROLLER_CONFIG", filepath.Join(t.TempDir(), "absent.yaml")) + t.Setenv(envControllerConfig, filepath.Join(t.TempDir(), "absent.yaml")) cfg, err := Load() if err != nil { @@ -146,7 +146,7 @@ func TestLoad_MissingFileFallsBackToEnv(t *testing.T) { // fall back to env (that would mask an operator mistake). func TestLoad_MalformedFile_Errors(t *testing.T) { path := writeConfig(t, "scheduling: [not-a-map") - t.Setenv("SEI_CONTROLLER_CONFIG", path) + t.Setenv(envControllerConfig, path) if _, err := Load(); err == nil { t.Fatal("expected error for malformed config file, got nil") diff --git a/internal/platform/platform.go b/internal/platform/platform.go index 4dc9a32..089383d 100644 --- a/internal/platform/platform.go +++ b/internal/platform/platform.go @@ -164,37 +164,47 @@ func (c Config) NodepoolForMode(mode string) string { // the env var so the error points at either fix; networking/gateway fields name // only their env var. 
func (c Config) Validate() error { - required := map[string]string{ - "scheduling.nodepoolName (or SEI_NODEPOOL_NAME)": c.NodepoolName, - "scheduling.nodepoolArchive (or SEI_NODEPOOL_ARCHIVE)": c.NodepoolArchive, - "scheduling.tolerationKey (or SEI_TOLERATION_KEY)": c.TolerationKey, - "scheduling.serviceAccount (or SEI_SERVICE_ACCOUNT)": c.ServiceAccount, - "storage.classPerf (or SEI_STORAGE_CLASS_PERF)": c.StorageClassPerf, - "storage.classDefault (or SEI_STORAGE_CLASS_DEFAULT)": c.StorageClassDefault, - "storage.classArchive (or SEI_STORAGE_CLASS_ARCHIVE)": c.StorageClassArchive, - "storage.sizeDefault (or SEI_STORAGE_SIZE_DEFAULT)": c.StorageSizeDefault, - "storage.sizeArchive (or SEI_STORAGE_SIZE_ARCHIVE)": c.StorageSizeArchive, - "resources.cpuArchive (or SEI_RESOURCE_CPU_ARCHIVE)": c.ResourceCPUArchive, - "resources.memArchive (or SEI_RESOURCE_MEM_ARCHIVE)": c.ResourceMemArchive, - "resources.cpuDefault (or SEI_RESOURCE_CPU_DEFAULT)": c.ResourceCPUDefault, - "resources.memDefault (or SEI_RESOURCE_MEM_DEFAULT)": c.ResourceMemDefault, - "snapshot.bucket (or SEI_SNAPSHOT_BUCKET)": c.SnapshotBucket, - "snapshot.region (or SEI_SNAPSHOT_REGION)": c.SnapshotRegion, - "resultExport.bucket (or SEI_RESULT_EXPORT_BUCKET)": c.ResultExportBucket, - "resultExport.region (or SEI_RESULT_EXPORT_REGION)": c.ResultExportRegion, - "resultExport.prefix (or SEI_RESULT_EXPORT_PREFIX)": c.ResultExportPrefix, - "genesis.bucket (or SEI_GENESIS_BUCKET)": c.GenesisBucket, - "genesis.region (or SEI_GENESIS_REGION)": c.GenesisRegion, - "images.sidecar (or SEI_SIDECAR_IMAGE)": c.SidecarImage, - "images.kubeRBACProxy (or SEI_KUBE_RBAC_PROXY_IMAGE)": c.KubeRBACProxyImage, - "SEI_GATEWAY_NAME": c.GatewayName, - "SEI_GATEWAY_NAMESPACE": c.GatewayNamespace, - "SEI_GATEWAY_DOMAIN": c.GatewayDomain, + // fileKey is empty for env-only fields (networking/gateway); they report + // just the env var. Only the first missing field, in slice order, is reported.
+ required := []struct { + fileKey string + envVar string + val string + }{ + {"scheduling.nodepoolName", envNodepoolName, c.NodepoolName}, + {"scheduling.nodepoolArchive", envNodepoolArchive, c.NodepoolArchive}, + {"scheduling.tolerationKey", envTolerationKey, c.TolerationKey}, + {"scheduling.serviceAccount", envServiceAccount, c.ServiceAccount}, + {"storage.classPerf", envStorageClassPerf, c.StorageClassPerf}, + {"storage.classDefault", envStorageClassDefault, c.StorageClassDefault}, + {"storage.classArchive", envStorageClassArchive, c.StorageClassArchive}, + {"storage.sizeDefault", envStorageSizeDefault, c.StorageSizeDefault}, + {"storage.sizeArchive", envStorageSizeArchive, c.StorageSizeArchive}, + {"resources.cpuArchive", envResourceCPUArchive, c.ResourceCPUArchive}, + {"resources.memArchive", envResourceMemArchive, c.ResourceMemArchive}, + {"resources.cpuDefault", envResourceCPUDefault, c.ResourceCPUDefault}, + {"resources.memDefault", envResourceMemDefault, c.ResourceMemDefault}, + {"snapshot.bucket", envSnapshotBucket, c.SnapshotBucket}, + {"snapshot.region", envSnapshotRegion, c.SnapshotRegion}, + {"resultExport.bucket", envResultExportBucket, c.ResultExportBucket}, + {"resultExport.region", envResultExportRegion, c.ResultExportRegion}, + {"resultExport.prefix", envResultExportPrefix, c.ResultExportPrefix}, + {"genesis.bucket", envGenesisBucket, c.GenesisBucket}, + {"genesis.region", envGenesisRegion, c.GenesisRegion}, + {"images.sidecar", envSidecarImage, c.SidecarImage}, + {"images.kubeRBACProxy", envKubeRBACProxyImage, c.KubeRBACProxyImage}, + {"", envGatewayName, c.GatewayName}, + {"", envGatewayNamespace, c.GatewayNamespace}, + {"", envGatewayDomain, c.GatewayDomain}, } - for name, val := range required { - if strings.TrimSpace(val) == "" { - return fmt.Errorf("%s is required", name) + for _, f := range required { + if strings.TrimSpace(f.val) != "" { + continue } + if f.fileKey == "" { + return fmt.Errorf("%s is required", f.envVar) + } + return 
fmt.Errorf("%s (or %s) is required", f.fileKey, f.envVar) } return nil }