diff --git a/CLAUDE.md b/CLAUDE.md index 6095e9e..722674f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -119,6 +119,6 @@ make docker-push IMG= # Push container image - **Condition ownership** — The planner owns all condition management on the owning resource. It sets conditions when creating plans (e.g., `NodeUpdateInProgress=True`) and when observing terminal plans (e.g., `NodeUpdateInProgress=False`). The executor does not set conditions — it only mutates plan/task state and phase transitions. - **Single-patch model** — All status mutations (plan state, conditions, phase, currentImage) accumulate in-memory during a reconcile and are flushed in a single `Status().Patch()` at the end. Tasks mutate owned resources (StatefulSets, Services, PVCs); the executor mutates plan state in-memory; the reconciler flushes once. - **Resource generators** live in `internal/noderesource/` — pure functions that produce StatefulSets, Services, and PVCs from a SeiNode spec. Used by both the controller and plan tasks. -- **Platform config** is fully environment-driven — all fields in `platform.Config` must be set via env vars (no defaults). See `internal/platform/platform.go` for the full list. +- **Platform config** is resolved by `platform.Load` (`internal/platform/load.go`). Infra fields (scheduling, storage, resources, snapshot/genesis/result-export buckets, images) are read from the mounted app-config file (`SEI_CONTROLLER_CONFIG` → `platform.FileConfig`) when present, falling back to their historical env vars — PLT-475, transitional: the env fallback is removed in a follow-up once the ConfigMap is verified populated. Networking/gateway fields (`SEI_GATEWAY_*`, `SEI_P2P_ENDPOINT_DOMAIN`, `SEI_NLB_TARGET_TYPE`) stay env-sourced pending their removal from the controller in PLT-451. The file is read once at startup for infra fields (an infra change needs a restart); the `stateSync` section is re-read per reconcile (it hot-reloads). See `internal/platform/platform.go` for the field list and `docs/controller-app-config.md` for the file schema. - **Genesis resolution** is handled by the sidecar autonomously: embedded sei-config for well-known chains, S3 fallback at `{SEI_GENESIS_BUCKET}/{chainID}/genesis.json` for custom chains. - Config keys in seid's `config.toml` use **hyphens** (e.g., `persistent-peers`, `trust-height`), not underscores. diff --git a/cmd/main.go b/cmd/main.go index 84f8990..2c5c4ad 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -135,40 +135,11 @@ func main() { os.Exit(1) } - platformCfg := platform.Config{ - NodepoolName: os.Getenv("SEI_NODEPOOL_NAME"), - NodepoolArchive: os.Getenv("SEI_NODEPOOL_ARCHIVE"), - TolerationKey: os.Getenv("SEI_TOLERATION_KEY"), - ServiceAccount: os.Getenv("SEI_SERVICE_ACCOUNT"), - StorageClassPerf: os.Getenv("SEI_STORAGE_CLASS_PERF"), - StorageClassDefault: os.Getenv("SEI_STORAGE_CLASS_DEFAULT"), - StorageClassArchive: os.Getenv("SEI_STORAGE_CLASS_ARCHIVE"), - StorageSizeDefault: os.Getenv("SEI_STORAGE_SIZE_DEFAULT"), - StorageSizeArchive: os.Getenv("SEI_STORAGE_SIZE_ARCHIVE"), - ResourceCPUArchive: os.Getenv("SEI_RESOURCE_CPU_ARCHIVE"), - ResourceMemArchive: os.Getenv("SEI_RESOURCE_MEM_ARCHIVE"), - ResourceCPUDefault: os.Getenv("SEI_RESOURCE_CPU_DEFAULT"), - ResourceMemDefault: os.Getenv("SEI_RESOURCE_MEM_DEFAULT"), - SnapshotBucket: os.Getenv("SEI_SNAPSHOT_BUCKET"), - SnapshotRegion: os.Getenv("SEI_SNAPSHOT_REGION"), - ResultExportBucket: os.Getenv("SEI_RESULT_EXPORT_BUCKET"), - ResultExportRegion: os.Getenv("SEI_RESULT_EXPORT_REGION"), - ResultExportPrefix: os.Getenv("SEI_RESULT_EXPORT_PREFIX"), - GenesisBucket: os.Getenv("SEI_GENESIS_BUCKET"), - GenesisRegion: os.Getenv("SEI_GENESIS_REGION"), - GatewayName: os.Getenv("SEI_GATEWAY_NAME"), - GatewayNamespace: os.Getenv("SEI_GATEWAY_NAMESPACE"), - GatewayDomain: os.Getenv("SEI_GATEWAY_DOMAIN"), - GatewayPublicDomain: os.Getenv("SEI_GATEWAY_PUBLIC_DOMAIN"), - KubeRBACProxyImage: os.Getenv("SEI_KUBE_RBAC_PROXY_IMAGE"), - SidecarImage: os.Getenv("SEI_SIDECAR_IMAGE"), - CosmosExporterImage: os.Getenv("SEI_COSMOS_EXPORTER_IMAGE"), - - // The application-config file is opt-in; this may be empty. Points at a - // read-only mounted file (a GitOps-written ConfigMap volume). - ControllerConfigFile: os.Getenv("SEI_CONTROLLER_CONFIG"), + platformCfg, err := platform.Load() + if err != nil { + setupLog.Error(err, "Failed to load platform configuration") + os.Exit(1) } - if err := platformCfg.Validate(); err != nil { setupLog.Error(err, "Invalid platform configuration") os.Exit(1) diff --git a/docs/controller-app-config.md b/docs/controller-app-config.md new file mode 100644 index 0000000..871ef52 --- /dev/null +++ b/docs/controller-app-config.md @@ -0,0 +1,82 @@ +# Controller app-config file + +The controller reads a single read-only application-config file, pointed at by +`SEI_CONTROLLER_CONFIG` and mounted as a directory (a GitOps-written ConfigMap, +typically `sei-controller-config`). It is decoded into `platform.FileConfig` +(`internal/platform/platform.go`). + +Two read paths, by design: + +- **Infra sections** (`scheduling`, `storage`, `resources`, `snapshot`, + `resultExport`, `genesis`, `images`) are resolved **once at startup** by + `platform.Load`. Editing them in the live ConfigMap propagates to the mount + but has **no effect until the controller pod restarts** + (`kubectl rollout restart`) — only `stateSync` hot-reloads. +- **`stateSync`** is re-read **per reconcile** so syncer changes hot-reload + without a restart (the directory mount swaps atomically). + +## Transitional env fallback (PLT-475) + +For each infra field, a non-empty file value wins; an absent one falls back to +its historical `SEI_*` env var. So an unset `SEI_CONTROLLER_CONFIG` reproduces +the original all-env behavior. The fallback is removed in a follow-up once the +ConfigMap is verified populated, after which the file is authoritative. + +Networking/gateway config (`SEI_GATEWAY_*`, `SEI_P2P_ENDPOINT_DOMAIN`, +`SEI_NLB_TARGET_TYPE`) is **not** in the file — it stays env-sourced pending its +removal from the controller in the GitOps networking move (PLT-451). + +## Schema + +```yaml +# State-sync canonical syncers, keyed by chain-id. Bare host:port (no scheme). +# Read per-reconcile; >=2 entries per chain or the node fails closed. +stateSync: + syncers: + pacific-1: + - rpc-1.example.net:26657 + - rpc-2.example.net:26657 + +# --- infra (read once at startup; env-var fallback during PLT-475) --- + +scheduling: + nodepoolName: sei-node # SEI_NODEPOOL_NAME + nodepoolArchive: sei-archive # SEI_NODEPOOL_ARCHIVE + tolerationKey: sei.io/workload # SEI_TOLERATION_KEY + serviceAccount: seid-node # SEI_SERVICE_ACCOUNT + +storage: # note: no sizePerf — matches the historical env layout + classPerf: gp3-10k-750 # SEI_STORAGE_CLASS_PERF + classDefault: gp3 # SEI_STORAGE_CLASS_DEFAULT + classArchive: gp3-archive # SEI_STORAGE_CLASS_ARCHIVE + sizeDefault: 2000Gi # SEI_STORAGE_SIZE_DEFAULT + sizeArchive: 40Ti # SEI_STORAGE_SIZE_ARCHIVE + +resources: + cpuArchive: "48" # SEI_RESOURCE_CPU_ARCHIVE + memArchive: 448Gi # SEI_RESOURCE_MEM_ARCHIVE + cpuDefault: "4" # SEI_RESOURCE_CPU_DEFAULT + memDefault: 32Gi # SEI_RESOURCE_MEM_DEFAULT + +snapshot: + bucket: sei-snapshots # SEI_SNAPSHOT_BUCKET + region: us-east-2 # SEI_SNAPSHOT_REGION + +resultExport: + bucket: sei-shadow-results # SEI_RESULT_EXPORT_BUCKET + region: us-east-2 # SEI_RESULT_EXPORT_REGION + prefix: shadow-results/ # SEI_RESULT_EXPORT_PREFIX + +genesis: + bucket: sei-k8s-genesis # SEI_GENESIS_BUCKET + region: us-east-2 # SEI_GENESIS_REGION + +images: + sidecar: ghcr.io/sei-protocol/seictl@sha256:... # SEI_SIDECAR_IMAGE + kubeRBACProxy: quay.io/brancz/kube-rbac-proxy:v0.19.1 # SEI_KUBE_RBAC_PROXY_IMAGE + cosmosExporter: ghcr.io/sei-protocol/sei-cosmos-exporter@sha256:... # SEI_COSMOS_EXPORTER_IMAGE +``` + +A present-but-unparseable file is a hard startup error — it never silently falls +back to env. Required fields missing from both sources fail `Config.Validate` +with a message naming the file key and the env var. diff --git a/internal/controller/node/statesync.go b/internal/controller/node/statesync.go index b87d3c7..a51cdfe 100644 --- a/internal/controller/node/statesync.go +++ b/internal/controller/node/statesync.go @@ -2,13 +2,11 @@ package node import ( "fmt" - "os" "slices" "strings" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/yaml" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" "github.com/sei-protocol/sei-k8s-controller/internal/platform" @@ -88,24 +86,11 @@ func (r *SeiNodeReconciler) reconcileStateSyncGate(node *seiv1alpha1.SeiNode) (b // on the directory mount), so re-reading picks up GitOps updates without a pod // restart. Never cache an open handle. func (r *SeiNodeReconciler) canonicalSyncers(chainID string) ([]string, error) { - path := strings.TrimSpace(r.Platform.ControllerConfigFile) - if path == "" { - return nil, nil - } - - raw, err := os.ReadFile(path) + cfg, err := platform.ReadFileConfig(r.Platform.ControllerConfigFile) if err != nil { - if os.IsNotExist(err) { - return nil, nil - } return nil, err } - var cfg platform.FileConfig - if err := yaml.Unmarshal(raw, &cfg); err != nil { - return nil, fmt.Errorf("parsing controller config file %q: %w", path, err) - } - // A YAML list entry may itself carry comma/whitespace-joined endpoints, so // route the joined value through the same splitter the ConfigMap source used. return parseSyncerList(strings.Join(cfg.StateSync.Syncers[chainID], "\n")), nil diff --git a/internal/platform/load.go b/internal/platform/load.go new file mode 100644 index 0000000..06c2ad9 --- /dev/null +++ b/internal/platform/load.go @@ -0,0 +1,143 @@ +package platform + +import ( + "fmt" + "os" + "strings" + + "sigs.k8s.io/yaml" +) + +// Environment-variable names. SEI_CONTROLLER_CONFIG points at the read-only +// app-config file (a GitOps-written ConfigMap mounted as a directory); the rest +// are the historical infra knobs Load falls back to when a field is absent from +// that file. Single source of truth — referenced by Load and Config.Validate. +const ( + envControllerConfig = "SEI_CONTROLLER_CONFIG" + + envNodepoolName = "SEI_NODEPOOL_NAME" + envNodepoolArchive = "SEI_NODEPOOL_ARCHIVE" + envTolerationKey = "SEI_TOLERATION_KEY" + envServiceAccount = "SEI_SERVICE_ACCOUNT" + + envStorageClassPerf = "SEI_STORAGE_CLASS_PERF" + envStorageClassDefault = "SEI_STORAGE_CLASS_DEFAULT" + envStorageClassArchive = "SEI_STORAGE_CLASS_ARCHIVE" + envStorageSizeDefault = "SEI_STORAGE_SIZE_DEFAULT" + envStorageSizeArchive = "SEI_STORAGE_SIZE_ARCHIVE" + + envResourceCPUArchive = "SEI_RESOURCE_CPU_ARCHIVE" + envResourceMemArchive = "SEI_RESOURCE_MEM_ARCHIVE" + envResourceCPUDefault = "SEI_RESOURCE_CPU_DEFAULT" + envResourceMemDefault = "SEI_RESOURCE_MEM_DEFAULT" + + envSnapshotBucket = "SEI_SNAPSHOT_BUCKET" + envSnapshotRegion = "SEI_SNAPSHOT_REGION" + + envResultExportBucket = "SEI_RESULT_EXPORT_BUCKET" + envResultExportRegion = "SEI_RESULT_EXPORT_REGION" + envResultExportPrefix = "SEI_RESULT_EXPORT_PREFIX" + + envGenesisBucket = "SEI_GENESIS_BUCKET" + envGenesisRegion = "SEI_GENESIS_REGION" + + envSidecarImage = "SEI_SIDECAR_IMAGE" + envKubeRBACProxyImage = "SEI_KUBE_RBAC_PROXY_IMAGE" + envCosmosExporterImage = "SEI_COSMOS_EXPORTER_IMAGE" + + envGatewayName = "SEI_GATEWAY_NAME" + envGatewayNamespace = "SEI_GATEWAY_NAMESPACE" + envGatewayDomain = "SEI_GATEWAY_DOMAIN" + envGatewayPublicDomain = "SEI_GATEWAY_PUBLIC_DOMAIN" +) + +// Load resolves the platform Config at startup. A non-empty value in the +// app-config file wins; an absent infra field falls back to its historical env +// var, so an unset SEI_CONTROLLER_CONFIG yields the original all-env behavior. +// That env fallback is transitional — removed once the ConfigMap is populated +// everywhere (PLT-475). Networking/gateway fields and the config-file path +// itself are env-sourced. +// +// The file is read once here; infra changes therefore require a controller +// restart. The stateSync section is read per-reconcile elsewhere (it hot-reloads). +// Caller is expected to run Config.Validate after Load. +func Load() (Config, error) { + path := strings.TrimSpace(os.Getenv(envControllerConfig)) + file, err := ReadFileConfig(path) + if err != nil { + return Config{}, err + } + + return Config{ + NodepoolName: fileOrEnv(file.Scheduling.NodepoolName, envNodepoolName), + NodepoolArchive: fileOrEnv(file.Scheduling.NodepoolArchive, envNodepoolArchive), + TolerationKey: fileOrEnv(file.Scheduling.TolerationKey, envTolerationKey), + ServiceAccount: fileOrEnv(file.Scheduling.ServiceAccount, envServiceAccount), + + StorageClassPerf: fileOrEnv(file.Storage.ClassPerf, envStorageClassPerf), + StorageClassDefault: fileOrEnv(file.Storage.ClassDefault, envStorageClassDefault), + StorageClassArchive: fileOrEnv(file.Storage.ClassArchive, envStorageClassArchive), + StorageSizeDefault: fileOrEnv(file.Storage.SizeDefault, envStorageSizeDefault), + StorageSizeArchive: fileOrEnv(file.Storage.SizeArchive, envStorageSizeArchive), + + ResourceCPUArchive: fileOrEnv(file.Resources.CPUArchive, envResourceCPUArchive), + ResourceMemArchive: fileOrEnv(file.Resources.MemArchive, envResourceMemArchive), + ResourceCPUDefault: fileOrEnv(file.Resources.CPUDefault, envResourceCPUDefault), + ResourceMemDefault: fileOrEnv(file.Resources.MemDefault, envResourceMemDefault), + + SnapshotBucket: fileOrEnv(file.Snapshot.Bucket, envSnapshotBucket), + SnapshotRegion: fileOrEnv(file.Snapshot.Region, envSnapshotRegion), + + ResultExportBucket: fileOrEnv(file.ResultExport.Bucket, envResultExportBucket), + ResultExportRegion: fileOrEnv(file.ResultExport.Region, envResultExportRegion), + ResultExportPrefix: fileOrEnv(file.ResultExport.Prefix, envResultExportPrefix), + + GenesisBucket: fileOrEnv(file.Genesis.Bucket, envGenesisBucket), + GenesisRegion: fileOrEnv(file.Genesis.Region, envGenesisRegion), + + SidecarImage: fileOrEnv(file.Images.Sidecar, envSidecarImage), + KubeRBACProxyImage: fileOrEnv(file.Images.KubeRBACProxy, envKubeRBACProxyImage), + CosmosExporterImage: fileOrEnv(file.Images.CosmosExporter, envCosmosExporterImage), + + // Networking/gateway: env-only, pending removal in the GitOps networking + // move (PLT-451). Not migrated to the file to avoid migrate-then-delete. + GatewayName: os.Getenv(envGatewayName), + GatewayNamespace: os.Getenv(envGatewayNamespace), + GatewayDomain: os.Getenv(envGatewayDomain), + GatewayPublicDomain: os.Getenv(envGatewayPublicDomain), + + ControllerConfigFile: path, + }, nil +} + +// ReadFileConfig reads and decodes the app-config file. An empty path or a +// missing file yields a zero FileConfig (the file is opt-in) — only a present +// file that can't be read or parsed is an error. It is the single read path for +// the config file, shared by Load (startup) and the per-reconcile state-sync +// reader. +func ReadFileConfig(path string) (FileConfig, error) { + if strings.TrimSpace(path) == "" { + return FileConfig{}, nil + } + raw, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return FileConfig{}, nil + } + return FileConfig{}, fmt.Errorf("reading controller config file %q: %w", path, err) + } + var cfg FileConfig + if err := yaml.Unmarshal(raw, &cfg); err != nil { + return FileConfig{}, fmt.Errorf("parsing controller config file %q: %w", path, err) + } + return cfg, nil +} + +// fileOrEnv returns the file value when non-empty, otherwise the named env var +// (the transitional fallback). +func fileOrEnv(fileVal, envVar string) string { + if strings.TrimSpace(fileVal) != "" { + return fileVal + } + return os.Getenv(envVar) +} diff --git a/internal/platform/load_test.go b/internal/platform/load_test.go new file mode 100644 index 0000000..8b209e8 --- /dev/null +++ b/internal/platform/load_test.go @@ -0,0 +1,164 @@ +package platform + +import ( + "os" + "path/filepath" + "testing" +) + +// envNodepool is asserted in multiple fallback cases, so it's named (goconst). +const envNodepool = "env-nodepool" + +// setMigratedEnv sets every migrated infra env var to a recognizable "env-" +// prefixed value so a test can assert which source a resolved field came from. +func setMigratedEnv(t *testing.T) { + t.Helper() + for _, kv := range [][2]string{ + {envNodepoolName, envNodepool}, + {envNodepoolArchive, "env-nodepool-archive"}, + {envTolerationKey, "env-toleration"}, + {envServiceAccount, "env-sa"}, + {envStorageClassPerf, "env-perf"}, + {envStorageClassDefault, "env-default"}, + {envStorageClassArchive, "env-archive"}, + {envStorageSizeDefault, "env-size-default"}, + {envStorageSizeArchive, "env-size-archive"}, + {envResourceCPUArchive, "env-cpu-archive"}, + {envResourceMemArchive, "env-mem-archive"}, + {envResourceCPUDefault, "env-cpu-default"}, + {envResourceMemDefault, "env-mem-default"}, + {envSnapshotBucket, "env-snap-bucket"}, + {envSnapshotRegion, "env-snap-region"}, + {envResultExportBucket, "env-export-bucket"}, + {envResultExportRegion, "env-export-region"}, + {envResultExportPrefix, "env-export-prefix"}, + {envGenesisBucket, "env-genesis-bucket"}, + {envGenesisRegion, "env-genesis-region"}, + {envSidecarImage, "env-sidecar"}, + {envKubeRBACProxyImage, "env-rbac-proxy"}, + {envCosmosExporterImage, "env-cosmos-exporter"}, + {envGatewayName, "env-gw-name"}, + {envGatewayNamespace, "env-gw-ns"}, + {envGatewayDomain, "env-gw-domain"}, + {envGatewayPublicDomain, "env-gw-public"}, + } { + t.Setenv(kv[0], kv[1]) + } +} + +func writeConfig(t *testing.T, body string) string { + t.Helper() + path := filepath.Join(t.TempDir(), "config.yaml") + if err := os.WriteFile(path, []byte(body), 0o600); err != nil { + t.Fatalf("write config: %v", err) + } + return path +} + +// No file configured: every infra field resolves from the environment. +func TestLoad_NoFile_AllEnv(t *testing.T) { + setMigratedEnv(t) + t.Setenv(envControllerConfig, "") + + cfg, err := Load() + if err != nil { + t.Fatalf("Load: %v", err) + } + if err := cfg.Validate(); err != nil { + t.Fatalf("Validate: %v", err) + } + if cfg.NodepoolName != envNodepool || cfg.SnapshotBucket != "env-snap-bucket" || cfg.SidecarImage != "env-sidecar" { + t.Errorf("expected env-sourced values, got nodepool=%q snapshot=%q sidecar=%q", + cfg.NodepoolName, cfg.SnapshotBucket, cfg.SidecarImage) + } + if cfg.ControllerConfigFile != "" { + t.Errorf("ControllerConfigFile = %q, want empty", cfg.ControllerConfigFile) + } +} + +// A field present in the file wins; a field absent from the file falls back to +// its env var. Networking/gateway fields are always env-sourced. +func TestLoad_FileWinsEnvFallback(t *testing.T) { + setMigratedEnv(t) + path := writeConfig(t, ` +scheduling: + nodepoolName: file-nodepool + serviceAccount: file-sa +storage: + classPerf: file-perf +images: + sidecar: file-sidecar +`) + t.Setenv(envControllerConfig, path) + + cfg, err := Load() + if err != nil { + t.Fatalf("Load: %v", err) + } + + // File-sourced. + if cfg.NodepoolName != "file-nodepool" { + t.Errorf("NodepoolName = %q, want file-nodepool", cfg.NodepoolName) + } + if cfg.ServiceAccount != "file-sa" { + t.Errorf("ServiceAccount = %q, want file-sa", cfg.ServiceAccount) + } + if cfg.StorageClassPerf != "file-perf" { + t.Errorf("StorageClassPerf = %q, want file-perf", cfg.StorageClassPerf) + } + if cfg.SidecarImage != "file-sidecar" { + t.Errorf("SidecarImage = %q, want file-sidecar", cfg.SidecarImage) + } + + // Env fallback (absent from file). + if cfg.NodepoolArchive != "env-nodepool-archive" { + t.Errorf("NodepoolArchive = %q, want env fallback", cfg.NodepoolArchive) + } + if cfg.TolerationKey != "env-toleration" { + t.Errorf("TolerationKey = %q, want env fallback", cfg.TolerationKey) + } + + // Networking/gateway: always env, never file. + if cfg.GatewayName != "env-gw-name" || cfg.GatewayDomain != "env-gw-domain" { + t.Errorf("gateway fields should be env-sourced, got name=%q domain=%q", cfg.GatewayName, cfg.GatewayDomain) + } + if cfg.ControllerConfigFile != path { + t.Errorf("ControllerConfigFile = %q, want %q", cfg.ControllerConfigFile, path) + } +} + +// A configured-but-missing file is not an error (the file is opt-in); resolution +// falls back to the environment. +func TestLoad_MissingFileFallsBackToEnv(t *testing.T) { + setMigratedEnv(t) + t.Setenv(envControllerConfig, filepath.Join(t.TempDir(), "absent.yaml")) + + cfg, err := Load() + if err != nil { + t.Fatalf("Load: %v", err) + } + if cfg.NodepoolName != envNodepool { + t.Errorf("NodepoolName = %q, want env fallback", cfg.NodepoolName) + } +} + +// Malformed YAML is a hard error — a present-but-broken file must not silently +// fall back to env (that would mask an operator mistake). +func TestLoad_MalformedFile_Errors(t *testing.T) { + path := writeConfig(t, "scheduling: [not-a-map") + t.Setenv(envControllerConfig, path) + + if _, err := Load(); err == nil { + t.Fatal("expected error for malformed config file, got nil") + } +} + +func TestReadFileConfig_EmptyPath(t *testing.T) { + cfg, err := ReadFileConfig("") + if err != nil { + t.Fatalf("ReadFileConfig(\"\"): %v", err) + } + if cfg.StateSync.Syncers != nil || cfg.Scheduling.NodepoolName != "" { + t.Errorf("empty path should yield zero FileConfig, got %+v", cfg) + } +} diff --git a/internal/platform/platform.go b/internal/platform/platform.go index b97ac43..089383d 100644 --- a/internal/platform/platform.go +++ b/internal/platform/platform.go @@ -21,13 +21,12 @@ const ( ) // Config holds infrastructure-level settings that vary per deployment -// environment. Fields are read from environment variables in main.go and are -// required unless documented otherwise — ControllerConfigFile is optional -// (state-sync is opt-in). See platformtest.Config() for test fixtures. -// -// Config is env-sourced infra; FileConfig (below) is the file-sourced -// application config. They are deliberately distinct: ControllerConfigFile is -// the path to the latter, not its contents. +// environment. It is resolved by Load: the infra fields are read from the +// app-config file (FileConfig) when present, falling back to their historical +// env vars (PLT-475, transitional); the networking/gateway fields and +// ControllerConfigFile are env-sourced. Fields are required unless documented +// otherwise — ControllerConfigFile is optional (state-sync is opt-in). See +// platformtest.Config() for test fixtures. type Config struct { NodepoolName string NodepoolArchive string @@ -79,8 +78,24 @@ type Config struct { } // FileConfig is the controller's file-sourced application config (SEI_CONTROLLER_CONFIG). +// +// The infra sections (scheduling, storage, resources, snapshot, resultExport, +// genesis, images) carry the infra config that was historically env-sourced. +// They are resolved once at startup by Load, the file value winning over the +// env fallback. The stateSync section is read per-reconcile (it hot-reloads); +// the infra sections are not (an infra change warrants a restart). +// +// Networking/gateway config is deliberately absent — it stays env-sourced +// pending its removal from the controller in the GitOps networking move (PLT-451). type FileConfig struct { - StateSync StateSyncConfig `json:"stateSync"` + StateSync StateSyncConfig `json:"stateSync"` + Scheduling SchedulingConfig `json:"scheduling"` + Storage StorageConfig `json:"storage"` + Resources ResourcesConfig `json:"resources"` + Snapshot BucketConfig `json:"snapshot"` + ResultExport ResultExportConfig `json:"resultExport"` + Genesis BucketConfig `json:"genesis"` + Images ImagesConfig `json:"images"` } // StateSyncConfig is the state-sync section of the application config. @@ -89,6 +104,51 @@ type StateSyncConfig struct { Syncers map[string][]string `json:"syncers"` } +// SchedulingConfig places node pods onto Karpenter pools and the seid service account. +type SchedulingConfig struct { + NodepoolName string `json:"nodepoolName"` + NodepoolArchive string `json:"nodepoolArchive"` + TolerationKey string `json:"tolerationKey"` + ServiceAccount string `json:"serviceAccount"` +} + +// StorageConfig holds the PVC storage classes and sizes for default and archive nodes. +type StorageConfig struct { + ClassPerf string `json:"classPerf"` + ClassDefault string `json:"classDefault"` + ClassArchive string `json:"classArchive"` + SizeDefault string `json:"sizeDefault"` + SizeArchive string `json:"sizeArchive"` +} + +// ResourcesConfig holds the CPU/memory requests for default and archive nodes. +type ResourcesConfig struct { + CPUArchive string `json:"cpuArchive"` + MemArchive string `json:"memArchive"` + CPUDefault string `json:"cpuDefault"` + MemDefault string `json:"memDefault"` +} + +// BucketConfig is an S3 bucket + region pair (snapshot, genesis). +type BucketConfig struct { + Bucket string `json:"bucket"` + Region string `json:"region"` +} + +// ResultExportConfig is the shadow-replay result-export bucket, region, and key prefix. +type ResultExportConfig struct { + Bucket string `json:"bucket"` + Region string `json:"region"` + Prefix string `json:"prefix"` +} + +// ImagesConfig holds the sidecar container images attached to every SeiNode pod. +type ImagesConfig struct { + Sidecar string `json:"sidecar"` + KubeRBACProxy string `json:"kubeRBACProxy"` + CosmosExporter string `json:"cosmosExporter"` +} + // NodepoolForMode returns the Karpenter NodePool name for the given // sei-config mode string. Archive nodes use a dedicated pool; all // other modes share the default pool. @@ -99,39 +159,52 @@ func (c Config) NodepoolForMode(mode string) string { return c.NodepoolName } -// Validate returns an error if required fields are missing. +// Validate returns an error if a required field is missing from both the +// app-config file and the environment. The source label names the file key and +// the env var so the error points at either fix; networking/gateway fields name +// only their env var. func (c Config) Validate() error { - required := map[string]string{ - "SEI_NODEPOOL_NAME": c.NodepoolName, - "SEI_TOLERATION_KEY": c.TolerationKey, - "SEI_SERVICE_ACCOUNT": c.ServiceAccount, - "SEI_STORAGE_CLASS_PERF": c.StorageClassPerf, - "SEI_STORAGE_CLASS_DEFAULT": c.StorageClassDefault, - "SEI_STORAGE_CLASS_ARCHIVE": c.StorageClassArchive, - "SEI_STORAGE_SIZE_DEFAULT": c.StorageSizeDefault, - "SEI_STORAGE_SIZE_ARCHIVE": c.StorageSizeArchive, - "SEI_NODEPOOL_ARCHIVE": c.NodepoolArchive, - "SEI_RESOURCE_CPU_ARCHIVE": c.ResourceCPUArchive, - "SEI_RESOURCE_MEM_ARCHIVE": c.ResourceMemArchive, - "SEI_RESOURCE_CPU_DEFAULT": c.ResourceCPUDefault, - "SEI_RESOURCE_MEM_DEFAULT": c.ResourceMemDefault, - "SEI_SNAPSHOT_BUCKET": c.SnapshotBucket, - "SEI_SNAPSHOT_REGION": c.SnapshotRegion, - "SEI_RESULT_EXPORT_BUCKET": c.ResultExportBucket, - "SEI_RESULT_EXPORT_REGION": c.ResultExportRegion, - "SEI_RESULT_EXPORT_PREFIX": c.ResultExportPrefix, - "SEI_GENESIS_BUCKET": c.GenesisBucket, - "SEI_GENESIS_REGION": c.GenesisRegion, - "SEI_GATEWAY_NAME": c.GatewayName, - "SEI_GATEWAY_NAMESPACE": c.GatewayNamespace, - "SEI_GATEWAY_DOMAIN": c.GatewayDomain, - "SEI_SIDECAR_IMAGE": c.SidecarImage, - "SEI_KUBE_RBAC_PROXY_IMAGE": c.KubeRBACProxyImage, + // fileKey is empty for env-only fields (networking/gateway); they report + // just the env var. Slice order is the report order for the first missing. + required := []struct { + fileKey string + envVar string + val string + }{ + {"scheduling.nodepoolName", envNodepoolName, c.NodepoolName}, + {"scheduling.nodepoolArchive", envNodepoolArchive, c.NodepoolArchive}, + {"scheduling.tolerationKey", envTolerationKey, c.TolerationKey}, + {"scheduling.serviceAccount", envServiceAccount, c.ServiceAccount}, + {"storage.classPerf", envStorageClassPerf, c.StorageClassPerf}, + {"storage.classDefault", envStorageClassDefault, c.StorageClassDefault}, + {"storage.classArchive", envStorageClassArchive, c.StorageClassArchive}, + {"storage.sizeDefault", envStorageSizeDefault, c.StorageSizeDefault}, + {"storage.sizeArchive", envStorageSizeArchive, c.StorageSizeArchive}, + {"resources.cpuArchive", envResourceCPUArchive, c.ResourceCPUArchive}, + {"resources.memArchive", envResourceMemArchive, c.ResourceMemArchive}, + {"resources.cpuDefault", envResourceCPUDefault, c.ResourceCPUDefault}, + {"resources.memDefault", envResourceMemDefault, c.ResourceMemDefault}, + {"snapshot.bucket", envSnapshotBucket, c.SnapshotBucket}, + {"snapshot.region", envSnapshotRegion, c.SnapshotRegion}, + {"resultExport.bucket", envResultExportBucket, c.ResultExportBucket}, + {"resultExport.region", envResultExportRegion, c.ResultExportRegion}, + {"resultExport.prefix", envResultExportPrefix, c.ResultExportPrefix}, + {"genesis.bucket", envGenesisBucket, c.GenesisBucket}, + {"genesis.region", envGenesisRegion, c.GenesisRegion}, + {"images.sidecar", envSidecarImage, c.SidecarImage}, + {"images.kubeRBACProxy", envKubeRBACProxyImage, c.KubeRBACProxyImage}, + {"", envGatewayName, c.GatewayName}, + {"", envGatewayNamespace, c.GatewayNamespace}, + {"", envGatewayDomain, c.GatewayDomain}, } - for name, val := range required { - if strings.TrimSpace(val) == "" { - return fmt.Errorf("%s is required", name) + for _, f := range required { + if strings.TrimSpace(f.val) != "" { + continue + } + if f.fileKey == "" { + return fmt.Errorf("%s is required", f.envVar) } + return fmt.Errorf("%s (or %s) is required", f.fileKey, f.envVar) } return nil }