diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 0679a9c2..c502ff8f 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -289,6 +289,31 @@ const ( // ConditionSeiNodePaused mirrors spec.paused: True when paused. ConditionSeiNodePaused = "Paused" + + // ConditionStateSyncReady gates the state-sync-bearing plan. Always-present + // once reconciled. True means canonical syncers are configured and the plan + // may proceed; False fails closed (no state-sync plan built, and peers are + // never used as witnesses). It is a configured-count gate: witness + // reliability comes from curating the canonical-syncer set, and the sidecar + // establishes the trust point from them as it does today. + ConditionStateSyncReady = "StateSyncReady" +) + +// Reasons for the StateSyncReady condition. +const ( + // ReasonStateSyncReady: state-sync enabled and >=2 canonical syncers are + // configured for the chain; the state-sync-bearing plan may proceed. + ReasonStateSyncReady = "Ready" + // ReasonStateSyncNoSyncersConfigured: state-sync enabled but the canonical- + // syncer source yields <2 entries for the chain (fail closed). + ReasonStateSyncNoSyncersConfigured = "NoSyncersConfigured" + // ReasonStateSyncNotApplicable: the node does not enable state-sync. + ReasonStateSyncNotApplicable = "NotApplicable" + // ReasonStateSyncSyncerSourceError: reading or parsing the canonical-syncer + // source file failed for a reason other than absence (transient). Fails + // closed and requeues; the rest of the reconcile (StatefulSet, Failed/Paused + // handling, status flush) still runs. + ReasonStateSyncSyncerSourceError = "SyncerSourceError" ) // Reasons for the ImportPVCReady condition. @@ -360,15 +385,24 @@ type SeiNodeStatus struct { // +optional ResolvedPeers []string `json:"resolvedPeers,omitempty"` - // ResolvedRPCWitnesses carries the in-cluster RPC endpoints - // (`-0...svc.cluster.local:26657`) of the label-resolved - // peers, used as CometBFT state-sync light-client witnesses. Unlike - // ResolvedPeers these never carry an external P2P address — RPC is - // internal-only. When empty the sidecar derives witnesses from - // persistent_peers instead. + // ResolvedRPCWitnesses is DEPRECATED and no longer written. State-sync + // witnesses now come from the controller-level canonical-syncer ConfigMap + // (see ResolvedStateSyncers), not label-derived fleet peers. The field is + // retained present-but-unwritten this release (CRD field removal is a + // one-way door); remove it at the version bump. + // + // Deprecated: use ResolvedStateSyncers. // +optional ResolvedRPCWitnesses []string `json:"resolvedRPCWitnesses,omitempty"` + // ResolvedStateSyncers carries the canonical state-sync RPC endpoints + // (`host:port`) read from the canonical-syncer ConfigMap for this node's + // chain, fed verbatim into ConfigureStateSyncTask.RpcServers. Written by the + // StateSyncReady gate only when state-sync is enabled and >=2 syncers are + // configured; otherwise left empty (fail closed). + // +optional + ResolvedStateSyncers []string `json:"resolvedStateSyncers,omitempty"` + // StatefulSet references the StatefulSet the controller created for // this SeiNode. UID is the identity check: an STS with the expected // name but a different UID is not the one this controller created diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 06f84c58..98d37457 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1132,6 +1132,11 @@ func (in *SeiNodeStatus) DeepCopyInto(out *SeiNodeStatus) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.ResolvedStateSyncers != nil { + in, out := &in.ResolvedStateSyncers, &out.ResolvedStateSyncers + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.StatefulSet != nil { in, out := &in.StatefulSet, &out.StatefulSet *out = new(StatefulSetRef) diff --git a/cmd/main.go b/cmd/main.go index bedb287f..84f89905 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -163,6 +163,10 @@ func main() { KubeRBACProxyImage: os.Getenv("SEI_KUBE_RBAC_PROXY_IMAGE"), SidecarImage: os.Getenv("SEI_SIDECAR_IMAGE"), CosmosExporterImage: os.Getenv("SEI_COSMOS_EXPORTER_IMAGE"), + + // The application-config file is opt-in; this may be empty. Points at a + // read-only mounted file (a GitOps-written ConfigMap volume). + ControllerConfigFile: os.Getenv("SEI_CONTROLLER_CONFIG"), } if err := platformCfg.Validate(); err != nil { diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index a56e9c59..eb47bf05 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -972,12 +972,23 @@ spec: type: array resolvedRPCWitnesses: description: |- - ResolvedRPCWitnesses carries the in-cluster RPC endpoints - (`-0...svc.cluster.local:26657`) of the label-resolved - peers, used as CometBFT state-sync light-client witnesses. Unlike - ResolvedPeers these never carry an external P2P address — RPC is - internal-only. When empty the sidecar derives witnesses from - persistent_peers instead. + ResolvedRPCWitnesses is DEPRECATED and no longer written. State-sync + witnesses now come from the controller-level canonical-syncer ConfigMap + (see ResolvedStateSyncers), not label-derived fleet peers. The field is + retained present-but-unwritten this release (CRD field removal is a + one-way door); remove it at the version bump. + + Deprecated: use ResolvedStateSyncers. + items: + type: string + type: array + resolvedStateSyncers: + description: |- + ResolvedStateSyncers carries the canonical state-sync RPC endpoints + (`host:port`) read from the canonical-syncer ConfigMap for this node's + chain, fed verbatim into ConfigureStateSyncTask.RpcServers. Written by the + StateSyncReady gate only when state-sync is enabled and >=2 syncers are + configured; otherwise left empty (fail closed). items: type: string type: array diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index d0e3a1e9..d88d7a1c 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -81,6 +81,11 @@ spec: value: gateway - name: SEI_GATEWAY_DOMAIN value: prod.platform.sei.io + # Read-only application-config source. Points at the mounted ConfigMap + # (directory mount, not subPath) so GitOps swaps propagate without a + # pod restart. Optional: the controller fails closed when absent. + - name: SEI_CONTROLLER_CONFIG + value: /etc/sei-controller/config.yaml ports: - containerPort: 8080 name: metrics @@ -110,7 +115,19 @@ spec: requests: cpu: 50m memory: 128Mi - volumeMounts: [] - volumes: [] + volumeMounts: + # Directory mount (not subPath): subPath snapshots the ConfigMap at + # mount time and never updates, defeating the fresh-read-per-reconcile + # contract. readOnly so the controller can't rewrite its trust root. + - name: sei-controller-config + mountPath: /etc/sei-controller + readOnly: true + volumes: + # GitOps provisions the ConfigMap content out of this repo. optional so + # the controller starts (and fails closed) when it isn't present yet. + - name: sei-controller-config + configMap: + name: sei-controller-config + optional: true serviceAccountName: controller-manager terminationGracePeriodSeconds: 10 diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index dcc4b884..fd1435fe 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -97,6 +97,7 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct statusBase := client.MergeFromWithOptions(before, client.MergeFromWithOptimisticLock{}) observedPhase := node.Status.Phase prevSidecar := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarReady) + prevStateSync := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionStateSyncReady) setNodePausedCondition(node) @@ -107,6 +108,15 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return r.Status().Patch(ctx, node, statusBase) } + // Resolve the always-present StateSyncReady condition before the Failed and + // Paused early-returns so it rides the existing flush on every path (Failed + // flush, Paused flush, and the normal end-of-reconcile patch) — no separate + // status write. Fail-closed enforcement lives in ResolvePlan, which declines + // to build a state-sync plan when this condition isn't True; that keeps + // terminal-plan cleanup and non-state-sync work running. A blocked gate + // requeues (see end of reconcile) without aborting the steps below. + stateSyncBlocked := r.reconcileStateSyncGate(node) + // Failed is terminal — flush any condition updates and exit. if node.Status.Phase == seiv1alpha1.PhaseFailed { if err := flushStatus(); err != nil { @@ -133,11 +143,15 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct } planAlreadyActive := node.Status.Plan != nil && node.Status.Plan.Phase == seiv1alpha1.TaskPlanActive + // ResolvePlan runs unconditionally: it clears terminal plans and drives + // non-state-sync work. Its internal fail-closed gate declines to build a + // state-sync plan when StateSyncReady isn't True. if err := r.Planner.ResolvePlan(ctx, node); err != nil { return ctrl.Result{}, fmt.Errorf("resolving plan: %w", err) } r.emitSidecarReadinessEvent(node, prevSidecar) + r.emitStateSyncBlockedEvent(node, prevStateSync) var result ctrl.Result var execErr error @@ -163,33 +177,7 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return result, execErr } - // Emit metrics/events if the phase changed. - if node.Status.Phase != observedPhase { - ns, name := node.Namespace, node.Name - nodePhaseTransitions.Add(ctx, 1, - metric.WithAttributes( - observability.AttrController.String(seiNodeControllerName), - observability.AttrNamespace.String(ns), - observability.AttrFromPhase.String(string(observedPhase)), - observability.AttrToPhase.String(string(node.Status.Phase)), - ), - ) - emitNodePhase(ns, name, node.Status.Phase) - r.Recorder.Eventf(node, corev1.EventTypeNormal, "PhaseTransition", - "Phase changed from %s to %s", observedPhase, node.Status.Phase) - - // Record time spent in the previous phase. - if node.Status.PhaseTransitionTime != nil && observedPhase != "" { - dur := time.Since(node.Status.PhaseTransitionTime.Time).Seconds() - nodePhaseDuration.Record(ctx, dur, - metric.WithAttributes( - observability.AttrNamespace.String(ns), - observability.AttrChainID.String(node.Spec.ChainID), - observability.AttrPhase.String(string(observedPhase)), - ), - ) - } - } + r.emitPhaseTransition(ctx, node, observedPhase) // Running nodes with no active plan requeue on a steady-state interval. // Spec changes trigger immediate reconciles via GenerationChangedPredicate. @@ -197,9 +185,51 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{RequeueAfter: statusPollInterval}, nil } + // A blocked state-sync node (fail-closed or transient) builds no plan to + // drive a requeue, and the syncer file is a mounted volume with no watch. + // Poll so the gate re-resolves and unblocks once GitOps provisions or fixes + // the syncers. IsZero defers to any stronger requeue above (running-node + // poll, plan execution), so an active-plan node is unaffected. + if stateSyncBlocked && result.IsZero() { + return ctrl.Result{RequeueAfter: statusPollInterval}, nil + } + return result, nil } +// emitPhaseTransition records phase-transition metrics and a PhaseTransition +// Event when the node's phase changed during this reconcile. A no-op when the +// phase is unchanged. +func (r *SeiNodeReconciler) emitPhaseTransition(ctx context.Context, node *seiv1alpha1.SeiNode, observedPhase seiv1alpha1.SeiNodePhase) { + if node.Status.Phase == observedPhase { + return + } + ns, name := node.Namespace, node.Name + nodePhaseTransitions.Add(ctx, 1, + metric.WithAttributes( + observability.AttrController.String(seiNodeControllerName), + observability.AttrNamespace.String(ns), + observability.AttrFromPhase.String(string(observedPhase)), + observability.AttrToPhase.String(string(node.Status.Phase)), + ), + ) + emitNodePhase(ns, name, node.Status.Phase) + r.Recorder.Eventf(node, corev1.EventTypeNormal, "PhaseTransition", + "Phase changed from %s to %s", observedPhase, node.Status.Phase) + + // Record time spent in the previous phase. + if node.Status.PhaseTransitionTime != nil && observedPhase != "" { + dur := time.Since(node.Status.PhaseTransitionTime.Time).Seconds() + nodePhaseDuration.Record(ctx, dur, + metric.WithAttributes( + observability.AttrNamespace.String(ns), + observability.AttrChainID.String(node.Spec.ChainID), + observability.AttrPhase.String(string(observedPhase)), + ), + ) + } +} + // SetupWithManager sets up the controller with the Manager. func (r *SeiNodeReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). @@ -300,3 +330,26 @@ func (r *SeiNodeReconciler) emitSidecarReadinessEvent(node *seiv1alpha1.SeiNode, "sidecar Healthz returned 200; mark-ready gate is open") } } + +// emitStateSyncBlockedEvent fires a StateSyncBlocked Warning once, on the +// transition into fail-closed (StateSyncReady leaving True/absent for a +// fail-closed reason) — not on every requeue. NotApplicable (state-sync +// disabled) never trips it. +func (r *SeiNodeReconciler) emitStateSyncBlockedEvent(node *seiv1alpha1.SeiNode, prev *metav1.Condition) { + cur := apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionStateSyncReady) + if cur == nil || cur.Status == metav1.ConditionTrue { + return + } + blockedReason := cur.Reason == seiv1alpha1.ReasonStateSyncNoSyncersConfigured || + cur.Reason == seiv1alpha1.ReasonStateSyncSyncerSourceError + if !blockedReason { + return + } + // Transition = previously True, absent, or a different (non-blocked) reason. + if prev != nil && prev.Status == cur.Status && prev.Reason == cur.Reason { + return + } + r.Recorder.Eventf(node, corev1.EventTypeWarning, "StateSyncBlocked", + "state sync enabled but not ready for chain %q (%s); not building plan", + node.Spec.ChainID, cur.Reason) +} diff --git a/internal/controller/node/peers.go b/internal/controller/node/peers.go index 5cc853d0..484e6984 100644 --- a/internal/controller/node/peers.go +++ b/internal/controller/node/peers.go @@ -10,9 +10,12 @@ import ( ) // reconcilePeers resolves spec.peers into status.resolvedPeers (the composed -// persistent_peers set) and status.resolvedRPCWitnesses (state-sync witnesses). -// The plan plumbs the resolved set into config via the config-apply override -// (init path) or the config-patch (running path). +// persistent_peers set). The plan plumbs the resolved set into config via the +// config-apply override (init path) or the config-patch (running path). +// +// State-sync witnesses are no longer derived here from label-matched peers; +// they come from the controller-level canonical-syncer ConfigMap via the +// StateSyncReady gate (see statesync.go). func (r *SeiNodeReconciler) reconcilePeers(ctx context.Context, node *seiv1alpha1.SeiNode) error { resolver := peering.Resolver{ Reader: r.Client, @@ -27,8 +30,5 @@ func (r *SeiNodeReconciler) reconcilePeers(ctx context.Context, node *seiv1alpha if !slices.Equal(node.Status.ResolvedPeers, result.Peers) { node.Status.ResolvedPeers = result.Peers } - if !slices.Equal(node.Status.ResolvedRPCWitnesses, result.Witnesses) { - node.Status.ResolvedRPCWitnesses = result.Witnesses - } return nil } diff --git a/internal/controller/node/peers_test.go b/internal/controller/node/peers_test.go index 3cbe6b89..c24775c2 100644 --- a/internal/controller/node/peers_test.go +++ b/internal/controller/node/peers_test.go @@ -14,8 +14,6 @@ const ( testRoleValue = "validator" testConsumerName = "consumer" testPeer1ResolvedID = "mock-node-id@peer-1-0.peer-1.default.svc.cluster.local:26656" - testWitnessNS = "arctic-1" - testWitnessRole = "syncer" ) type errStub string @@ -111,54 +109,12 @@ func TestReconcilePeers_PrefersExternalAddress(t *testing.T) { t.Errorf("resolvedPeers[0] = %q, want %q", node.Status.ResolvedPeers[0], want) } - // The witness must be the internal RPC DNS, NOT the external P2P address: - // the NLB exposes P2P only. Writing the external address as a witness is - // the regression this fix prevents. - wantWitness := "pub-peer-0.pub-peer.default.svc.cluster.local:26657" - if len(node.Status.ResolvedRPCWitnesses) != 1 || node.Status.ResolvedRPCWitnesses[0] != wantWitness { - t.Errorf("resolvedRPCWitnesses = %v, want [%q]", node.Status.ResolvedRPCWitnesses, wantWitness) - } -} - -func TestReconcilePeers_WitnessesExcludeSelfAndUseRPCPort(t *testing.T) { - const peerName = "syncer-0-1" - node := &seiv1alpha1.SeiNode{ - ObjectMeta: metav1.ObjectMeta{ - Name: "syncer-0-0", Namespace: testWitnessNS, - Labels: map[string]string{testRoleLabel: testWitnessRole}, - }, - Spec: seiv1alpha1.SeiNodeSpec{ - ChainID: testWitnessNS, - Image: "sei:latest", - Peers: []seiv1alpha1.PeerSource{ - {Label: &seiv1alpha1.LabelPeerSource{ - Selector: map[string]string{testRoleLabel: testWitnessRole}, - }}, - }, - FullNode: &seiv1alpha1.FullNodeSpec{}, - }, - } - peer := &seiv1alpha1.SeiNode{ - ObjectMeta: metav1.ObjectMeta{ - Name: peerName, Namespace: testWitnessNS, - Labels: map[string]string{testRoleLabel: testWitnessRole}, - }, - Spec: seiv1alpha1.SeiNodeSpec{ - ChainID: testWitnessNS, - Image: "sei:latest", - FullNode: &seiv1alpha1.FullNodeSpec{}, - }, - } - - r, _ := newNodeReconciler(t, node, peer) - if err := r.reconcilePeers(context.Background(), node); err != nil { - t.Fatalf("reconcilePeers: %v", err) - } - - want := peerName + "-0." + peerName + "." + testWitnessNS + ".svc.cluster.local:26657" - if len(node.Status.ResolvedRPCWitnesses) != 1 || node.Status.ResolvedRPCWitnesses[0] != want { - t.Errorf("resolvedRPCWitnesses = %v, want [%q] (self excluded, RPC port)", - node.Status.ResolvedRPCWitnesses, want) + // State-sync witnesses are no longer derived from peers — they come from + // the canonical-syncer ConfigMap via the StateSyncReady gate. reconcilePeers + // must not write the deprecated ResolvedRPCWitnesses field. + //nolint:staticcheck // deliberately asserting the deprecated field stays unwritten + if node.Status.ResolvedRPCWitnesses != nil { + t.Errorf("resolvedRPCWitnesses should be unwritten, got %v", node.Status.ResolvedRPCWitnesses) //nolint:staticcheck // see above } } @@ -402,14 +358,6 @@ func TestReconcilePeers_NilSidecarFactorySkipsNewPeer(t *testing.T) { if len(node.Status.ResolvedPeers) != 0 { t.Fatalf("expected unresolvable peer to be skipped, got %d: %v", len(node.Status.ResolvedPeers), node.Status.ResolvedPeers) } - // Intentional asymmetry: the witness needs no node_id, so it is emitted - // even though the peer was skipped from persistent_peers. seid can dial a - // state-sync RPC witness it has no P2P peering with; do not "symmetrize" - // this with ResolvedPeers. - wantWitness := "peer-1-0.peer-1.default.svc.cluster.local:26657" - if len(node.Status.ResolvedRPCWitnesses) != 1 || node.Status.ResolvedRPCWitnesses[0] != wantWitness { - t.Errorf("expected witness despite skipped peer, got %v", node.Status.ResolvedRPCWitnesses) - } } // Nil factory + prior entry: preserve-prior branch fires. diff --git a/internal/controller/node/statesync.go b/internal/controller/node/statesync.go new file mode 100644 index 00000000..b87d3c73 --- /dev/null +++ b/internal/controller/node/statesync.go @@ -0,0 +1,143 @@ +package node + +import ( + "fmt" + "os" + "slices" + "strings" + + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/yaml" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/platform" +) + +// minCanonicalSyncers is the controller-side fail-closed floor: state-sync +// requires at least two configured canonical syncers before the +// state-sync-bearing plan may proceed (CometBFT needs >=2 rpc-servers, and we +// never fall back to peers as witnesses). Reliability comes from curating the +// canonical-syncer set, not from cross-witness checking — the sidecar keeps its +// existing trust-pinning behavior. +const minCanonicalSyncers = 2 + +// reconcileStateSyncGate resolves the canonical-syncer set and sets the +// always-present ConditionStateSyncReady. It mutates node.Status in-memory only; +// the caller's single status patch flushes it. Run before the Failed/Paused +// early-returns so StateSyncReady is seeded on every path. +// +// Fail-closed is enforced downstream: the planner declines to build a state-sync +// plan whenever StateSyncReady is not True (see ResolvePlan), so this method only +// resolves the condition. It returns blocked=true whenever state-sync is enabled +// but the condition didn't resolve to True — both NoSyncersConfigured and the +// transient SyncerSourceError. The caller requeues on blocked: the syncer file is +// a mounted volume with no watch, so polling is the only way the gate re-resolves +// and unblocks once GitOps provisions or fixes the syncers. +func (r *SeiNodeReconciler) reconcileStateSyncGate(node *seiv1alpha1.SeiNode) (blocked bool) { + snap := node.Spec.SnapshotSource() + if snap == nil || snap.StateSync == nil { + // State-sync disabled: no state-sync task in the plan to gate. + node.Status.ResolvedStateSyncers = nil + setStateSyncReady(node, metav1.ConditionFalse, seiv1alpha1.ReasonStateSyncNotApplicable, + "node does not enable state sync") + return false + } + + syncers, err := r.canonicalSyncers(node.Spec.ChainID) + if err != nil { + // Transient read/parse error: fail closed (clear any stale set) but don't + // abort the reconcile. Requeue and re-read next tick. + node.Status.ResolvedStateSyncers = nil + setStateSyncReady(node, metav1.ConditionUnknown, seiv1alpha1.ReasonStateSyncSyncerSourceError, + fmt.Sprintf("reading canonical-syncer source for chain %q: %v", node.Spec.ChainID, err)) + return true + } + + if len(syncers) < minCanonicalSyncers { + // Fail closed: do not feed a witness-less (or single-witness) set into + // the plan. Leave ResolvedStateSyncers empty so a stale set can't leak + // into ConfigureStateSyncTask on a later reconcile. + node.Status.ResolvedStateSyncers = nil + setStateSyncReady(node, metav1.ConditionFalse, seiv1alpha1.ReasonStateSyncNoSyncersConfigured, + fmt.Sprintf("state sync requires >=%d canonical syncers configured for chain %q; found %d", + minCanonicalSyncers, node.Spec.ChainID, len(syncers))) + return true + } + + if !slices.Equal(node.Status.ResolvedStateSyncers, syncers) { + node.Status.ResolvedStateSyncers = syncers + } + setStateSyncReady(node, metav1.ConditionTrue, seiv1alpha1.ReasonStateSyncReady, + fmt.Sprintf("%d canonical syncers configured for chain %q", len(syncers), node.Spec.ChainID)) + return false +} + +// canonicalSyncers reads the read-only application-config file fresh and +// returns the parsed syncer RPC endpoints for the given chain. An unset path, a +// missing file, or a chain with no entry all yield an empty slice (no error) so +// the caller fails closed via the StateSyncReady gate rather than crashing — +// state-sync is opt-in and the file may legitimately be absent until GitOps +// provisions the backing ConfigMap. Any other read or parse error is returned +// so the gate can treat it as transient. +// +// Endpoints are bare host:port (no scheme; the sidecar adds it); see +// platform.FileConfig for the file shape. +// +// Read fresh on every call: a mounted ConfigMap swaps atomically (a symlink flip +// on the directory mount), so re-reading picks up GitOps updates without a pod +// restart. Never cache an open handle. +func (r *SeiNodeReconciler) canonicalSyncers(chainID string) ([]string, error) { + path := strings.TrimSpace(r.Platform.ControllerConfigFile) + if path == "" { + return nil, nil + } + + raw, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + var cfg platform.FileConfig + if err := yaml.Unmarshal(raw, &cfg); err != nil { + return nil, fmt.Errorf("parsing controller config file %q: %w", path, err) + } + + // A YAML list entry may itself carry comma/whitespace-joined endpoints, so + // route the joined value through the same splitter the ConfigMap source used. + return parseSyncerList(strings.Join(cfg.StateSync.Syncers[chainID], "\n")), nil +} + +// parseSyncerList splits a syncer value on newlines and commas, trims +// whitespace, drops blanks, then sorts and de-duplicates. +func parseSyncerList(raw string) []string { + fields := strings.FieldsFunc(raw, func(r rune) bool { + return r == '\n' || r == '\r' || r == ',' || r == ' ' || r == '\t' + }) + if len(fields) == 0 { + return nil + } + out := make([]string, 0, len(fields)) + for _, f := range fields { + if f != "" { + out = append(out, f) + } + } + slices.Sort(out) + return slices.Compact(out) +} + +// setStateSyncReady sets ConditionStateSyncReady with ObservedGeneration +// stamped, following the always-present condition discipline. +func setStateSyncReady(node *seiv1alpha1.SeiNode, status metav1.ConditionStatus, reason, message string) { + apimeta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionStateSyncReady, + Status: status, + Reason: reason, + Message: message, + ObservedGeneration: node.Generation, + }) +} diff --git a/internal/controller/node/statesync_test.go b/internal/controller/node/statesync_test.go new file mode 100644 index 00000000..72c1de9c --- /dev/null +++ b/internal/controller/node/statesync_test.go @@ -0,0 +1,535 @@ +package node + +import ( + "context" + "os" + "path/filepath" + "slices" + "strings" + "testing" + + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/planner" +) + +const ( + testChainID = "arctic-1" + testNamespace = "default" + testImage = "sei:latest" + testNodeName = "sei-test" + syncerA = "a:26657" + syncerB = "b:26657" + syncerSingle = "only-one:26657" +) + +// syncerFileYAML renders a chainID -> [host:port] map as the read-only +// controller-config file content, wrapped under the stateSync.syncers section +// (matching canonicalSyncers' expected platform.FileConfig shape). +func syncerFileYAML(byChain map[string][]string) string { + var b strings.Builder + b.WriteString("stateSync:\n") + b.WriteString(" syncers:\n") + for chain, syncers := range byChain { + b.WriteString(" ") + b.WriteString(chain) + b.WriteString(":\n") + for _, s := range syncers { + b.WriteString(" - ") + b.WriteString(s) + b.WriteString("\n") + } + } + return b.String() +} + +// writeSyncerFile writes content to a temp file and points the reconciler's +// platform at it. The file lives under t.TempDir(), auto-cleaned by the test. +func writeSyncerFile(t *testing.T, r *SeiNodeReconciler, content string) { + t.Helper() + path := filepath.Join(t.TempDir(), "config.yaml") + if err := os.WriteFile(path, []byte(content), 0o600); err != nil { + t.Fatalf("writing controller config file: %v", err) + } + r.Platform.ControllerConfigFile = path +} + +// withSyncers writes a chainID -> syncers file and wires the reconciler to it. +func withSyncers(t *testing.T, r *SeiNodeReconciler, byChain map[string][]string) { + t.Helper() + writeSyncerFile(t, r, syncerFileYAML(byChain)) +} + +// stateSyncNode returns a FullNode with state sync enabled on the given chain. +func stateSyncNode(name, chainID string) *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: testNamespace}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: chainID, + Image: testImage, + FullNode: &seiv1alpha1.FullNodeSpec{ + Snapshot: &seiv1alpha1.SnapshotSource{StateSync: &seiv1alpha1.StateSyncSource{}}, + }, + }, + } +} + +func stateSyncCondition(node *seiv1alpha1.SeiNode) *metav1.Condition { + return apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionStateSyncReady) +} + +func TestParseSyncerList(t *testing.T) { + g := NewWithT(t) + cases := []struct { + name, in string + want []string + }{ + {"empty", "", nil}, + {"whitespace only", " \n\t ", nil}, + {"newline separated", "b:26657\na:26657", []string{syncerA, syncerB}}, + {"comma separated", "b:26657,a:26657", []string{syncerA, syncerB}}, + {"mixed with blanks", "a:26657,\n b:26657 ,,\n", []string{syncerA, syncerB}}, + {"dedup", "a:26657\na:26657\nb:26657", []string{syncerA, syncerB}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + g.Expect(parseSyncerList(tc.in)).To(Equal(tc.want)) + }) + } +} + +func TestStateSyncGate_EnabledWithTwoSyncers_Ready(t *testing.T) { + g := NewWithT(t) + + node := stateSyncNode("n", testChainID) + r, _ := newNodeReconciler(t, node) + withSyncers(t, r, map[string][]string{ + testChainID: {"syncer-1.arctic-1.example.com:26657", "syncer-0.arctic-1.example.com:26657"}, + }) + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeFalse()) + + cond := stateSyncCondition(node) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncReady)) + g.Expect(cond.ObservedGeneration).To(Equal(node.Generation)) + // Sorted + fed verbatim into the task source. + g.Expect(node.Status.ResolvedStateSyncers).To(Equal([]string{ + "syncer-0.arctic-1.example.com:26657", + "syncer-1.arctic-1.example.com:26657", + })) +} + +func TestStateSyncGate_EnabledWithOneSyncer_FailsClosed(t *testing.T) { + g := NewWithT(t) + + node := stateSyncNode("n", testChainID) + r, _ := newNodeReconciler(t, node) + withSyncers(t, r, map[string][]string{testChainID: {"only-one.arctic-1.example.com:26657"}}) + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeTrue()) + + cond := stateSyncCondition(node) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncNoSyncersConfigured)) + g.Expect(node.Status.ResolvedStateSyncers).To(BeNil()) +} + +// A configured path pointing at a file that doesn't exist (the backing +// ConfigMap isn't provisioned yet) fails closed, not transient. +func TestStateSyncGate_EnabledMissingFile_FailsClosed(t *testing.T) { + g := NewWithT(t) + + node := stateSyncNode("n", testChainID) + r, _ := newNodeReconciler(t, node) + r.Platform.ControllerConfigFile = filepath.Join(t.TempDir(), "absent.yaml") // never created + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeTrue()) + + cond := stateSyncCondition(node) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncNoSyncersConfigured)) +} + +func TestStateSyncGate_EnabledNoChainEntry_FailsClosed(t *testing.T) { + g := NewWithT(t) + + node := stateSyncNode("n", testChainID) + r, _ := newNodeReconciler(t, node) + withSyncers(t, r, map[string][]string{"other-chain": {syncerA, syncerB}}) + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeTrue()) + g.Expect(stateSyncCondition(node).Reason).To(Equal(seiv1alpha1.ReasonStateSyncNoSyncersConfigured)) +} + +// An unreadable/unparseable file is transient (Unknown + requeue), distinct +// from absence which fails closed. +func TestStateSyncGate_ParseError_Transient(t *testing.T) { + g := NewWithT(t) + + node := stateSyncNode("n", testChainID) + r, _ := newNodeReconciler(t, node) + writeSyncerFile(t, r, "this: [is: not: valid: yaml") // malformed + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeTrue()) + + cond := stateSyncCondition(node) + g.Expect(cond.Status).To(Equal(metav1.ConditionUnknown)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncSyncerSourceError)) + g.Expect(node.Status.ResolvedStateSyncers).To(BeNil()) +} + +func TestStateSyncGate_UnconfiguredSource_FailsClosed(t *testing.T) { + g := NewWithT(t) + + node := stateSyncNode("n", testChainID) + r, _ := newNodeReconciler(t, node) // Platform leaves ControllerConfigFile empty. + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeTrue()) + g.Expect(stateSyncCondition(node).Reason).To(Equal(seiv1alpha1.ReasonStateSyncNoSyncersConfigured)) +} + +func TestStateSyncGate_Disabled_NotApplicable(t *testing.T) { + g := NewWithT(t) + + // S3 snapshot node: state sync not enabled. + node := newSnapshotNode("n", testNamespace) + r, _ := newNodeReconciler(t, node) + withSyncers(t, r, map[string][]string{testChainID: {syncerA, syncerB}}) + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeFalse()) + + cond := stateSyncCondition(node) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncNotApplicable)) + g.Expect(node.Status.ResolvedStateSyncers).To(BeNil()) +} + +// A node with no snapshot source at all (e.g. genesis validator) is treated as +// state-sync-disabled: NotApplicable, never blocks the plan. +func TestStateSyncGate_NoSnapshotSource_NotApplicable(t *testing.T) { + g := NewWithT(t) + + node := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: "n", Namespace: testNamespace}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: testNodeName, + Image: testImage, + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + } + r, _ := newNodeReconciler(t, node) + withSyncers(t, r, map[string][]string{testChainID: {syncerA, syncerB}}) + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeFalse()) + g.Expect(stateSyncCondition(node).Reason).To(Equal(seiv1alpha1.ReasonStateSyncNotApplicable)) +} + +// Stale ResolvedStateSyncers must be cleared when the gate later fails closed, +// so a previously-good set can't leak into ConfigureStateSyncTask. +func TestStateSyncGate_FailClosedClearsStaleSyncers(t *testing.T) { + g := NewWithT(t) + + node := stateSyncNode("n", testChainID) + node.Status.ResolvedStateSyncers = []string{"old-a:26657", "old-b:26657"} + r, _ := newNodeReconciler(t, node) // unconfigured source → fail closed + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeTrue()) + g.Expect(node.Status.ResolvedStateSyncers).To(BeNil()) +} + +// The gate must not block (or even set ResolvedStateSyncers on) a non-state-sync +// node — regression guard for the full reconcile path. +func TestStateSyncGate_NonStateSyncNodeUnaffected(t *testing.T) { + g := NewWithT(t) + + node := newSnapshotNode("n", testNamespace) + r, _ := newNodeReconciler(t, node) + + blocked := r.reconcileStateSyncGate(node) + g.Expect(blocked).To(BeFalse()) + g.Expect(node.Status.ResolvedStateSyncers).To(BeEmpty()) + g.Expect(slices.Contains([]string{ + seiv1alpha1.ReasonStateSyncReady, seiv1alpha1.ReasonStateSyncNoSyncersConfigured, + }, stateSyncCondition(node).Reason)).To(BeFalse()) +} + +// Full reconcile path: a state-sync node with <2 syncers fails closed — no plan +// is built, the condition is persisted via the single status patch, and a +// Warning Event is emitted. +func TestReconcile_StateSyncFailClosed_NoPlanBuilt(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + node := stateSyncNode("ss-0", testNamespace) + node.Spec.ChainID = testChainID + r, c := newNodeReconciler(t, node) + rec := record.NewFakeRecorder(10) + r.Recorder = rec + withSyncers(t, r, map[string][]string{testChainID: {syncerSingle}}) + + _, err := r.Reconcile(ctx, nodeReqFor("ss-0", testNamespace)) + g.Expect(err).NotTo(HaveOccurred()) + + fetched := getSeiNode(t, ctx, c, "ss-0", testNamespace) + g.Expect(fetched.Status.Plan).To(BeNil(), "no state-sync plan must be built when fail-closed") + cond := stateSyncCondition(fetched) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncNoSyncersConfigured)) + + var sawWarning bool + for { + select { + case e := <-rec.Events: + if strings.Contains(e, "StateSyncBlocked") { + sawWarning = true + } + continue + default: + } + break + } + g.Expect(sawWarning).To(BeTrue(), "expected a StateSyncBlocked Warning Event") +} + +// A pre-Running, fail-closed state-sync node builds no plan, so nothing else +// drives a requeue and the mounted syncer file has no watch. The gate must +// requeue on the poll interval; once the file gains >=2 syncers the next +// reconcile re-resolves the gate and builds the plan (unblocks). +func TestReconcile_StateSyncFailClosed_RequeuesAndUnblocks(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + node := stateSyncNode("ss-poll", testNamespace) + node.Spec.ChainID = testChainID + node.Status.Phase = seiv1alpha1.PhasePending + r, c := newNodeReconciler(t, node) + withSyncers(t, r, map[string][]string{testChainID: {syncerSingle}}) + + res, err := r.Reconcile(ctx, nodeReqFor("ss-poll", testNamespace)) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(res.RequeueAfter).To(Equal(statusPollInterval), + "fail-closed pre-Running node must poll the mounted syncer file") + + fetched := getSeiNode(t, ctx, c, "ss-poll", testNamespace) + g.Expect(fetched.Status.Plan).To(BeNil()) + g.Expect(stateSyncCondition(fetched).Reason).To(Equal(seiv1alpha1.ReasonStateSyncNoSyncersConfigured)) + + // GitOps provisions the syncers; the mounted file now satisfies the floor. + withSyncers(t, r, map[string][]string{testChainID: {syncerA, syncerB}}) + + _, err = r.Reconcile(ctx, nodeReqFor("ss-poll", testNamespace)) + g.Expect(err).NotTo(HaveOccurred()) + + fetched = getSeiNode(t, ctx, c, "ss-poll", testNamespace) + g.Expect(fetched.Status.Plan).NotTo(BeNil(), "gate unblocks once the file has >=2 syncers") + g.Expect(findPlannedTask(fetched.Status.Plan, planner.TaskConfigureStateSync)).NotTo(BeNil()) + g.Expect(stateSyncCondition(fetched).Status).To(Equal(metav1.ConditionTrue)) +} + +// Full reconcile path: a paused node still gets the always-present +// StateSyncReady condition seeded, even though reconcile returns early on +// spec.paused before the gate enforcement. State-sync enabled with no syncers +// resolves to False/NoSyncersConfigured; state-sync disabled resolves to +// False/NotApplicable. Either way the condition must be present — and a paused +// node must build NO plan (pause semantics preserved). +func TestReconcile_PausedNode_StateSyncReadyStillSeeded(t *testing.T) { + cases := []struct { + name string + node *seiv1alpha1.SeiNode + withSyncers bool + wantReason string + }{ + { + name: "state-sync enabled, no syncers", + node: stateSyncNode("paused-ss", testNamespace), + withSyncers: true, + wantReason: seiv1alpha1.ReasonStateSyncNoSyncersConfigured, + }, + { + name: "state-sync disabled", + node: newSnapshotNode("paused-s3", testNamespace), + withSyncers: false, + wantReason: seiv1alpha1.ReasonStateSyncNotApplicable, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + tc.node.Spec.Paused = true + r, c := newNodeReconciler(t, tc.node) + if tc.withSyncers { + // Empty map for the chain → fail closed, but source is wired. + withSyncers(t, r, map[string][]string{"other-chain": {syncerA, syncerB}}) + } + + _, err := r.Reconcile(ctx, nodeReqFor(tc.node.Name, testNamespace)) + g.Expect(err).NotTo(HaveOccurred()) + + fetched := getSeiNode(t, ctx, c, tc.node.Name, testNamespace) + cond := stateSyncCondition(fetched) + g.Expect(cond).NotTo(BeNil(), "StateSyncReady must be present even on a paused node") + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(tc.wantReason)) + g.Expect(fetched.Status.Plan).To(BeNil(), "a paused node must build no plan") + }) + } +} + +// Fail-closed must NOT block terminal-plan cleanup. A state-sync node with <2 +// syncers and a terminal plan on status must have that plan cleared +// (handleTerminalPlan runs inside ResolvePlan, which now runs unconditionally) +// while still building NO new state-sync plan. +func TestReconcile_StateSyncFailClosed_ClearsTerminalPlan(t *testing.T) { + cases := []struct { + name string + phase seiv1alpha1.TaskPlanPhase + }{ + {"complete", seiv1alpha1.TaskPlanComplete}, + {"failed", seiv1alpha1.TaskPlanFailed}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + node := stateSyncNode("ss-term", testNamespace) + node.Spec.ChainID = testChainID + node.Status.Phase = seiv1alpha1.PhaseInitializing + node.Status.Plan = &seiv1alpha1.TaskPlan{ + ID: "stale-plan", + Phase: tc.phase, + Tasks: []seiv1alpha1.PlannedTask{{Type: planner.TaskConfigApply, Status: seiv1alpha1.TaskComplete}}, + } + r, c := newNodeReconciler(t, node) + withSyncers(t, r, map[string][]string{testChainID: {syncerSingle}}) + + _, err := r.Reconcile(ctx, nodeReqFor("ss-term", testNamespace)) + g.Expect(err).NotTo(HaveOccurred()) + + fetched := getSeiNode(t, ctx, c, "ss-term", testNamespace) + g.Expect(fetched.Status.Plan).To(BeNil(), + "terminal plan must be cleared even when fail-closed") + cond := stateSyncCondition(fetched) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncNoSyncersConfigured)) + }) + } +} + +// An unparseable syncer file must be transient: reconcileStatefulSet, +// Paused/Failed handling, and the status flush still run, the reconcile +// requeues instead of hard-aborting, and StateSyncReady reflects it via +// Unknown/SyncerSourceError. +func TestReconcile_StateSyncSyncerSourceError_Transient(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + node := stateSyncNode("ss-err", testNamespace) + node.Spec.ChainID = testChainID + r, c := newNodeReconciler(t, node) + writeSyncerFile(t, r, "{ this is not: valid yaml: at all") // malformed + + res, err := r.Reconcile(ctx, nodeReqFor("ss-err", testNamespace)) + g.Expect(err).NotTo(HaveOccurred(), "transient syncer-source error must not hard-abort") + g.Expect(res.RequeueAfter).To(BeNumerically(">", 0), "transient error must requeue") + + fetched := getSeiNode(t, ctx, c, "ss-err", testNamespace) + cond := stateSyncCondition(fetched) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionUnknown)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncSyncerSourceError)) + g.Expect(fetched.Status.Plan).To(BeNil(), "no state-sync plan while the gate is unresolved") + // StatefulSet sync still ran despite the syncer-source error. + sts := &appsv1.StatefulSet{} + g.Expect(c.Get(ctx, types.NamespacedName{Name: "ss-err", Namespace: testNamespace}, sts)).To(Succeed(), + "reconcileStatefulSet must run even when the syncer source read fails") +} + +// The StateSyncBlocked Warning fires once on the transition into fail-closed, +// not on every requeue. +func TestReconcile_StateSyncBlocked_EventFiresOncePerTransition(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + node := stateSyncNode("ss-evt", testNamespace) + node.Spec.ChainID = testChainID + r, _ := newNodeReconciler(t, node) + rec := record.NewFakeRecorder(10) + r.Recorder = rec + withSyncers(t, r, map[string][]string{testChainID: {syncerSingle}}) + + countBlocked := func() int { + n := 0 + for { + select { + case e := <-rec.Events: + if strings.Contains(e, "StateSyncBlocked") { + n++ + } + continue + default: + } + return n + } + } + + _, err := r.Reconcile(ctx, nodeReqFor("ss-evt", testNamespace)) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(countBlocked()).To(Equal(1), "first fail-closed reconcile emits StateSyncBlocked") + + // Plan stays nil and the condition is already False/NoSyncersConfigured, so a + // second reconcile is a no-op transition — no repeat event. + _, err = r.Reconcile(ctx, nodeReqFor("ss-evt", testNamespace)) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(countBlocked()).To(Equal(0), "steady-state requeue must not re-emit StateSyncBlocked") +} + +// Full reconcile path: a state-sync node with >=2 syncers proceeds — a plan is +// built carrying the canonical syncers, and StateSyncReady is True. +func TestReconcile_StateSyncReady_BuildsPlanWithSyncers(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + node := stateSyncNode("ss-1", testNamespace) + node.Spec.ChainID = testChainID + r, c := newNodeReconciler(t, node) + withSyncers(t, r, map[string][]string{ + testChainID: {"a.arctic-1.example.com:26657", "b.arctic-1.example.com:26657"}, + }) + + _, err := r.Reconcile(ctx, nodeReqFor("ss-1", testNamespace)) + g.Expect(err).NotTo(HaveOccurred()) + + fetched := getSeiNode(t, ctx, c, "ss-1", testNamespace) + g.Expect(fetched.Status.Plan).NotTo(BeNil(), "a plan must be built when state-sync is ready") + g.Expect(findPlannedTask(fetched.Status.Plan, planner.TaskConfigureStateSync)). + NotTo(BeNil(), "plan must carry the configure-state-sync task") + g.Expect(fetched.Status.ResolvedStateSyncers).To(Equal([]string{ + "a.arctic-1.example.com:26657", + "b.arctic-1.example.com:26657", + })) + cond := stateSyncCondition(fetched) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal(seiv1alpha1.ReasonStateSyncReady)) +} diff --git a/internal/peering/resolver.go b/internal/peering/resolver.go index 161f8f28..983370f0 100644 --- a/internal/peering/resolver.go +++ b/internal/peering/resolver.go @@ -1,6 +1,8 @@ // Package peering resolves a SeiNode's spec.peers into the fully-composed -// `@:` persistent_peers set and the in-cluster RPC witness -// endpoints for state sync. The controller is the sole owner of peer resolution. +// `@:` persistent_peers set. The controller is the sole +// owner of peer resolution. State-sync witnesses are NOT resolved here — they +// come from the controller-level canonical-syncer ConfigMap (see the node +// controller's StateSyncReady gate), not label-matched fleet peers. // // Resolve handles all three source kinds in-controller: // - Label — list matching SeiNodes, fetch each peer's node_id via its @@ -36,8 +38,8 @@ import ( // degraded or test environment without a sidecar factory still reconciles. var errNoSidecarFactory = errors.New("sidecar client factory is nil") -// Resolver resolves spec.peers into persistent_peers + RPC witnesses. All -// dependencies are injected so it is unit-testable without a cluster or AWS. +// Resolver resolves spec.peers into the persistent_peers set. All dependencies +// are injected so it is unit-testable without a cluster or AWS. type Resolver struct { // Reader lists SeiNodes for Label sources. Reader client.Reader @@ -57,9 +59,6 @@ type Result struct { // Peers are the fully-composed `@:` persistent_peers, // sorted and de-duplicated. Peers []string - // Witnesses are the in-cluster RPC endpoints of label-resolved peers, - // sorted and de-duplicated, for state-sync light-client use. - Witnesses []string } // Resolve resolves all of node.Spec.Peers. prior is the node's last resolved @@ -70,18 +69,17 @@ type Result struct { func (r *Resolver) Resolve(ctx context.Context, node *seiv1alpha1.SeiNode, prior []string) (Result, error) { priorByHost := indexResolvedPeersByHost(prior) - var peers, witnesses []string + var peers []string var preserveEC2Prior bool for i := range node.Spec.Peers { src := &node.Spec.Peers[i] switch { case src.Label != nil: - endpoints, w, err := r.resolveLabel(ctx, node, src.Label, priorByHost) + endpoints, err := r.resolveLabel(ctx, node, src.Label, priorByHost) if err != nil { return Result{}, err } peers = append(peers, endpoints...) - witnesses = append(witnesses, w...) case src.Static != nil: peers = append(peers, src.Static.Addresses...) case src.EC2Tags != nil: @@ -102,24 +100,19 @@ func (r *Resolver) Resolve(ctx context.Context, node *seiv1alpha1.SeiNode, prior slices.Sort(peers) peers = slices.Compact(peers) - slices.Sort(witnesses) - witnesses = slices.Compact(witnesses) - return Result{Peers: peers, Witnesses: witnesses}, nil + return Result{Peers: peers}, nil } -// resolveLabel lists SeiNodes matching the selector, composes each peer's -// `@:` entry, and yields the in-cluster RPC witness for -// each matched peer. A per-peer node_id fetch failure preserves the prior -// composed entry (from priorByHost) or skips the peer until it is resolvable — -// transients never wedge the fleet. Witnesses are deterministic from peer -// identity (no node_id needed) so every matched peer yields one regardless of -// sidecar reachability. +// resolveLabel lists SeiNodes matching the selector and composes each peer's +// `@:` entry. A per-peer node_id fetch failure preserves +// the prior composed entry (from priorByHost) or skips the peer until it is +// resolvable — transients never wedge the fleet. func (r *Resolver) resolveLabel( ctx context.Context, node *seiv1alpha1.SeiNode, src *seiv1alpha1.LabelPeerSource, priorByHost map[string]string, -) ([]string, []string, error) { +) ([]string, error) { logger := log.FromContext(ctx) ns := node.Namespace if src.Namespace != "" { @@ -131,18 +124,16 @@ func (r *Resolver) resolveLabel( client.InNamespace(ns), client.MatchingLabels(src.Selector), ); err != nil { - return nil, nil, fmt.Errorf("listing peers by label: %w", err) + return nil, fmt.Errorf("listing peers by label: %w", err) } - var endpoints, witnesses []string + var endpoints []string for i := range nodeList.Items { peer := &nodeList.Items[i] if peer.Name == node.Name && peer.Namespace == node.Namespace { continue } - witnesses = append(witnesses, peerRPCAddress(peer)) - address := peerAddress(peer) var sc task.SidecarClient err := errNoSidecarFactory @@ -164,7 +155,7 @@ func (r *Resolver) resolveLabel( } logger.Info("skipping peer until node_id is resolvable", "peer", peer.Name, "err", err) } - return endpoints, witnesses, nil + return endpoints, nil } // indexResolvedPeersByHost maps `host:port` → `@host:port` for O(1) @@ -190,12 +181,3 @@ func peerAddress(peer *seiv1alpha1.SeiNode) string { return fmt.Sprintf("%s-0.%s.%s.svc.cluster.local:%d", peer.Name, peer.Name, peer.Namespace, seiconfig.PortP2P) } - -// peerRPCAddress returns the in-cluster headless Service DNS for a peer's RPC -// port. Unlike peerAddress it never consults Spec.ExternalAddress: the external -// NLB exposes P2P only, so a state-sync light-client witness must target the -// cluster-internal RPC endpoint or seid exits on "no witnesses connected". -func peerRPCAddress(peer *seiv1alpha1.SeiNode) string { - return fmt.Sprintf("%s-0.%s.%s.svc.cluster.local:%d", - peer.Name, peer.Name, peer.Namespace, seiconfig.PortRPC) -} diff --git a/internal/peering/resolver_test.go b/internal/peering/resolver_test.go index 81f739e8..c9622526 100644 --- a/internal/peering/resolver_test.go +++ b/internal/peering/resolver_test.go @@ -101,12 +101,9 @@ func TestResolve_Static_Verbatim(t *testing.T) { } want := []string{"abc@1.2.3.4:26656", "def@5.6.7.8:26656"} assertEqualPeers(t, got.Peers, want) - if len(got.Witnesses) != 0 { - t.Errorf("static source should yield no witnesses, got %v", got.Witnesses) - } } -func TestResolve_Label_ComposesNodeIDAndWitness(t *testing.T) { +func TestResolve_Label_ComposesNodeID(t *testing.T) { consumer := fullNode("consumer", nil) consumer.Spec.Peers = []seiv1alpha1.PeerSource{ {Label: &seiv1alpha1.LabelPeerSource{Selector: map[string]string{roleLabel: rolePeer}}}, @@ -123,7 +120,6 @@ func TestResolve_Label_ComposesNodeIDAndWitness(t *testing.T) { t.Fatalf("Resolve: %v", err) } assertEqualPeers(t, got.Peers, []string{"node-xyz@peer-1-0.peer-1.default.svc.cluster.local:26656"}) - assertEqualPeers(t, got.Witnesses, []string{"peer-1-0.peer-1.default.svc.cluster.local:26657"}) } func TestResolve_Label_PrefersExternalAddress(t *testing.T) { @@ -143,9 +139,7 @@ func TestResolve_Label_PrefersExternalAddress(t *testing.T) { if err != nil { t.Fatalf("Resolve: %v", err) } - // Peer uses external P2P address; witness stays internal RPC DNS. assertEqualPeers(t, got.Peers, []string{"nid@pub.example.com:26656"}) - assertEqualPeers(t, got.Witnesses, []string{"pub-0.pub.default.svc.cluster.local:26657"}) } func TestResolve_Label_EmptySetYieldsNoPeers(t *testing.T) { @@ -187,8 +181,6 @@ func TestResolve_Label_TransientFailurePreservesPriorEntry(t *testing.T) { } // node_id fetch failed but a prior entry for this host exists → preserved. assertEqualPeers(t, got.Peers, prior) - // Witness is deterministic from identity → still emitted. - assertEqualPeers(t, got.Witnesses, []string{"peer-1-0.peer-1.default.svc.cluster.local:26657"}) } func TestResolve_Label_TransientFailureNoPriorSkips(t *testing.T) { diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 81a45135..4427d39b 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -155,6 +155,14 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod handleTerminalPlan(ctx, node) + // Fail-closed state-sync gate. Runs after handleTerminalPlan (so terminal + // plans still clear) but before building: a state-sync node whose + // StateSyncReady condition isn't True must never get a state-sync-bearing + // plan (CometBFT needs >=2 rpc-servers; we never fall back to peers). + if stateSyncBlocksPlan(node) { + return nil + } + mode, err := p.plannerForMode(node) if err != nil { return err @@ -180,6 +188,25 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod return nil } +// stateSyncBlocksPlan reports whether the fail-closed state-sync gate must +// suppress plan construction this reconcile. It fires only on the init path +// (pre-Running), which is the only path that builds a state-sync-bearing plan +// via buildSidecarProgression — a Running node's update plans carry no +// state-sync task, so an image roll must not be blocked by a syncer-source +// blip. The gate trips when state-sync is enabled and the controller-resolved +// StateSyncReady condition is not True (NoSyncersConfigured or the transient +// SyncerSourceError). A missing condition (not yet resolved) does not gate. +func stateSyncBlocksPlan(node *seiv1alpha1.SeiNode) bool { + if node.Status.Phase == seiv1alpha1.PhaseRunning { + return false + } + if !hasStateSync(node.Spec.SnapshotSource()) { + return false + } + cond := meta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionStateSyncReady) + return cond != nil && cond.Status != metav1.ConditionTrue +} + // handleTerminalPlan handles completed or failed plans: clears conditions // and nils the plan so the planner can build the next one if needed. func handleTerminalPlan(ctx context.Context, node *seiv1alpha1.SeiNode) { @@ -656,9 +683,15 @@ func snapshotRestoreTask(snap *seiv1alpha1.SnapshotSource) sidecar.SnapshotResto func configureStateSyncTask(node *seiv1alpha1.SeiNode) sidecar.ConfigureStateSyncTask { snap := node.Spec.SnapshotSource() + // RpcServers come from the controller-level canonical-syncer ConfigMap via + // Status.ResolvedStateSyncers (peers are no longer used as witnesses). This + // runs only when stateSyncBlocksPlan passed, i.e. StateSyncReady=True, which + // guarantees >=2 curated entries. The set is snapshotted into the task params + // here and never re-read at execution — so a transient ConfigMap blip on an + // already-active plan can't empty an in-flight witness list. t := sidecar.ConfigureStateSyncTask{ UseLocalSnapshot: hasS3Snapshot(snap), - RpcServers: node.Status.ResolvedRPCWitnesses, + RpcServers: node.Status.ResolvedStateSyncers, } if snap != nil { if snap.TrustPeriod != "" { diff --git a/internal/planner/statesync_witness_test.go b/internal/planner/statesync_witness_test.go index 79234dd9..47c86bd9 100644 --- a/internal/planner/statesync_witness_test.go +++ b/internal/planner/statesync_witness_test.go @@ -7,10 +7,10 @@ import ( seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" ) -func TestConfigureStateSyncTask_PassesResolvedWitnesses(t *testing.T) { - witnesses := []string{ - "syncer-0-0-0.syncer-0-0.arctic-1.svc.cluster.local:26657", - "syncer-0-1-0.syncer-0-1.arctic-1.svc.cluster.local:26657", +func TestConfigureStateSyncTask_PassesCanonicalSyncers(t *testing.T) { + syncers := []string{ + "syncer-0.arctic-1.example.com:26657", + "syncer-1.arctic-1.example.com:26657", } node := &seiv1alpha1.SeiNode{ Spec: seiv1alpha1.SeiNodeSpec{ @@ -18,13 +18,13 @@ func TestConfigureStateSyncTask_PassesResolvedWitnesses(t *testing.T) { Snapshot: &seiv1alpha1.SnapshotSource{TrustPeriod: "168h0m0s", BackfillBlocks: 6000}, }, }, - Status: seiv1alpha1.SeiNodeStatus{ResolvedRPCWitnesses: witnesses}, + Status: seiv1alpha1.SeiNodeStatus{ResolvedStateSyncers: syncers}, } task := configureStateSyncTask(node) - if !slices.Equal(task.RpcServers, witnesses) { - t.Errorf("RpcServers = %v, want %v", task.RpcServers, witnesses) + if !slices.Equal(task.RpcServers, syncers) { + t.Errorf("RpcServers = %v, want %v", task.RpcServers, syncers) } if task.TrustPeriod != "168h0m0s" { t.Errorf("TrustPeriod = %q, want 168h0m0s", task.TrustPeriod) @@ -34,9 +34,10 @@ func TestConfigureStateSyncTask_PassesResolvedWitnesses(t *testing.T) { } } -// No resolved witnesses (e.g. EC2/static peers) leaves RpcServers empty so the -// sidecar falls back to deriving witnesses from persistent_peers. -func TestConfigureStateSyncTask_NoWitnessesLeavesEmpty(t *testing.T) { +// No resolved syncers leaves RpcServers empty. In production the StateSyncReady +// gate fails closed before this task is reached when <2 syncers are configured; +// this guards the builder's nil-safety regardless. +func TestConfigureStateSyncTask_NoSyncersLeavesEmpty(t *testing.T) { node := &seiv1alpha1.SeiNode{} task := configureStateSyncTask(node) if len(task.RpcServers) != 0 { diff --git a/internal/platform/platform.go b/internal/platform/platform.go index d620b655..b97ac43f 100644 --- a/internal/platform/platform.go +++ b/internal/platform/platform.go @@ -21,8 +21,13 @@ const ( ) // Config holds infrastructure-level settings that vary per deployment -// environment. All fields are required and read from environment variables -// in main.go. See platformtest.Config() for test fixtures. +// environment. Fields are read from environment variables in main.go and are +// required unless documented otherwise — ControllerConfigFile is optional +// (state-sync is opt-in). See platformtest.Config() for test fixtures. +// +// Config is env-sourced infra; FileConfig (below) is the file-sourced +// application config. They are deliberately distinct: ControllerConfigFile is +// the path to the latter, not its contents. type Config struct { NodepoolName string NodepoolArchive string @@ -58,6 +63,30 @@ type Config struct { // CosmosExporterImage is the sei-cosmos-exporter sidecar image. // The cosmos-exporter container is attached to every SeiNode pod. CosmosExporterImage string + + // ControllerConfigFile is the path to the read-only application-config file + // the controller reads (SEI_CONTROLLER_CONFIG). It is the trust root for + // state-sync today: a GitOps-written ConfigMap mounted read-only (directory + // mount, not subPath, so atomic ConfigMap swaps propagate without a pod + // restart). Content is YAML decoded into FileConfig. + // + // The file is opt-in, so this may be empty when no node uses state-sync. + // When a node DOES enable state-sync and this is unset (or the file is + // missing, or yields <2 entries for its chain), the controller fails closed + // via StateSyncReady=False/NoSyncersConfigured rather than building a + // witness-less plan. + ControllerConfigFile string +} + +// FileConfig is the controller's file-sourced application config (SEI_CONTROLLER_CONFIG). +type FileConfig struct { + StateSync StateSyncConfig `json:"stateSync"` +} + +// StateSyncConfig is the state-sync section of the application config. +type StateSyncConfig struct { + // Syncers maps chainID -> bare host:port RPC endpoints (no scheme; sidecar adds it). + Syncers map[string][]string `json:"syncers"` } // NodepoolForMode returns the Karpenter NodePool name for the given diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index a56e9c59..eb47bf05 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -972,12 +972,23 @@ spec: type: array resolvedRPCWitnesses: description: |- - ResolvedRPCWitnesses carries the in-cluster RPC endpoints - (`-0...svc.cluster.local:26657`) of the label-resolved - peers, used as CometBFT state-sync light-client witnesses. Unlike - ResolvedPeers these never carry an external P2P address — RPC is - internal-only. When empty the sidecar derives witnesses from - persistent_peers instead. + ResolvedRPCWitnesses is DEPRECATED and no longer written. State-sync + witnesses now come from the controller-level canonical-syncer ConfigMap + (see ResolvedStateSyncers), not label-derived fleet peers. The field is + retained present-but-unwritten this release (CRD field removal is a + one-way door); remove it at the version bump. + + Deprecated: use ResolvedStateSyncers. + items: + type: string + type: array + resolvedStateSyncers: + description: |- + ResolvedStateSyncers carries the canonical state-sync RPC endpoints + (`host:port`) read from the canonical-syncer ConfigMap for this node's + chain, fed verbatim into ConfigureStateSyncTask.RpcServers. Written by the + StateSyncReady gate only when state-sync is enabled and >=2 syncers are + configured; otherwise left empty (fail closed). items: type: string type: array