diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 000000000..2688d5e37 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,135 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: release + +on: + workflow_dispatch: + inputs: + tag: + description: 'Image tag (e.g. v1.2.3-rc1). Leave blank to auto-generate from branch+SHA.' + required: false + create_release: + description: 'Create a GitHub release' + type: boolean + default: false + +permissions: + contents: write + packages: write + +jobs: + release: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Validate and resolve tag + id: tag + run: | + TAG="${{ inputs.tag }}" + if [[ -z "${TAG}" ]]; then + BRANCH="${GITHUB_REF_NAME//\//-}" + SHA="$(git rev-parse --short HEAD)" + TAG="${BRANCH}-${SHA}" + fi + if [[ "${{ inputs.create_release }}" == "true" ]]; then + if [[ ! "${TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9._-]+)?$ ]]; then + echo "::error::Tag '${TAG}' must match vMAJOR.MINOR.PATCH[-prerelease] when creating a release (e.g. 
v1.2.3 or v1.2.3-rc1)" + exit 1 + fi + fi + echo "value=${TAG}" >> "$GITHUB_OUTPUT" + if [[ "${{ inputs.create_release }}" == "true" ]]; then + echo "tags=${TAG},latest" >> "$GITHUB_OUTPUT" + else + echo "tags=${TAG}" >> "$GITHUB_OUTPUT" + fi + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: 'go.mod' + + - name: Install ko + uses: ko-build/setup-ko@v0.7 + + - name: Install Helm + uses: azure/setup-helm@v4 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up QEMU (multi-arch) + uses: docker/setup-qemu-action@v3 + + - name: Build and push images + env: + # ghcr.io/OWNER/REPO — resolves correctly in forks + IMAGE_REPOSITORY: ghcr.io/${{ github.repository }} + IMAGE_TAGS: ${{ steps.tag.outputs.tags }} + run: | + set -o errexit -o nounset -o pipefail + + for component in ateapi atecontroller atelet ateom-gvisor podcertcontroller atenet; do + KO_DOCKER_REPO="${IMAGE_REPOSITORY}/${component}" \ + ./hack/run-tool.sh ko build \ + --tags "${IMAGE_TAGS}" \ + --platform linux/amd64,linux/arm64 \ + --bare \ + "./cmd/${component}" + done + + - name: Package and push Helm charts + if: inputs.create_release + env: + HELM_EXPERIMENTAL_OCI: "1" + CHART_REPOSITORY: oci://ghcr.io/kagent-dev/substrate/helm + run: | + set -o errexit -o nounset -o pipefail + + tag="${{ steps.tag.outputs.value }}" + chart_version="${tag#v}" + package_dir="${RUNNER_TEMP}/helm-packages" + mkdir -p "${package_dir}" + + echo "${{ secrets.GITHUB_TOKEN }}" \ + | helm registry login ghcr.io \ + --username "${{ github.actor }}" \ + --password-stdin + + helm package charts/substrate-crds \ + --destination "${package_dir}" \ + --version "${chart_version}" \ + --app-version "${tag}" + helm package charts/substrate \ + --destination "${package_dir}" \ + --version "${chart_version}" \ + --app-version "${tag}" + + helm push "${package_dir}/substrate-crds-${chart_version}.tgz" 
"${CHART_REPOSITORY}" + helm push "${package_dir}/substrate-${chart_version}.tgz" "${CHART_REPOSITORY}" + + - name: Create GitHub Release + if: inputs.create_release + uses: softprops/action-gh-release@v2 + with: + tag_name: ${{ steps.tag.outputs.value }} + generate_release_notes: true diff --git a/.ko.yaml b/.ko.yaml index 774665cec..a001cee43 100644 --- a/.ko.yaml +++ b/.ko.yaml @@ -21,3 +21,6 @@ defaultPlatforms: baseImageOverrides: github.com/agent-substrate/substrate/demos/sandbox: alpine github.com/agent-substrate/substrate/demos/agent-secret: alpine + +x-agentgatewayEgressBaseImageOverrides: + github.com/agent-substrate/substrate/cmd/ateom-gvisor: cr.agentgateway.dev/agentgateway:latest-dev diff --git a/Makefile b/Makefile index c6b70cc5e..770d426e7 100644 --- a/Makefile +++ b/Makefile @@ -44,10 +44,11 @@ build: build-images build-atectl .PHONY: build-images build-images: - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/ateapi - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/atelet - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/podcertcontroller - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/atenet + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/ateapi + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atecontroller + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atelet + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/podcertcontroller + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atenet .PHONY: build-atectl build-atectl: @@ -92,3 +93,19 @@ verify: test .PHONY: clean clean: rm -rf $(BINDIR) + +# Render the substrate Helm chart into manifests/ate-install/ (mTLS mode, +# the historical default install). Run this whenever charts/substrate/ changes. +.PHONY: helm-template +helm-template: + @./hack/render-manifests.sh + +# Verify that manifests/ate-install/ matches the chart output. Used in CI. 
+.PHONY: verify-helm-template +verify-helm-template: + @./hack/render-manifests.sh --check + +# Verify that the CRD chart mirrors the generated CRDs. +.PHONY: verify-crd-chart +verify-crd-chart: + @./hack/verify/crd-chart.sh diff --git a/README.md b/README.md index fb63afb54..a29bdebe3 100644 --- a/README.md +++ b/README.md @@ -103,10 +103,10 @@ To quickly set up the complete environment: 2. Run the following steps: ```shell # create cluster and local registry -hack/create-kind-cluster.sh +KIND_ENABLE_PODCERT=false hack/create-kind-cluster.sh -# install ate, valkey, rustfs -hack/install-ate-kind.sh --deploy-ate-system +# install ate, valkey, rustfs using Helm in JWT mode +hack/install-ate-kind-jwt.sh # install counter demo hack/install-ate-kind.sh --deploy-demo-counter @@ -126,6 +126,21 @@ kubectl port-forward -n ate-system svc/atenet-router 8000:80 curl -X POST -H "Host: my-counter-1.actors.resources.substrate.ate.dev" -i http://localhost:8000/ ``` +#### mTLS mode + +JWT mode is the default install path and does not require pod certificate +feature gates. To test the older mTLS path, create kind with the +`ClusterTrustBundle` / `PodCertificateRequest` feature gates enabled and use the +mTLS install helper. + +```shell +# create cluster WITH podcert feature gates +hack/create-kind-cluster.sh + +# install ate using the mTLS manifests path +hack/install-ate-kind.sh --deploy-ate-system +``` + ### GKE Quickstart (Development) 1. Create and configure your environment file: diff --git a/manifests/ate-install/ate-system-namespace.yaml b/charts/substrate-crds/Chart.yaml similarity index 65% rename from manifests/ate-install/ate-system-namespace.yaml rename to charts/substrate-crds/Chart.yaml index 4fa19da0a..a69dcee0e 100644 --- a/manifests/ate-install/ate-system-namespace.yaml +++ b/charts/substrate-crds/Chart.yaml @@ -12,7 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-apiVersion: v1 -kind: Namespace -metadata: - name: ate-system \ No newline at end of file +apiVersion: v2 +name: substrate-crds +description: Agent Substrate CustomResourceDefinitions. +type: application +version: 0.1.0 +appVersion: "0.1.0" +home: https://github.com/agent-substrate/substrate +sources: +- https://github.com/agent-substrate/substrate +keywords: +- agent +- actor +- substrate +- crds diff --git a/charts/substrate-crds/README.md b/charts/substrate-crds/README.md new file mode 100644 index 000000000..12fa31f0a --- /dev/null +++ b/charts/substrate-crds/README.md @@ -0,0 +1,13 @@ +# substrate-crds + +Helm chart for installing the Agent Substrate CRDs. + +Install this chart before installing the main `substrate` chart: + +```bash +helm upgrade --install substrate-crds ./charts/substrate-crds +helm upgrade --install substrate ./charts/substrate --namespace ate-system --create-namespace +``` + +The CRD YAMLs in `templates/` mirror `manifests/ate-install/generated/`. +Run `hack/verify/crd-chart.sh` to verify they are in sync. diff --git a/charts/substrate-crds/templates/ate.dev_actortemplates.yaml b/charts/substrate-crds/templates/ate.dev_actortemplates.yaml new file mode 100644 index 000000000..e0b9a98f4 --- /dev/null +++ b/charts/substrate-crds/templates/ate.dev_actortemplates.yaml @@ -0,0 +1,680 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: actortemplates.ate.dev +spec: + group: ate.dev + names: + kind: ActorTemplate + listKind: ActorTemplateList + plural: actortemplates + shortNames: + - actortemplate + singular: actortemplate + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of ActorTemplate + properties: + containers: + description: Containers is the workload definition. + items: + description: A single application container that you want to run + within a WorkerPool. + properties: + command: + description: Entrypoint array. Not executed within a shell. + items: + type: string + maxItems: 64 + type: array + x-kubernetes-list-type: atomic + env: + description: Environment variables to set in the worker replicas. + items: + description: |- + EnvVar represents an environment variable supplied to a container in an + ActorTemplate. It models only a subset of Kubernetes Pod env behavior: + literal values are not expanded with Kubernetes-style $(VAR) references, + envFrom is not supported, and valueFrom currently supports only secretKeyRef. 
+ properties: + name: + description: |- + Name is the name of the environment variable. May be any printable ASCII + character except '='. + minLength: 1 + pattern: ^[ -<>-~]+$ + type: string + value: + description: |- + Variable value. Mutually exclusive with ValueFrom. + Value is the literal value of the environment variable. Unlike in + Kubernetes pods, this value is not interpolated, and $(VAR) + references are not expanded. + minLength: 0 + type: string + valueFrom: + description: |- + Source for the environment variable's value. Mutually exclusive with + Value. + maxProperties: 1 + minProperties: 1 + properties: + secretKeyRef: + description: Selects a key of a Secret in the ActorTemplate's + namespace. + properties: + key: + description: Key to select within the Secret. + minLength: 1 + pattern: ^[-._a-zA-Z0-9]+$ + type: string + name: + description: Name of the referent Secret. + maxLength: 253 + type: string + x-kubernetes-validations: + - message: Name must be a valid DNS subdomain + rule: '!format.dns1123Subdomain().validate(self).hasValue()' + optional: + description: Specify whether the Secret or its + key must be defined. + type: boolean + required: + - key + - name + type: object + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: exactly one of the fields in [value valueFrom] + must be set + rule: '[has(self.value),has(self.valueFrom)].filter(x,x==true).size() + == 1' + maxItems: 32 + type: array + image: + description: Image to use for the worker replicas. + type: string + x-kubernetes-validations: + - message: All images must be pinned (changing the image invalidates + snapshots) + rule: self.contains('@') + name: + description: Name of the container. 
+ maxLength: 63 + type: string + x-kubernetes-validations: + - message: Name must be a valid DNS label + rule: '!format.dns1123Label().validate(self).hasValue()' + required: + - image + - name + type: object + maxItems: 10 + type: array + egressPolicy: + description: |- + EgressPolicy defines the default outbound network policy for actors + created from this template. + properties: + allow: + description: Allow contains destination rules actors created from + this template may reach. + items: + properties: + credentials: + description: |- + Credentials configures explicit egress gateway credential injection for + matching outbound requests. + properties: + inject: + description: |- + Inject configures credentials that the egress gateway injects into + matching outbound requests. Values are referenced from Kubernetes Secrets; + the policy does not contain credential material. + items: + properties: + header: + description: Header is the outbound HTTP header + name to set. + type: string + valueFrom: + description: ValueFrom selects the source of the + injected credential value. + properties: + secretKeyRef: + description: SecretKeyRef selects a key in + a Kubernetes Secret. + properties: + key: + description: The key of the secret to + select from. Must be a valid secret + key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - header + - valueFrom + type: object + type: array + type: object + name: + description: Name is an optional human-readable identifier + for this rule. + type: string + ports: + description: |- + Ports is the list of destination ports matched by this rule. + If empty, the rule applies to all destination ports. + items: + properties: + port: + description: Port is the destination port number. + format: int32 + type: integer + protocol: + description: Protocol is the transport protocol for + this port. + type: string + required: + - port + type: object + type: array + tls: + description: TLS defines transport security requirements + for this destination. + properties: + intercept: + description: Intercept configures explicit TLS interception + for matching egress traffic. + properties: + issuerSecretRef: + description: |- + IssuerSecretRef references the CA material used by the egress gateway to + issue certificates for intercepted TLS traffic. + properties: + name: + description: name is unique within a namespace + to reference a secret resource. + type: string + namespace: + description: namespace defines the space within + which the secret name must be unique. + type: string + type: object + x-kubernetes-map-type: atomic + validateUpstream: + description: |- + ValidateUpstream controls whether the egress gateway validates the + upstream service certificate before proxying intercepted traffic. + type: boolean + type: object + mode: + description: Mode controls how TLS is handled for matching + egress traffic. 
+ enum: + - Require + - Originate + - Intercept + - Disable + type: string + required: + description: Required controls whether matching egress + traffic must use TLS. + type: boolean + type: object + to: + description: To lists the destinations matched by this rule. + items: + properties: + host: + description: Host is the DNS name to match for egress + traffic. + type: string + ipBlock: + description: IPBlock is the IP range to match for + egress traffic. + properties: + cidr: + description: CIDR is an IP address range in CIDR + notation. + type: string + required: + - cidr + type: object + type: object + type: array + type: object + type: array + audit: + description: Audit configures egress logging and tracing for actors + created from this template. + properties: + logs: + description: Logs enables egress access logs for actors created + from this template. + type: boolean + redactHeaders: + description: RedactHeaders is the list of headers that must + be redacted from audit output. + items: + type: string + type: array + traces: + description: Traces enables egress tracing for actors created + from this template. + type: boolean + type: object + defaultAction: + description: DefaultAction is applied when no allow rule matches. + enum: + - Allow + - Deny + type: string + deny: + description: Deny contains destination rules actors created from + this template may not reach. + items: + properties: + credentials: + description: |- + Credentials configures explicit egress gateway credential injection for + matching outbound requests. + properties: + inject: + description: |- + Inject configures credentials that the egress gateway injects into + matching outbound requests. Values are referenced from Kubernetes Secrets; + the policy does not contain credential material. + items: + properties: + header: + description: Header is the outbound HTTP header + name to set. 
+ type: string + valueFrom: + description: ValueFrom selects the source of the + injected credential value. + properties: + secretKeyRef: + description: SecretKeyRef selects a key in + a Kubernetes Secret. + properties: + key: + description: The key of the secret to + select from. Must be a valid secret + key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret + or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - header + - valueFrom + type: object + type: array + type: object + name: + description: Name is an optional human-readable identifier + for this rule. + type: string + ports: + description: |- + Ports is the list of destination ports matched by this rule. + If empty, the rule applies to all destination ports. + items: + properties: + port: + description: Port is the destination port number. + format: int32 + type: integer + protocol: + description: Protocol is the transport protocol for + this port. + type: string + required: + - port + type: object + type: array + tls: + description: TLS defines transport security requirements + for this destination. + properties: + intercept: + description: Intercept configures explicit TLS interception + for matching egress traffic. + properties: + issuerSecretRef: + description: |- + IssuerSecretRef references the CA material used by the egress gateway to + issue certificates for intercepted TLS traffic. + properties: + name: + description: name is unique within a namespace + to reference a secret resource. 
+ type: string + namespace: + description: namespace defines the space within + which the secret name must be unique. + type: string + type: object + x-kubernetes-map-type: atomic + validateUpstream: + description: |- + ValidateUpstream controls whether the egress gateway validates the + upstream service certificate before proxying intercepted traffic. + type: boolean + type: object + mode: + description: Mode controls how TLS is handled for matching + egress traffic. + enum: + - Require + - Originate + - Intercept + - Disable + type: string + required: + description: Required controls whether matching egress + traffic must use TLS. + type: boolean + type: object + to: + description: To lists the destinations matched by this rule. + items: + properties: + host: + description: Host is the DNS name to match for egress + traffic. + type: string + ipBlock: + description: IPBlock is the IP range to match for + egress traffic. + properties: + cidr: + description: CIDR is an IP address range in CIDR + notation. + type: string + required: + - cidr + type: object + type: object + type: array + type: object + type: array + type: object + pauseImage: + description: |- + PauseImage is the container to use as the root sandbox container. + + Typically, set it to [1] for on-gcp, and [2] for off-gcp + + - [1] gcr.io/gke-release/pause@sha256:bcbd57ba5653580ec647b16d8163cdd1112df3609129b01f912a8032e48265da + - [2] registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4 + type: string + x-kubernetes-validations: + - message: All images must be pinned (changing the image invalidates + snapshots) + rule: self.contains('@') + runsc: + description: Parameters for fetching the runsc binary to use. + properties: + amd64: + description: Configuration for the amd64 binary. + properties: + sha256Hash: + description: |- + The SHA256 hash of the binary to download. 
Used both to name the + downloaded file (for preventing conflicts), and to check the integrity of + the downloaded file. + pattern: ^[a-z0-9]+$ + type: string + url: + description: | + A gs:// URL pointing to a runsc binary that can be downloaded (possibly + with atelet's credentials). + minLength: 1 + type: string + required: + - sha256Hash + - url + type: object + arm64: + description: Configuration for the arm64 binary. + properties: + sha256Hash: + description: |- + The SHA256 hash of the binary to download. Used both to name the + downloaded file (for preventing conflicts), and to check the integrity of + the downloaded file. + pattern: ^[a-z0-9]+$ + type: string + url: + description: | + A gs:// URL pointing to a runsc binary that can be downloaded (possibly + with atelet's credentials). + minLength: 1 + type: string + required: + - sha256Hash + - url + type: object + authentication: + description: How should atelet authenticate to download the runsc + binary? + properties: + gcp: + description: Use GCP application-default credentials. + type: object + type: object + type: object + snapshotsConfig: + description: Snapshots configuration for the actor. + properties: + location: + description: Location to store snapshots in. + minLength: 1 + type: string + required: + - location + type: object + workerPoolRef: + description: | + Name of the worker pool to use for the actor. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 
+ For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + required: + - pauseImage + - runsc + - snapshotsConfig + - workerPoolRef + type: object + status: + description: status is the observed state of ActorTemplate + properties: + conditions: + description: conditions defines the status conditions array + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. 
If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + goldenActorID: + type: string + goldenSnapshot: + type: string + phase: + description: Phase of the actor template. 
+ type: string + takeGoldenSnapshotAt: + format: date-time + type: string + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/charts/substrate-crds/templates/ate.dev_workerpools.yaml b/charts/substrate-crds/templates/ate.dev_workerpools.yaml new file mode 100644 index 000000000..3e2387802 --- /dev/null +++ b/charts/substrate-crds/templates/ate.dev_workerpools.yaml @@ -0,0 +1,99 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: workerpools.ate.dev +spec: + group: ate.dev + names: + kind: WorkerPool + listKind: WorkerPoolList + plural: workerpools + shortNames: + - workerpool + singular: workerpool + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.replicas + name: Desired + type: integer + - jsonPath: .status.replicas + name: Replicas + type: integer + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: WorkerPool is the Schema for the workerpools API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of WorkerPool + properties: + ateomImage: + description: AteomImage is the ateom container image to deploy as + workers. + minLength: 1 + type: string + replicas: + description: Replicas is the number of worker pods to run. + format: int32 + minimum: 0 + type: integer + required: + - ateomImage + - replicas + type: object + status: + description: status is the observed state of WorkerPool + properties: + replicas: + description: Replicas is the total number of worker pods. + format: int32 + minimum: 0 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas + status: {} diff --git a/charts/substrate/Chart.yaml b/charts/substrate/Chart.yaml new file mode 100644 index 000000000..52bd74800 --- /dev/null +++ b/charts/substrate/Chart.yaml @@ -0,0 +1,27 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: substrate +description: Agent Substrate — actor runtime, control plane, and data-plane router. +type: application +version: 0.1.0 +appVersion: "0.1.0" +home: https://github.com/agent-substrate/substrate +sources: +- https://github.com/agent-substrate/substrate +keywords: +- agent +- actor +- substrate diff --git a/charts/substrate/README.md b/charts/substrate/README.md new file mode 100644 index 000000000..2fe9bbbbb --- /dev/null +++ b/charts/substrate/README.md @@ -0,0 +1,71 @@ +# substrate + +Helm chart for installing Agent Substrate. + +## Install modes + +| Mode | Default? | Cluster requirements | Trade-off | +|------|----------|----------------------|-----------| +| `jwt` | yes | none beyond stock K8s | Server certs and session signing pools are generated by the chart; clients authenticate via projected ServiceAccount tokens. Valkey runs plaintext intra-cluster. | +| `mtls` | | feature gates `ClusterTrustBundle`, `ClusterTrustBundleProjection`, `PodCertificateRequest` + `certificates.k8s.io/v1beta1` API | Full in-cluster mTLS via the bundled `podcertcontroller`. | + +```bash +# CRDs +helm upgrade --install substrate-crds ./charts/substrate-crds + +# JWT mode (default; no off-by-default feature gates) +helm upgrade --install substrate ./charts/substrate + +# mTLS mode (requires off-by-default feature gates) +helm upgrade --install substrate ./charts/substrate \ + --set auth.mode=mtls +``` + +By default, component images are pulled from `ghcr.io/kagent-dev/substrate` +using the chart `appVersion` as the tag. Override `image.registry` and +`image.tag` to install from a different image repository or tag. + +## JWT-mode bootstrap + +JWT mode is standalone by default. 
The chart generates: + +- `Secret/ateapi-tls` +- `ConfigMap/ateapi-ca` +- `Secret/session-id-jwt-pool` +- `Secret/session-id-ca-pool` + +Existing generated data is reused on upgrade so key material does not rotate +during normal chart upgrades. Set `auth.jwt.bootstrap.enabled=false` to bring +your own resources with those names. + +## Render manifests without applying + +```bash +helm template substrate ./charts/substrate # jwt +helm template substrate ./charts/substrate --set auth.mode=mtls +``` + +`manifests/ate-install/` in the repo is the rendered mTLS output and is +regenerated by `make helm-template`. The separate `substrate-crds` chart +mirrors `manifests/ate-install/generated/`. + +## Values + +See `values.yaml` for the full set; the important keys: + +| Key | Default | Notes | +|-----|---------|-------| +| `auth.mode` | `jwt` | `jwt` or `mtls` | +| `auth.jwt.issuer` | `https://kubernetes.default.svc.cluster.local` | Override for managed clusters with provider-specific issuers | +| `auth.jwt.audience` | `api.ate-system.svc` | SA token audience | +| `auth.jwt.bootstrap.enabled` | `true` | Generate JWT TLS and session signing material | +| `auth.jwt.serverCertSecret` | `ateapi-tls` | Secret name | +| `auth.jwt.caBundleConfigMap` | `ateapi-ca` | ConfigMap name | +| `valkey.enabled` | `true` | Set false if you bring your own Redis/Valkey | +| `valkey.replicas` | `6` | StatefulSet size | +| `rustfs.enabled` | `true` | Deploy an in-cluster S3-compatible RustFS bucket for snapshots | +| `atelet.storageBackend` | `s3` | Default snapshot backend, wired to RustFS when `rustfs.enabled=true` | +| `redis.clusterAddress` | `""` (in-cluster) | Override to use external Redis | +| `redis.useIAMAuth` | `false` | Google IAM auth | +| `atelet.gcpAuthForImagePulls` | `false` | Enable only when using GCP registry auth | +| `otel.endpoint` | `""` | Set to an OTLP endpoint to export traces/metrics | diff --git a/charts/substrate/templates/NOTES.txt 
b/charts/substrate/templates/NOTES.txt new file mode 100644 index 000000000..f736c32f7 --- /dev/null +++ b/charts/substrate/templates/NOTES.txt @@ -0,0 +1,21 @@ +substrate {{ .Chart.AppVersion }} installed in mode: {{ .Values.auth.mode }} + +{{ if eq .Values.auth.mode "mtls" -}} +NOTE: mtls mode REQUIRES the following Kubernetes feature gates to be enabled: + - ClusterTrustBundle + - ClusterTrustBundleProjection + - PodCertificateRequest +plus the v1beta1 certificates API. On vanilla clusters (kind, EKS, etc.) you +must enable these explicitly. To install without them, pick auth.mode=jwt. +{{- else }} +JWT mode is active. + +{{- if .Values.auth.jwt.bootstrap.enabled }} +JWT bootstrap resources are managed by this chart. Existing key material is +reused on upgrade. +{{- else }} +JWT bootstrap is disabled. Provide {{ .Values.auth.jwt.serverCertSecret }}, +{{ .Values.auth.jwt.caBundleConfigMap }}, session-id-jwt-pool, and +session-id-ca-pool before pods become healthy. +{{- end }} +{{- end }} diff --git a/charts/substrate/templates/_helpers.tpl b/charts/substrate/templates/_helpers.tpl new file mode 100644 index 000000000..2f8ecc8e6 --- /dev/null +++ b/charts/substrate/templates/_helpers.tpl @@ -0,0 +1,80 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{/* +Qualified resource name for a chart component. + +Usage: + {{ include "substrate.fullname" (list "ate-api-server" .) 
}} + +When the release name equals the chart name (the canonical render in +hack/render-manifests.sh — `helm template substrate charts/substrate`), this +returns the bare component name, so the generated manifests/ate-install/ +files keep their historical names ("ate-api-server", "ate-controller", ...). + +Otherwise resources are prefixed with the release name in the standard Helm +style ("foo-ate-api-server", ...) so multiple releases coexist without +colliding. The prefixed form is cut to 63 characters and a trailing "-" left +by the cut is dropped; the bare form is returned as-is, without truncation. +*/}} +{{- define "substrate.fullname" -}} +{{- $name := index . 0 -}} +{{- $ctx := index . 1 -}} +{{- if eq $ctx.Release.Name $ctx.Chart.Name -}} +{{- $name -}} +{{- else -}} +{{- printf "%s-%s" $ctx.Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Build an image reference for a substrate component binary. + +Usage: + {{ include "substrate.componentImage" (list "ateapi" .) }} + +Produces {image.registry}/{name}:{tag} where tag is resolved as: + 1. image.tag value, if set and not the sentinel "" + 2. .Chart.AppVersion, if image.tag is empty + 3. no tag (no colon) when image.tag is the sentinel "" + +The "" sentinel is used by hack/render-manifests.sh so that ko:// refs +are emitted without a tag, letting `ko resolve` supply the digest at build time. + +NOTE(review): as spelled here the sentinel is the empty string, which `default` +replaces with .Chart.AppVersion before the `ne $tag ""` test runs, so case 3 is +only reached when the chart's appVersion is empty as well. Confirm the sentinel +literal against hack/render-manifests.sh. +*/}} +{{- define "substrate.componentImage" -}} +{{- $name := index . 0 -}} +{{- $ctx := index . 1 -}} +{{- $registry := $ctx.Values.image.registry -}} +{{- $tag := $ctx.Values.image.tag | default $ctx.Chart.AppVersion -}} +{{- if ne $tag "" -}} +{{- printf "%s/%s:%s" $registry $name $tag -}} +{{- else -}} +{{- printf "%s/%s" $registry $name -}} +{{- end -}} +{{- end -}} + +{{/* +Validate auth.mode at template time.
+ +Aborts rendering via `fail` unless auth.mode is exactly "mtls" or "jwt", and, in +jwt mode, unless auth.jwt.issuer is set to a non-empty value. Call it with the +root context (it reads .Values); it produces no output of its own. + +NOTE(review): no template shown alongside this helper includes it — confirm it +is invoked somewhere, otherwise these checks never run. +*/}} +{{- define "substrate.validateAuthMode" -}} +{{- if not (or (eq .Values.auth.mode "mtls") (eq .Values.auth.mode "jwt")) -}} +{{- fail (printf "auth.mode must be 'mtls' or 'jwt', got %q" .Values.auth.mode) -}} +{{- end -}} +{{- if eq .Values.auth.mode "jwt" -}} +{{- if not .Values.auth.jwt.issuer -}} +{{- fail "auth.jwt.issuer is required when auth.mode=jwt" -}} +{{- end -}} +{{- end -}} +{{- end -}} diff --git a/charts/substrate/templates/ate-api-server-envvars.yaml b/charts/substrate/templates/ate-api-server-envvars.yaml new file mode 100644 index 000000000..d294a8046 --- /dev/null +++ b/charts/substrate/templates/ate-api-server-envvars.yaml @@ -0,0 +1,27 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/}} + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.ateApiServerEnvVarsConfigMap }} + namespace: {{ .Release.Namespace }} +data: + # Every value is defaulted before it is stringified: for an unset (nil) key, + # `quote` emits nothing (a null, non-string entry) and `toString` emits "<nil>". + ATE_API_REDIS_ADDRESS: {{ .Values.redis.clusterAddress | default (printf "valkey-cluster.%s.svc:6379" .Release.Namespace) | quote }} + ATE_API_REDIS_USE_IAM_AUTH: {{ .Values.redis.useIAMAuth | default false | toString | quote }} + ATE_API_REDIS_TLS_SERVER_NAME: {{ .Values.redis.tlsServerName | default "" | quote }} + ATE_API_REDIS_CLIENT_CERT: {{ .Values.redis.clientCert | default "" | quote }} + ATE_API_K8SJWT_ISSUER: {{ .Values.auth.jwt.issuer | default "" | quote }} diff --git a/charts/substrate/templates/ate-api-server.yaml b/charts/substrate/templates/ate-api-server.yaml new file mode 100644 index 000000000..fcf5ccc7e --- /dev/null +++ b/charts/substrate/templates/ate-api-server.yaml @@ -0,0 +1,232 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "ate-api-server-role" .) }} +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +- apiGroups: ["ate.dev"] + resources: ["actortemplates"] + verbs: ["get", "watch", "list"] +# Secret reads for env source resolution are intentionally NOT granted +# cluster-wide here. Each demo / tenant is responsible for granting +# ate-api-server read access only to the specific Secrets referenced by its +# ActorTemplates (e.g.
via a namespace-scoped Role + RoleBinding using +# resourceNames). +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "ate-api-server" .) }} + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "ate-api-server-binding" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "ate-api-server" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "ate-api-server-role" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "ate-api-server-deployment" .) }} + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: ate-api-server + template: + metadata: + labels: + app: ate-api-server + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + spec: + serviceAccountName: {{ include "substrate.fullname" (list "ate-api-server" .) }} +{{- if eq .Values.auth.mode "jwt" }} + initContainers: + - name: assemble-cred-bundle + image: {{ .Values.images.busybox }} + command: + - sh + - -c + - cat /run/ateapi-tls-src/tls.crt /run/ateapi-tls-src/tls.key > /run/ateapi-tls/credential-bundle.pem + volumeMounts: + - { name: ateapi-tls-src, mountPath: /run/ateapi-tls-src, readOnly: true } + - { name: ateapi-tls, mountPath: /run/ateapi-tls } +{{- end }} + containers: + - name: ate-api-server + image: {{ include "substrate.componentImage" (list "ateapi" .) 
}} + args: + - "--grpc-listen-addr=0.0.0.0:443" +{{- if eq .Values.auth.mode "mtls" }} + - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" + - "--redis-cluster-address=@env" + - "--redis-ca-certs=/etc/valkey-ca/ca.crt" + - "--redis-use-iam-auth=@env" + - "--redis-tls-server-name=@env" + - "--redis-client-cert=@env" + - "--client-jwt-issuer=@env" + - "--client-jwt-audience={{ .Values.auth.jwt.audience }}" + - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" + - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" + - "--workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem" +{{- else }} + - "--grpc-server-cred-bundle=/run/ateapi-tls/credential-bundle.pem" + - "--auth-mode=jwt" + - "--redis-cluster-address=@env" + - "--redis-no-tls=true" + - "--redis-use-iam-auth=@env" + - "--client-jwt-issuer={{ .Values.auth.jwt.issuer }}" + - "--client-jwt-audience={{ .Values.auth.jwt.audience }}" + - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" + - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" + - "--client-jwt-ca-cert=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +{{- end }} + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: OTEL_RESOURCE_ATTRIBUTES + value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) +{{- if .Values.otel.endpoint }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint | quote }} +{{- end }} + envFrom: + - configMapRef: + name: {{ .Values.ateApiServerEnvVarsConfigMap }} + optional: true + volumeMounts: +{{- if eq .Values.auth.mode "mtls" }} + - { name: servicedns, mountPath: /run/servicedns.podcert.ate.dev } + - { name: session-id-jwt-pool, mountPath: /run/session-id-jwt-pool } + - { name: 
valkey-ca-certs, mountPath: /etc/valkey-ca, readOnly: true } + - { name: session-id-ca-pool, mountPath: /run/session-id-ca-pool, readOnly: true } + - { name: workerpool-ca-certs, mountPath: /run/workerpool-ca-certs, readOnly: true } +{{- else }} + - { name: ateapi-tls, mountPath: /run/ateapi-tls, readOnly: true } + - { name: session-id-jwt-pool, mountPath: /run/session-id-jwt-pool } + - { name: session-id-ca-pool, mountPath: /run/session-id-ca-pool, readOnly: true } +{{- end }} + ports: + - containerPort: 443 + - name: prometheus + containerPort: 9090 + readinessProbe: + httpGet: + path: /readyz + port: 9090 + initialDelaySeconds: 5 + periodSeconds: 2 + volumes: +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: session-id-jwt-pool + projected: + sources: + - secret: + name: session-id-jwt-pool + items: + - { key: pool, path: pool.json } + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - { key: ca.crt, path: ca.crt } + - name: session-id-ca-pool + projected: + sources: + - secret: + name: session-id-ca-pool + items: + - { key: pool, path: pool.json } + - name: workerpool-ca-certs + projected: + sources: + - clusterTrustBundle: + signerName: podidentity.podcert.ate.dev/identity + labelSelector: + matchLabels: + podcert.ate.dev/canarying: live + path: trust-bundle.pem +{{- else }} + - name: ateapi-tls-src + secret: + secretName: {{ .Values.auth.jwt.serverCertSecret }} + - name: ateapi-tls + emptyDir: {} + - name: session-id-jwt-pool + projected: + sources: + - secret: + name: session-id-jwt-pool + items: + - { key: pool, path: pool.json } + - name: session-id-ca-pool + projected: + sources: + - secret: + name: session-id-ca-pool + items: + - { key: pool, path: pool.json } +{{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ 
include "substrate.fullname" (list "api" .) }} + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + app: ate-api-server + ports: + - name: grpc + protocol: TCP + port: 443 + targetPort: 443 diff --git a/charts/substrate/templates/ate-controller.yaml b/charts/substrate/templates/ate-controller.yaml new file mode 100644 index 000000000..5c403837c --- /dev/null +++ b/charts/substrate/templates/ate-controller.yaml @@ -0,0 +1,102 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +# Identity the ate-controller Deployment runs as. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: ate-controller +--- +# NOTE(review): the ClusterRole bound here is not defined in this template — +# confirm it is shipped elsewhere in the chart under the same qualified name. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + apiGroup: rbac.authorization.k8s.io +--- +kind: Service +apiVersion: v1 +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .)
}} + namespace: {{ .Release.Namespace }} + labels: + app: ate-controller +spec: + selector: + app: ate-controller + ports: + - name: metrics + port: 8080 + targetPort: metrics + protocol: TCP +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: ate-controller + template: + metadata: + labels: + app: ate-controller + spec: + serviceAccountName: {{ include "substrate.fullname" (list "ate-controller" .) }} + containers: + - name: ate-controller + image: {{ include "substrate.componentImage" (list "atecontroller" .) }} +{{- if eq .Values.auth.mode "jwt" }} + args: + - "--ateapi-auth=jwt" + - "--ateapi-ca-file=/run/ateapi-ca/ca.crt" + - "--ateapi-server-name={{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc" + - "--ateapi-token-file=/var/run/secrets/tokens/ateapi/token" +{{- end }} + ports: + - name: metrics + containerPort: 8080 + protocol: TCP + - name: healthz + containerPort: 8081 + protocol: TCP +{{- if eq .Values.auth.mode "jwt" }} + volumeMounts: + - { name: ateapi-ca, mountPath: /run/ateapi-ca, readOnly: true } + - { name: ateapi-token, mountPath: /var/run/secrets/tokens/ateapi, readOnly: true } + volumes: + - name: ateapi-ca + configMap: + name: {{ .Values.auth.jwt.caBundleConfigMap }} + - name: ateapi-token + projected: + sources: + - serviceAccountToken: + audience: {{ .Values.auth.jwt.audience }} + expirationSeconds: 3600 + path: token +{{- end }} diff --git a/charts/substrate/templates/atelet.yaml b/charts/substrate/templates/atelet.yaml new file mode 100644 index 000000000..10a13f505 --- /dev/null +++ b/charts/substrate/templates/atelet.yaml @@ -0,0 +1,117 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +# atelet — identical across auth modes (does not dial ateapi). +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atelet" .) }} + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "atelet-role" .) }} +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atelet-binding" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atelet" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "atelet-role" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "substrate.fullname" (list "atelet" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: atelet +spec: + selector: + matchLabels: + app: atelet + template: + metadata: + labels: + app: atelet + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atelet" .) }} + containers: + - name: atelet + image: {{ include "substrate.componentImage" (list "atelet" .) }} + args: + - --gcp-auth-for-image-pulls={{ .Values.atelet.gcpAuthForImagePulls }} +{{- with .Values.atelet.extraArgs }} +{{ toYaml . 
| indent 8 }} +{{- end }} + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName +{{- if .Values.otel.endpoint }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint | quote }} +{{- end }} + - name: ATE_STORAGE_BACKEND + value: {{ .Values.atelet.storageBackend | quote }} +{{- if .Values.rustfs.enabled }} + - name: AWS_REGION + value: us-east-1 + - name: AWS_ENDPOINT_URL + value: http://{{ include "substrate.fullname" (list "rustfs" .) }}.{{ .Release.Namespace }}.svc:9000 + - name: AWS_S3_USE_PATH_STYLE + value: "true" + - name: AWS_ACCESS_KEY_ID + value: {{ .Values.rustfs.accessKey | quote }} + - name: AWS_SECRET_ACCESS_KEY + value: {{ .Values.rustfs.secretKey | quote }} +{{- end }} +{{- with .Values.atelet.extraEnv }} +{{ toYaml . | indent 8 }} +{{- end }} + ports: + - name: grpc + containerPort: 8085 + hostPort: 8085 + - name: prometheus + containerPort: 9090 + hostPort: 9090 + protocol: TCP + volumeMounts: + - name: run-ateom + mountPath: /var/lib/ateom-gvisor + volumes: + - name: run-ateom + hostPath: + path: /var/lib/ateom-gvisor + type: DirectoryOrCreate diff --git a/charts/substrate/templates/atenet-dns.yaml b/charts/substrate/templates/atenet-dns.yaml new file mode 100644 index 000000000..0838d2c0b --- /dev/null +++ b/charts/substrate/templates/atenet-dns.yaml @@ -0,0 +1,177 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/}} + +# atenet-dns — identical across auth modes (does not dial ateapi). +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: dns +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +rules: +- apiGroups: [""] + resources: ["services"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: kube-system +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "dns" .) 
}} + namespace: {{ .Release.Namespace }} + labels: + app: dns +spec: + replicas: 1 + selector: + matchLabels: + app: dns + template: + metadata: + labels: + app: dns + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atenet-dns" .) }} + shareProcessNamespace: true + initContainers: + - name: init-dns + image: {{ .Values.images.busybox }} + command: ["sh", "-c"] + args: + - | + cat <<'EOF' > /etc/coredns/Corefile + .:53 { + errors + health :8080 + ready :8181 + reload + } + EOF + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + containers: + - name: coredns + image: {{ .Values.images.coredns }} + imagePullPolicy: IfNotPresent + args: [ "-conf", "/etc/coredns/Corefile" ] + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + ports: + - name: dns + containerPort: 53 + protocol: UDP + - name: dns-tcp + containerPort: 53 + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: 8080 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /ready + port: 8181 + scheme: HTTP + initialDelaySeconds: 5 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + - name: dns-controller + image: {{ include "substrate.componentImage" (list "atenet" .) }} + args: + - "dns" + - "--log-level=debug" + - "--interval=10s" + - "--corefile-path=/etc/coredns/Corefile" + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + volumes: + - name: dns-config-volume + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "substrate.fullname" (list "dns" .) 
}} + namespace: {{ .Release.Namespace }} + labels: + app: dns +spec: + selector: + app: dns + type: ClusterIP + ports: + - name: dns + port: 53 + protocol: UDP + - name: dns-tcp + port: 53 + protocol: TCP diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml new file mode 100644 index 000000000..01536184a --- /dev/null +++ b/charts/substrate/templates/atenet-router.yaml @@ -0,0 +1,270 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: atenet-router +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} +rules: +- apiGroups: + - "ate.dev" + resources: + - actortemplates + verbs: + - get + - watch + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "atenet-router" .) 
}} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "substrate.fullname" (list "atenet-router-agentgateway-config" .) }} + namespace: {{ .Release.Namespace }} +data: + config.yaml: | + # yaml-language-server: $schema=https://agentgateway.dev/schema/config + config: + adminAddr: "127.0.0.1:15000" + readinessAddr: "0.0.0.0:15021" + statsAddr: "0.0.0.0:15020" + binds: + - port: 8080 + listeners: + - name: http + protocol: HTTP + routes: + - name: substrate-http + matches: + - path: + pathPrefix: / + policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} + - port: 8443 + listeners: + - name: https + protocol: HTTPS + tls: +{{ if eq .Values.auth.mode "mtls" }} + cert: "/run/servicedns.podcert.ate.dev/cert.pem" + key: "/run/servicedns.podcert.ate.dev/key.pem" +{{ else }} + cert: "/run/agentgateway-tls/tls.crt" + key: "/run/agentgateway-tls/tls.key" +{{ end }} + routes: + - name: substrate-https + matches: + - path: + pathPrefix: / + policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: atenet-router +spec: + replicas: 1 + selector: + matchLabels: + app: atenet-router + template: + metadata: + labels: + app: atenet-router + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atenet-router" .) 
}} + containers: + - name: atenet-router + image: {{ include "substrate.componentImage" (list "atenet" .) }} + args: + - "router" + - "--standalone" + - "--networking-mode=agentgateway" + - "--namespace={{ .Release.Namespace }}" + - "--port-http=8080" + - "--port-extproc=50051" + - "--extproc-address=127.0.0.1" + - "--ateapi-address={{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc:443" +{{- if eq .Values.auth.mode "jwt" }} + - "--ateapi-auth=jwt" + - "--ateapi-ca-file=/run/ateapi-ca/ca.crt" + - "--ateapi-server-name={{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc" + - "--ateapi-token-file=/var/run/secrets/tokens/ateapi/token" +{{- end }} + - "--status-port=4040" + - "--port-https=8443" + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: OTEL_RESOURCE_ATTRIBUTES + value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) +{{- if .Values.otel.endpoint }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint | quote }} +{{- end }} + ports: + - name: extproc + containerPort: 50051 + - name: status + containerPort: 4040 + - name: metrics + containerPort: 9090 +{{- if eq .Values.auth.mode "jwt" }} + volumeMounts: + - name: ateapi-ca + mountPath: /run/ateapi-ca + readOnly: true + - name: ateapi-token + mountPath: /var/run/secrets/tokens/ateapi + readOnly: true +{{- end }} + - name: agentgateway + image: {{ .Values.images.agentgateway }} + args: + - "-f" + - "/etc/agentgateway/config.yaml" + ports: + - name: http + containerPort: 8080 + - name: https + containerPort: 8443 + - name: readiness + containerPort: 15021 + - name: gw-metrics + containerPort: 15020 + volumeMounts: + - name: agentgateway-config + mountPath: /etc/agentgateway +{{- if eq 
.Values.auth.mode "mtls" }} + - name: "servicedns" + mountPath: "/run/servicedns.podcert.ate.dev" +{{- else }} + - name: agentgateway-tls + mountPath: /run/agentgateway-tls + readOnly: true +{{- end }} + readinessProbe: + httpGet: + path: /healthz/ready + port: readiness + periodSeconds: 10 + volumes: + - name: agentgateway-config + configMap: + name: {{ include "substrate.fullname" (list "atenet-router-agentgateway-config" .) }} +{{- if eq .Values.auth.mode "mtls" }} + - name: "servicedns" + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + certificateChainPath: cert.pem + keyPath: key.pem +{{- else }} + - name: agentgateway-tls + secret: + secretName: {{ .Values.auth.jwt.serverCertSecret }} + - name: ateapi-ca + configMap: + name: {{ .Values.auth.jwt.caBundleConfigMap }} + - name: ateapi-token + projected: + sources: + - serviceAccountToken: + audience: {{ .Values.auth.jwt.audience }} + expirationSeconds: 3600 + path: token +{{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + app: atenet-router + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP + - name: https + port: 443 + targetPort: 8443 + protocol: TCP diff --git a/charts/substrate/templates/jwt-bootstrap.yaml b/charts/substrate/templates/jwt-bootstrap.yaml new file mode 100644 index 000000000..e3299fe08 --- /dev/null +++ b/charts/substrate/templates/jwt-bootstrap.yaml @@ -0,0 +1,73 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if and (eq .Values.auth.mode "jwt") .Values.auth.jwt.bootstrap.enabled }} +{{- $apiName := include "substrate.fullname" (list "api" .) }} +{{- $routerName := include "substrate.fullname" (list "atenet-router" .) }} +{{- $apiHost := printf "%s.%s.svc" $apiName .Release.Namespace }} +{{- $ca := genCA (printf "%s-ca" $apiName) 3650 }} +{{- $serverCert := genSignedCert $apiHost nil (list $apiHost (printf "%s.%s.svc.cluster.local" $apiName .Release.Namespace) (printf "%s.%s.svc" $routerName .Release.Namespace)) 365 $ca }} +{{- $sessionJWTKey := genPrivateKey "ecdsa" }} +{{- $sessionCA := genCA "session-id-ca" 3650 }} +{{- if .Values.auth.jwt.bootstrap.serverCert.enabled }} +{{- $existingTLS := lookup "v1" "Secret" .Release.Namespace .Values.auth.jwt.serverCertSecret }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.auth.jwt.serverCertSecret }} + namespace: {{ .Release.Namespace }} +type: kubernetes.io/tls +data: + tls.crt: {{ if $existingTLS }}{{ index $existingTLS.data "tls.crt" }}{{ else }}{{ $serverCert.Cert | b64enc }}{{ end }} + tls.key: {{ if $existingTLS }}{{ index $existingTLS.data "tls.key" }}{{ else }}{{ $serverCert.Key | b64enc }}{{ end }} +--- +{{- $existingCA := lookup "v1" "ConfigMap" .Release.Namespace .Values.auth.jwt.caBundleConfigMap }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.auth.jwt.caBundleConfigMap }} + namespace: {{ .Release.Namespace }} +data: + ca.crt: | +{{- if $existingCA }} +{{ index $existingCA.data "ca.crt" | nindent 4 }} +{{- else }} +{{ $ca.Cert | nindent 4 }} +{{- end }} +{{- end }} +{{- if 
.Values.auth.jwt.bootstrap.sessionPools.enabled }} +--- +{{- $existingJWTSecret := lookup "v1" "Secret" .Release.Namespace "session-id-jwt-pool" }} +apiVersion: v1 +kind: Secret +metadata: + name: session-id-jwt-pool + namespace: {{ .Release.Namespace }} +type: Opaque +data: + pool: {{ if $existingJWTSecret }}{{ index $existingJWTSecret.data "pool" }}{{ else }}{{ dict "Authorities" (list (dict "ID" "1" "Algorithm" "ES256" "SigningKeyPEM" $sessionJWTKey)) | toJson | b64enc }}{{ end }} +--- +{{- $existingCASecret := lookup "v1" "Secret" .Release.Namespace "session-id-ca-pool" }} +apiVersion: v1 +kind: Secret +metadata: + name: session-id-ca-pool + namespace: {{ .Release.Namespace }} +type: Opaque +data: + pool: {{ if $existingCASecret }}{{ index $existingCASecret.data "pool" }}{{ else }}{{ dict "CAs" (list (dict "ID" "1" "SigningKeyPEM" $sessionCA.Key "RootCertificatePEM" $sessionCA.Cert)) | toJson | b64enc }}{{ end }} +{{- end }} +{{- end }} diff --git a/charts/substrate/templates/jwt-oidc-rbac.yaml b/charts/substrate/templates/jwt-oidc-rbac.yaml new file mode 100644 index 000000000..a9fd499e9 --- /dev/null +++ b/charts/substrate/templates/jwt-oidc-rbac.yaml @@ -0,0 +1,42 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if eq .Values.auth.mode "jwt" }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) 
}} +rules: +- nonResourceURLs: + - /.well-known/openid-configuration + - /openid/v1/jwks + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "ate-api-server" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) }} + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/charts/substrate/templates/namespace.yaml b/charts/substrate/templates/namespace.yaml new file mode 100644 index 000000000..63401c00d --- /dev/null +++ b/charts/substrate/templates/namespace.yaml @@ -0,0 +1,23 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- include "substrate.validateAuthMode" . -}} +{{- if .Values.createNamespace }} +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Release.Namespace }} +{{- end }} diff --git a/charts/substrate/templates/pod-certificate-controller.yaml b/charts/substrate/templates/pod-certificate-controller.yaml new file mode 100644 index 000000000..3aaaa9df9 --- /dev/null +++ b/charts/substrate/templates/pod-certificate-controller.yaml @@ -0,0 +1,200 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if eq .Values.auth.mode "mtls" -}} +apiVersion: v1 +kind: Namespace +metadata: + name: podcertificate-controller-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) }} +rules: +# The service signer needs to be able to read services and pods. +- apiGroups: + - "" + resources: + - services + - pods + verbs: + - get + - list + - watch +- apiGroups: + - certificates.k8s.io + resources: + - podcertificaterequests + verbs: + - get + - list + - watch + - update +- apiGroups: + - certificates.k8s.io + resources: + - clustertrustbundles + verbs: + - create + - get + - list + - watch + - update + - delete +- apiGroups: + - certificates.k8s.io + resources: + - podcertificaterequests/status + verbs: + - update +- apiGroups: + - certificates.k8s.io + resources: + - signers + resourceNames: + - servicedns.podcert.ate.dev/* + - podidentity.podcert.ate.dev/* + verbs: + - sign + - attest +- apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) 
}} +subjects: +- kind: ServiceAccount + namespace: podcertificate-controller-system + name: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: podcertificate-controller-system + name: coordinator +rules: +- apiGroups: + - "coordination.k8s.io" + resources: + - "leases" + verbs: + - create + - get + - list + - watch + - update + - delete +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: podcertificate-controller-is-a-coordinator + namespace: podcertificate-controller-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: coordinator +subjects: +- kind: ServiceAccount + namespace: podcertificate-controller-system + name: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: podcertificate-controller + namespace: podcertificate-controller-system + labels: + app: podcertificate-controller +spec: + replicas: 1 + selector: + matchLabels: + app: podcertificate-controller + template: + metadata: + labels: + app: podcertificate-controller + spec: + containers: + - name: controller + image: {{ include "substrate.componentImage" (list "podcertcontroller" .) 
}} + args: + - --in-cluster=true + - --sharding-pod-namespace=$(POD_NAMESPACE) + - --sharding-pod-name=$(POD_NAME) + - --sharding-pod-uid=$(POD_UID) + - --sharding-application-name=podcertificate-controller + - --service-dns-ca-pool=/run/ca-state/service-dns-pool.json + - --pod-identity-ca-pool=/run/ca-state/pod-identity-pool.json + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + volumeMounts: + - name: "ca-state" + mountPath: "/run/ca-state" + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - NET_BIND_SERVICE + drop: + - ALL + readOnlyRootFilesystem: true + volumes: + - name: "ca-state" + projected: + sources: + - secret: + name: "service-dns-ca-pool" + items: + - key: "pool" + path: "service-dns-pool.json" + - secret: + name: "pod-identity-ca-pool" + items: + - key: "pool" + path: "pod-identity-pool.json" + dnsPolicy: Default + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccountName: default + terminationGracePeriodSeconds: 30 +{{- end }} diff --git a/manifests/ate-install/generated/role.yaml b/charts/substrate/templates/role.yaml similarity index 95% rename from manifests/ate-install/generated/role.yaml rename to charts/substrate/templates/role.yaml index 7341d28dd..8e3f7117d 100644 --- a/manifests/ate-install/generated/role.yaml +++ b/charts/substrate/templates/role.yaml @@ -16,7 +16,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: ate-controller + name: {{ include "substrate.fullname" (list "ate-controller" .) 
}} rules: - apiGroups: - "" diff --git a/charts/substrate/templates/rustfs.yaml b/charts/substrate/templates/rustfs.yaml new file mode 100644 index 000000000..edaad3cfa --- /dev/null +++ b/charts/substrate/templates/rustfs.yaml @@ -0,0 +1,137 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.rustfs.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "substrate.fullname" (list "rustfs-data" .) }} + namespace: {{ .Release.Namespace }} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.rustfs.storageSize }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "substrate.fullname" (list "rustfs" .) }} + namespace: {{ .Release.Namespace }} +spec: + selector: + app: rustfs + ports: + - name: api + port: 9000 + targetPort: 9000 + - name: console + port: 9001 + targetPort: 9001 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "rustfs" .) 
}} + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: rustfs + template: + metadata: + labels: + app: rustfs + spec: + securityContext: + runAsUser: 10001 + runAsGroup: 10001 + fsGroup: 10001 + containers: + - name: rustfs + image: {{ .Values.images.rustfs }} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 9000 + name: api + - containerPort: 9001 + name: console + env: + - name: RUSTFS_ADDRESS + value: ":9000" + - name: RUSTFS_CONSOLE_ADDRESS + value: ":9001" + - name: RUSTFS_CONSOLE_ENABLE + value: "true" + - name: RUSTFS_VOLUMES + value: "/data" + - name: RUSTFS_ACCESS_KEY + value: {{ .Values.rustfs.accessKey | quote }} + - name: RUSTFS_SECRET_KEY + value: {{ .Values.rustfs.secretKey | quote }} + volumeMounts: + - name: data + mountPath: /data + volumes: + - name: data + persistentVolumeClaim: + claimName: {{ include "substrate.fullname" (list "rustfs-data" .) }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "substrate.fullname" (list "rustfs-bucket-init" .) }} + namespace: {{ .Release.Namespace }} +spec: + backoffLimit: 10 + template: + spec: + restartPolicy: OnFailure + containers: + - name: create-bucket + image: {{ .Values.images.awsCli }} + env: + - name: AWS_ACCESS_KEY_ID + value: {{ .Values.rustfs.accessKey | quote }} + - name: AWS_SECRET_ACCESS_KEY + value: {{ .Values.rustfs.secretKey | quote }} + - name: AWS_REGION + value: us-east-1 + - name: AWS_ENDPOINT_URL + value: http://{{ include "substrate.fullname" (list "rustfs" .) }}.{{ .Release.Namespace }}.svc:9000 + command: + - /bin/sh + - -c + - | + set -e + for i in $(seq 1 60); do + if aws s3api head-bucket --bucket {{ .Values.rustfs.bucket }} 2>/dev/null; then + echo "bucket {{ .Values.rustfs.bucket }} already exists" + exit 0 + fi + if aws s3api create-bucket --bucket {{ .Values.rustfs.bucket }} 2>/dev/null; then + echo "bucket {{ .Values.rustfs.bucket }} created" + exit 0 + fi + echo "waiting for rustfs to become available... 
($i/60)" + sleep 2 + done + echo "timed out waiting for rustfs" + exit 1 +{{- end }} diff --git a/charts/substrate/templates/valkey.yaml b/charts/substrate/templates/valkey.yaml new file mode 100644 index 000000000..b8233eea9 --- /dev/null +++ b/charts/substrate/templates/valkey.yaml @@ -0,0 +1,253 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.valkey.enabled -}} +{{- $sts := include "substrate.fullname" (list "valkey-cluster" .) -}} +{{- $headless := include "substrate.fullname" (list "valkey-cluster-service" .) -}} +{{- $ns := .Release.Namespace -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "substrate.fullname" (list "valkey-config" .) }} + namespace: {{ .Release.Namespace }} +data: + valkey.conf: | +{{- if eq .Values.auth.mode "mtls" }} + # Enforce TLS and disable standard port + port 0 + tls-port 6379 + tls-cluster yes + tls-replication yes + + # Load certificates from projected volume + tls-cert-file /run/servicedns.podcert.ate.dev/credential-bundle.pem + tls-key-file /run/servicedns.podcert.ate.dev/credential-bundle.pem + tls-ca-cert-file /etc/valkey-ca/ca.crt + tls-auth-clients yes + + # Enable cluster mode +{{- else }} + # Plaintext: serve on the standard port, no TLS. 
+ port 6379 + +{{- end }} + cluster-enabled yes + cluster-config-file nodes.conf + cluster-node-timeout 5000 + appendonly yes + protected-mode no +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $headless }} + namespace: {{ .Release.Namespace }} +spec: + clusterIP: None + selector: + app: valkey-cluster + ports: + - name: valkey + port: 6379 + targetPort: 6379 + - name: bus + port: 16379 + targetPort: 16379 +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $sts }} + namespace: {{ .Release.Namespace }} +spec: + selector: + app: valkey-cluster + ports: + - name: valkey + port: 6379 + targetPort: 6379 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ $sts }} + namespace: {{ .Release.Namespace }} +spec: + serviceName: {{ $headless }} + replicas: {{ .Values.valkey.replicas }} + podManagementPolicy: Parallel + selector: + matchLabels: + app: valkey-cluster + template: + metadata: + labels: + app: valkey-cluster + spec: + containers: + - name: valkey + image: {{ .Values.images.valkey }} + command: ["valkey-server", "/etc/valkey/valkey.conf"] + ports: + - name: valkey + containerPort: 6379 + - name: bus + containerPort: 16379 + volumeMounts: + - name: config + mountPath: /etc/valkey +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + mountPath: /run/servicedns.podcert.ate.dev + - name: valkey-ca-certs + mountPath: /etc/valkey-ca + readOnly: true +{{- end }} + - name: data + mountPath: /data + volumes: + - name: config + configMap: + name: {{ include "substrate.fullname" (list "valkey-config" .) 
}} +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - key: ca.crt + path: ca.crt +{{- end }} + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: {{ .Values.valkey.storageSize }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "substrate.fullname" (list "valkey-cluster-init" .) }} + namespace: {{ .Release.Namespace }} +spec: + template: + metadata: + labels: + app: valkey-cluster-init + spec: + restartPolicy: OnFailure + containers: + - name: init + image: {{ .Values.images.valkey }} +{{- if eq .Values.auth.mode "mtls" }} + volumeMounts: + - name: servicedns + mountPath: /run/servicedns.podcert.ate.dev + - name: valkey-ca-certs + mountPath: /etc/valkey-ca + readOnly: true +{{- end }} + command: + - /bin/sh + - -c + - | + set -e + echo "Waiting for all Valkey pods to resolve..." + for i in 0 1 2 3 4 5; do + until getent hosts {{ $sts }}-${i}.{{ $headless }}.{{ $ns }}.svc >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-${i} DNS..." + sleep 2 + done + done + + echo "All pods resolved. Getting IPs..." + POD_IPS="" + for i in 0 1 2 3 4 5; do + ip=$(getent hosts {{ $sts }}-${i}.{{ $headless }}.{{ $ns }}.svc | awk '{print $1}') + POD_IPS="${POD_IPS} ${ip}:6379" + done + + echo "Checking if Valkey cluster is already initialized..." +{{- if eq .Values.auth.mode "mtls" }} + until valkey-cli --tls --cacert /etc/valkey-ca/ca.crt --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem --key /run/servicedns.podcert.ate.dev/credential-bundle.pem -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc ping >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-0 to respond to ping..." 
+ sleep 2 + done + + INIT_STATUS=$(valkey-cli --tls --cacert /etc/valkey-ca/ca.crt --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem --key /run/servicedns.podcert.ate.dev/credential-bundle.pem -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc cluster info 2>/dev/null | grep cluster_state || true) + + if [ -z "${INIT_STATUS}" ] || ! echo "${INIT_STATUS}" | grep -q "cluster_state:ok"; then + echo "Initializing Valkey cluster..." + valkey-cli --tls \ + --cacert /etc/valkey-ca/ca.crt \ + --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem \ + --key /run/servicedns.podcert.ate.dev/credential-bundle.pem \ + --cluster create ${POD_IPS} \ + --cluster-replicas 1 \ + --cluster-yes + echo "Cluster initialization complete!" + else + echo "Cluster already initialized." + fi +{{- else }} + until valkey-cli -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc -p 6379 ping >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-0 to respond to ping..." + sleep 2 + done + + INIT_STATUS=$(valkey-cli -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc -p 6379 cluster info 2>/dev/null | grep cluster_state || true) + + if [ -z "${INIT_STATUS}" ] || ! echo "${INIT_STATUS}" | grep -q "cluster_state:ok"; then + echo "Initializing Valkey cluster..." + valkey-cli \ + --cluster create ${POD_IPS} \ + --cluster-replicas 1 \ + --cluster-yes + echo "Cluster initialization complete!" + else + echo "Cluster already initialized." 
+ fi +{{- end }} +{{- if eq .Values.auth.mode "mtls" }} + volumes: + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - key: ca.crt + path: ca.crt +{{- end }} +{{- end }} diff --git a/charts/substrate/values.yaml b/charts/substrate/values.yaml new file mode 100644 index 000000000..7d6ed8bd8 --- /dev/null +++ b/charts/substrate/values.yaml @@ -0,0 +1,126 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for the substrate chart. +# +# The chart supports two installation modes via `auth.mode`: +# +# - "jwt" (default): No PodCertificateRequest / ClusterTrustBundle usage. Server +# certs and session signing pools are generated by the chart by default, +# and can be disabled when you want to provide your own key material. +# Clients authenticate to ateapi with a projected Kubernetes ServiceAccount +# token. Valkey runs plaintext. +# +# - "mtls": Server certs are issued by the in-cluster podcertcontroller via +# PodCertificateRequest + projected into pods via the ClusterTrustBundle / +# podCertificate projection sources. Valkey runs with full TLS + +# client-cert verification. 
REQUIRES the off-by-default Kubernetes feature +# gates: +# ClusterTrustBundle, ClusterTrustBundleProjection, PodCertificateRequest +# and the v1beta1 certificates API. + +auth: + mode: jwt # jwt | mtls + + jwt: + # OIDC issuer URL the cluster uses to mint SA tokens. The default matches + # stock kind/kubeadm-style clusters. Override this for managed clusters + # whose service account issuer is provider-specific. Examples: + # GKE: https://container.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/clusters/CLUSTER_NAME + # kind: https://kubernetes.default.svc.cluster.local + # EKS: https://oidc.eks.REGION.amazonaws.com/id/OIDC_PROVIDER_ID + issuer: https://kubernetes.default.svc.cluster.local + + # Audience SA tokens are minted for, and that ateapi expects. + audience: api.ate-system.svc + + bootstrap: + # Generate JWT-mode TLS and session-signing key material with Helm. + # Existing generated resources are reused on upgrade via lookup. + enabled: true + serverCert: + enabled: true + sessionPools: + enabled: true + + # Name of a kubernetes.io/tls Secret in the release namespace, with keys + # tls.crt and tls.key. Created by the chart when + # auth.jwt.bootstrap.serverCert.enabled=true. + serverCertSecret: ateapi-tls + + # Name of a ConfigMap in the release namespace with key "ca.crt" holding + # the CA(s) that signed serverCertSecret. Clients mount it to verify the + # ateapi server certificate. Created by the chart when + # auth.jwt.bootstrap.serverCert.enabled=true. + caBundleConfigMap: ateapi-ca + +# Set to true to have the chart create the release namespace. +# Off by default — most helm workflows expect the namespace to already exist +# (helm install -n NAMESPACE --create-namespace). Enable for the generated +# manifests/ate-install/ install path (kubectl apply). +createNamespace: false + +valkey: + enabled: true + replicas: 6 + storageSize: 1Gi + +rustfs: + enabled: true + storageSize: 1Gi + bucket: ate-snapshots + accessKey: rustfsadmin + secretKey: rustfsadmin + +# atelet daemonset overrides.
Defaults use the in-cluster RustFS deployment for +# snapshots. Set rustfs.enabled=false and override these fields when using +# external storage. +# extraArgs / extraEnv are appended verbatim for installer-specific knobs +# (e.g. registry replacement for kind). +atelet: + gcpAuthForImagePulls: false + storageBackend: s3 + extraArgs: [] + extraEnv: [] + +redis: + # Override the cluster address. Empty -> derived from valkey.enabled + # (defaults to "valkey-cluster.ate-system.svc:6379"). + clusterAddress: "" + # Google IAM auth (for managed Memorystore / cloud Valkey). + useIAMAuth: false + # Override TLS server name for Redis hostname verification (mtls mode). + tlsServerName: "" + # File path for Redis client TLS credential bundle (mtls mode). + clientCert: "" + +# Name of a ConfigMap in the release namespace that supplies per-environment +# overrides for ate-api-server (ATE_API_REDIS_*, ATE_API_K8SJWT_ISSUER, ...). +# Mounted via envFrom with optional=true. Created by the chart from these values. 
+ateApiServerEnvVarsConfigMap: ate-api-server-envvars + +otel: + endpoint: "" + +image: + registry: ghcr.io/kagent-dev/substrate + tag: "" + +images: + valkey: valkey/valkey:8.0 + rustfs: rustfs/rustfs:1.0.0-beta.3@sha256:378642b05b7dcb4849fb77ebe6aca4ced1c3f66e7e504247df95a5c9018d3358 + awsCli: amazon/aws-cli:2.17.0@sha256:643507c10ada7964ca6157b3d799f030b90577643da9955d319a77399ed80d73 + agentgateway: cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1 + coredns: coredns/coredns:1.11.1 + busybox: busybox:1.36 diff --git a/cmd/ateapi/internal/controlapi/workflow_suspend.go b/cmd/ateapi/internal/controlapi/workflow_suspend.go index d8210574b..6ce1ebfbf 100644 --- a/cmd/ateapi/internal/controlapi/workflow_suspend.go +++ b/cmd/ateapi/internal/controlapi/workflow_suspend.go @@ -141,29 +141,8 @@ func (s *CallAteletSuspendStep) Execute(ctx context.Context, input *SuspendInput ActorTemplateName: state.Actor.GetActorTemplateName(), ActorId: state.Actor.GetActorId(), Runsc: runscCfg, - Spec: &ateletpb.WorkloadSpec{ - PauseImage: state.ActorTemplate.Spec.PauseImage, - }, - SnapshotUriPrefix: state.Actor.GetInProgressSnapshot(), - } - for _, ctr := range state.ActorTemplate.Spec.Containers { - ateletCtr := &ateletpb.Container{ - Name: ctr.Name, - Image: ctr.Image, - Command: ctr.Command, - } - for _, env := range ctr.Env { - var val string - if env.Value != nil { - val = *env.Value - } - ateletEnv := &ateletpb.EnvEntry{ - Name: env.Name, - Value: val, - } - ateletCtr.Env = append(ateletCtr.Env, ateletEnv) - } - req.Spec.Containers = append(req.Spec.Containers, ateletCtr) + Spec: checkpointWorkloadSpecFromActorTemplate(state.ActorTemplate), + SnapshotUriPrefix: state.Actor.GetInProgressSnapshot(), } _, err = client.Checkpoint(ctx, req) if err != nil { diff --git a/cmd/ateapi/internal/controlapi/workload_spec.go b/cmd/ateapi/internal/controlapi/workload_spec.go index 018fb444f..feb0ed62d 100644 --- a/cmd/ateapi/internal/controlapi/workload_spec.go +++ 
b/cmd/ateapi/internal/controlapi/workload_spec.go @@ -21,6 +21,7 @@ import ( "time" "github.com/agent-substrate/substrate/internal/proto/ateletpb" + "github.com/agent-substrate/substrate/internal/proto/egresspb" atev1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -34,7 +35,8 @@ const envSecretCacheTTL = 30 * time.Second func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.Interface, secretCache *envSecretCache, actorTemplate *atev1alpha1.ActorTemplate) (*ateletpb.WorkloadSpec, error) { workloadSpec := &ateletpb.WorkloadSpec{ - PauseImage: actorTemplate.Spec.PauseImage, + PauseImage: actorTemplate.Spec.PauseImage, + EgressPolicy: buildAteletEgressPolicy(actorTemplate.Spec.EgressPolicy), } resolver := envResolver{ kubeClient: kubeClient, @@ -63,6 +65,123 @@ func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.In return workloadSpec, nil } +func checkpointWorkloadSpecFromActorTemplate(actorTemplate *atev1alpha1.ActorTemplate) *ateletpb.WorkloadSpec { + workloadSpec := &ateletpb.WorkloadSpec{ + PauseImage: actorTemplate.Spec.PauseImage, + EgressPolicy: buildAteletEgressPolicy(actorTemplate.Spec.EgressPolicy), + } + for _, ctr := range actorTemplate.Spec.Containers { + ateletCtr := &ateletpb.Container{ + Name: ctr.Name, + Image: ctr.Image, + Command: ctr.Command, + } + for _, env := range ctr.Env { + var val string + if env.Value != nil { + val = *env.Value + } + ateletCtr.Env = append(ateletCtr.Env, &ateletpb.EnvEntry{ + Name: env.Name, + Value: val, + }) + } + workloadSpec.Containers = append(workloadSpec.Containers, ateletCtr) + } + return workloadSpec +} + +func buildAteletEgressPolicy(policy *atev1alpha1.EgressPolicy) *egresspb.EgressPolicy { + if policy == nil { + return nil + } + return &egresspb.EgressPolicy{ + DefaultAction: string(policy.DefaultAction), + Allow: buildAteletEgressPolicyRules(policy.Allow), + Deny: 
buildAteletEgressPolicyRules(policy.Deny), + Audit: buildAteletEgressAuditPolicy(policy.Audit), + } +} + +func buildAteletEgressAuditPolicy(policy *atev1alpha1.EgressAuditPolicy) *egresspb.EgressAuditPolicy { + if policy == nil { + return nil + } + return &egresspb.EgressAuditPolicy{ + Logs: policy.Logs, + Traces: policy.Traces, + RedactHeaders: append([]string(nil), policy.RedactHeaders...), + } +} + +func buildAteletEgressPolicyRules(rules []atev1alpha1.EgressPolicyRule) []*egresspb.EgressPolicyRule { + out := make([]*egresspb.EgressPolicyRule, 0, len(rules)) + for _, rule := range rules { + outRule := &egresspb.EgressPolicyRule{} + for _, dest := range rule.To { + outDest := &egresspb.EgressPolicyDestination{Host: dest.Host} + if dest.IPBlock != nil { + outDest.Cidr = dest.IPBlock.CIDR + } + outRule.To = append(outRule.To, outDest) + } + for _, port := range rule.Ports { + outRule.Ports = append(outRule.Ports, &egresspb.EgressPort{ + Port: uint32(port.Port), + Protocol: string(port.Protocol), + }) + } + outRule.Tls = buildAteletEgressTLSPolicy(rule.TLS) + outRule.Credentials = buildAteletEgressCredentialPolicy(rule.Credentials) + out = append(out, outRule) + } + return out +} + +func buildAteletEgressTLSPolicy(policy *atev1alpha1.EgressTLSPolicy) *egresspb.EgressTLSPolicy { + if policy == nil { + return nil + } + out := &egresspb.EgressTLSPolicy{ + Mode: string(policy.Mode), + Required: policy.Required, + } + if policy.Intercept != nil { + out.Intercept = &egresspb.EgressTLSInterceptPolicy{ + ValidateUpstream: policy.Intercept.ValidateUpstream, + } + if policy.Intercept.IssuerSecretRef != nil { + out.Intercept.IssuerSecretRef = &egresspb.SecretReference{ + Name: policy.Intercept.IssuerSecretRef.Name, + Namespace: policy.Intercept.IssuerSecretRef.Namespace, + } + } + } + return out +} + +func buildAteletEgressCredentialPolicy(policy *atev1alpha1.EgressCredentialPolicy) *egresspb.EgressCredentialPolicy { + if policy == nil { + return nil + } + out := 
&egresspb.EgressCredentialPolicy{} + for _, injection := range policy.Inject { + outInjection := &egresspb.EgressCredentialInjection{ + Header: injection.Header, + } + if injection.ValueFrom.SecretKeyRef != nil { + outInjection.ValueFrom = &egresspb.EgressCredentialValueFrom{ + SecretKeyRef: &egresspb.SecretKeySelector{ + Name: injection.ValueFrom.SecretKeyRef.Name, + Key: injection.ValueFrom.SecretKeyRef.Key, + }, + } + } + out.Inject = append(out.Inject, outInjection) + } + return out +} + type envResolver struct { kubeClient kubernetes.Interface namespace string diff --git a/cmd/ateapi/internal/sessionidentity/sessionidentity.go b/cmd/ateapi/internal/sessionidentity/sessionidentity.go index 71fa2a6bc..c69fbbb48 100644 --- a/cmd/ateapi/internal/sessionidentity/sessionidentity.go +++ b/cmd/ateapi/internal/sessionidentity/sessionidentity.go @@ -21,6 +21,7 @@ import ( "crypto/x509/pkix" "fmt" "log/slog" + "net/http" "net/url" "os" "path" @@ -51,17 +52,19 @@ type Server struct { sessionIDCAPoolFile string workerCACerts string + httpClient *http.Client } var _ ateapipb.SessionIdentityServer = (*Server)(nil) -func New(clientJWTIssuer, clientJWTAudience, sessionIDJWTPoolFile, sessionIDCAPoolFile, workerCACerts string) *Server { +func New(clientJWTIssuer, clientJWTAudience, sessionIDJWTPoolFile, sessionIDCAPoolFile, workerCACerts string, httpClient *http.Client) *Server { return &Server{ clientJWTIssuer: clientJWTIssuer, clientJWTAudience: clientJWTAudience, sessionIDJWTPoolFile: sessionIDJWTPoolFile, sessionIDCAPoolFile: sessionIDCAPoolFile, workerCACerts: workerCACerts, + httpClient: httpClient, } } @@ -78,7 +81,7 @@ func (s *Server) MintJWT(ctx context.Context, req *ateapipb.MintJWTRequest) (*at clientJWT := strings.TrimPrefix(authorization[0], "Bearer ") - clientClaims, err := k8sjwt.Verify(ctx, clientJWT, s.clientJWTIssuer, s.clientJWTAudience, time.Now()) + clientClaims, err := k8sjwt.Verify(ctx, s.httpClient, clientJWT, s.clientJWTIssuer, s.clientJWTAudience, 
time.Now()) if err != nil { slog.ErrorContext(ctx, "Error while verifying client JWT", slog.Any("err", err)) return nil, status.Errorf(codes.Unauthenticated, "Unauthenticated") diff --git a/cmd/ateapi/main.go b/cmd/ateapi/main.go index 87d5a7433..76705db6e 100644 --- a/cmd/ateapi/main.go +++ b/cmd/ateapi/main.go @@ -21,12 +21,15 @@ import ( "fmt" "log/slog" "net" + "net/http" "os" + "strings" "time" "github.com/agent-substrate/substrate/cmd/ateapi/internal/controlapi" "github.com/agent-substrate/substrate/cmd/ateapi/internal/sessionidentity" "github.com/agent-substrate/substrate/cmd/ateapi/internal/store/ateredis" + "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/ateinterceptors" "github.com/agent-substrate/substrate/internal/credbundle" "github.com/agent-substrate/substrate/internal/serverboot" @@ -56,6 +59,7 @@ var ( redisUseIAMAuth = pflag.String("redis-use-iam-auth", "true", "Whether to use Google IAM authentication for Redis/Valkey.") redisTLSServerName = pflag.String("redis-tls-server-name", "", "The ServerName to use for Redis TLS hostname verification.") redisClientCert = pflag.String("redis-client-cert", "", "The file containing client TLS certificate/key credential bundle for Redis/Valkey.") + redisNoTLS = pflag.Bool("redis-no-tls", false, "If true, connect to Redis/Valkey in plaintext (no TLS). 
For development / installs that don't enable Valkey TLS.") clientJWTIssuer = pflag.String("client-jwt-issuer", "", "The expected issuer URL for client JWTs.") clientJWTAudience = pflag.String("client-jwt-audience", "", "The expected audience for client JWTs.") @@ -64,7 +68,9 @@ var ( sessionIDCAPoolFile = pflag.String("session-id-ca-pool", "", "The file that contains the CA pool for signing session JWTs") workerpoolCACerts = pflag.String("workerpool-ca-certs", "", "The file that contains the CA for verifying workerpool client certificates.") - showVersion = pflag.Bool("version", false, "Print version and exit.") + showVersion = pflag.Bool("version", false, "Print version and exit.") + authMode = pflag.String("auth-mode", "mtls", "Auth mode for incoming gRPC: mtls|jwt. 'mtls' (default) relies on transport-level mTLS for client identity. 'jwt' additionally requires a Kubernetes ServiceAccount Bearer token on every RPC.") + clientJWTCAFile = pflag.String("client-jwt-ca-cert", "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", "CA cert file used to verify TLS when fetching the OIDC discovery document and JWKS for JWT authentication. 
Defaults to the in-cluster service account CA.") ) func main() { @@ -94,6 +100,11 @@ func main() { loadFlagsFromEnv() logFlagValues(ctx) + authModeParsed, err := ateapiauth.ParseMode(*authMode) + if err != nil { + serverboot.Fatal(ctx, "Invalid --auth-mode", err) + } + redisClient, err := connectRedis(ctx) if err != nil { serverboot.Fatal(ctx, "Failed to set up Redis/Valkey", err) @@ -133,7 +144,9 @@ func main() { dialer := controlapi.NewAteletDialer(workerPodInformer.GetIndexer(), ateletPodInformer.GetIndexer()) sm := controlapi.NewService(redisPersistence, actorTemplateLister, dialer, clientset) - sessionIdentitySrv := sessionidentity.New(*clientJWTIssuer, *clientJWTAudience, *sessionIDJWTPoolFile, *sessionIDCAPoolFile, *workerpoolCACerts) + jwtHTTPClient := buildJWTHTTPClient(ctx, *clientJWTCAFile) + + sessionIdentitySrv := sessionidentity.New(*clientJWTIssuer, *clientJWTAudience, *sessionIDJWTPoolFile, *sessionIDCAPoolFile, *workerpoolCACerts, jwtHTTPClient) lisCfg := &net.ListenConfig{} lis, err := lisCfg.Listen(ctx, "tcp", *listenAddr) @@ -141,10 +154,23 @@ func main() { serverboot.Fatal(ctx, "Failed to start listener", err) } + authCfg := ateapiauth.ServerConfig{ + Mode: authModeParsed, + Issuer: *clientJWTIssuer, + Audience: *clientJWTAudience, + HTTPClient: jwtHTTPClient, + } + mux := grpc.NewServer( grpc.Creds(serverCreds), grpc.StatsHandler(otelgrpc.NewServerHandler()), - grpc.UnaryInterceptor(ateinterceptors.ServerUnaryInterceptor), + grpc.ChainUnaryInterceptor( + ateapiauth.UnaryServerInterceptor(authCfg), + ateinterceptors.ServerUnaryInterceptor, + ), + grpc.ChainStreamInterceptor( + ateapiauth.StreamServerInterceptor(authCfg), + ), ) reflection.Register(mux) ateapipb.RegisterControlServer(mux, sm) @@ -191,25 +217,30 @@ func logFlagValues(ctx context.Context) { slog.String("redis-use-iam-auth", *redisUseIAMAuth), slog.String("redis-tls-server-name", *redisTLSServerName), slog.String("redis-client-cert", *redisClientCert), + slog.Bool("redis-no-tls", 
*redisNoTLS), slog.String("client-jwt-issuer", *clientJWTIssuer), slog.String("client-jwt-audience", *clientJWTAudience), slog.String("session-id-jwt-pool", *sessionIDJWTPoolFile), slog.String("session-id-ca-pool", *sessionIDCAPoolFile), slog.String("workerpool-ca-certs", *workerpoolCACerts), + slog.String("auth-mode", *authMode), ) } // connectRedis builds the Redis/Valkey TLS config, plumbs IAM auth if // requested, opens the cluster client, and pings with retries. func connectRedis(ctx context.Context) (*redis.ClusterClient, error) { - tlsConfig, err := buildRedisTLSConfig(ctx) - if err != nil { - return nil, err - } - clusterOpts := &redis.ClusterOptions{ - Addrs: []string{*redisClusterAddress}, - TLSConfig: tlsConfig, + Addrs: []string{*redisClusterAddress}, + } + if *redisNoTLS { + slog.InfoContext(ctx, "Connecting to Redis/Valkey without TLS (--redis-no-tls=true)") + } else { + tlsConfig, err := buildRedisTLSConfig(ctx) + if err != nil { + return nil, err + } + clusterOpts.TLSConfig = tlsConfig } if *redisUseIAMAuth != "false" { @@ -325,3 +356,48 @@ func buildServerCreds(ctx context.Context) (credentials.TransportCredentials, er ClientCAs: clientCAs, }), nil } + +const saTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token" + +// buildJWTHTTPClient returns an *http.Client that trusts caFile for TLS +// verification and injects the pod's ServiceAccount Bearer token, used when +// fetching the OIDC discovery document and JWKS from the in-cluster Kubernetes +// API server. Returns nil (use http.DefaultClient) if caFile is empty or unreadable. 
+func buildJWTHTTPClient(ctx context.Context, caFile string) *http.Client { + if caFile == "" { + return nil + } + ca, err := os.ReadFile(caFile) + if err != nil { + slog.WarnContext(ctx, "Could not read JWT CA cert file; OIDC discovery will use system trust", slog.String("path", caFile), slog.Any("err", err)) + return nil + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(ca) { + slog.WarnContext(ctx, "Could not parse JWT CA cert file; OIDC discovery will use system trust", slog.String("path", caFile)) + return nil + } + return &http.Client{ + Transport: &saTokenTransport{ + base: &http.Transport{ + TLSClientConfig: &tls.Config{RootCAs: pool}, + }, + }, + } +} + +// saTokenTransport injects the pod's ServiceAccount Bearer token on every +// request. Reads the token file fresh on each request so token rotation is +// handled automatically. +type saTokenTransport struct { + base http.RoundTripper +} + +func (t *saTokenTransport) RoundTrip(req *http.Request) (*http.Response, error) { + token, err := os.ReadFile(saTokenFile) + if err == nil && len(token) > 0 { + req = req.Clone(req.Context()) + req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(token))) + } + return t.base.RoundTrip(req) +} diff --git a/cmd/atecontroller/main.go b/cmd/atecontroller/main.go index f7e922273..4db44cc02 100644 --- a/cmd/atecontroller/main.go +++ b/cmd/atecontroller/main.go @@ -14,15 +14,14 @@ package main import ( - "crypto/tls" "os" + "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/controllers" clientv1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" "github.com/spf13/pflag" "google.golang.org/grpc" - "google.golang.org/grpc/credentials" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -39,6 +38,11 @@ var ( setupLog = 
ctrl.Log.WithName("setup") ateAPIConnSpec = pflag.String("ateapi-conn-spec", "dns:///api.ate-system.svc:443", "") + + ateapiAuthMode = pflag.String("ateapi-auth", "mtls", "Client auth to ateapi: mtls|jwt. 'mtls' (default) dials with insecure TLS and relies on pod-projected mTLS credentials for identity. 'jwt' verifies the server cert and sends a Bearer SA token.") + ateapiCAFile = pflag.String("ateapi-ca-file", "", "PEM file with CAs trusted to verify the ateapi server cert. Required for jwt.") + ateapiServerName = pflag.String("ateapi-server-name", "", "SNI / hostname expected on the ateapi server cert. Optional.") + ateapiTokenFile = pflag.String("ateapi-token-file", "", "Projected SA token file used as Bearer credential. Required for jwt.") ) func init() { @@ -47,15 +51,27 @@ func init() { } func main() { + pflag.Parse() ctrl.SetLogger(zap.New(zap.UseDevMode(true))) - // TODO: Verify server certificate, pass client certificate. - clientTLSConfig := &tls.Config{ - InsecureSkipVerify: true, // Temporarily bypass standard checks + mode, err := ateapiauth.ParseMode(*ateapiAuthMode) + if err != nil { + setupLog.Error(err, "invalid --ateapi-auth") + os.Exit(1) + } + + dialOpts, err := ateapiauth.DialOptions(ateapiauth.ClientConfig{ + Mode: mode, + CAFile: *ateapiCAFile, + ServerName: *ateapiServerName, + TokenFile: *ateapiTokenFile, + }) + if err != nil { + setupLog.Error(err, "building ateapi dial options") + os.Exit(1) } - clientCreds := credentials.NewTLS(clientTLSConfig) - ateapiConn, err := grpc.NewClient(*ateAPIConnSpec, grpc.WithTransportCredentials(clientCreds)) + ateapiConn, err := grpc.NewClient(*ateAPIConnSpec, dialOpts...) 
if err != nil { setupLog.Error(err, "Error creating grpc connection to ate api") os.Exit(1) diff --git a/cmd/atelet/main.go b/cmd/atelet/main.go index c4425bade..11f59d6d2 100644 --- a/cmd/atelet/main.go +++ b/cmd/atelet/main.go @@ -587,7 +587,9 @@ func (s *AteomHerder) dialAteom(ctx context.Context, targetAteomUid string) (ate // buildAteomWorkloadSpec projects the atelet-facing workload spec onto // the ateom-facing one — currently just the container names. func buildAteomWorkloadSpec(spec *ateletpb.WorkloadSpec) *ateompb.WorkloadSpec { - out := &ateompb.WorkloadSpec{} + out := &ateompb.WorkloadSpec{ + EgressPolicy: spec.GetEgressPolicy(), + } for _, ctr := range spec.GetContainers() { out.Containers = append(out.Containers, &ateompb.Container{Name: ctr.GetName()}) } diff --git a/cmd/atenet/internal/app/router/agentgateway.go b/cmd/atenet/internal/app/router/agentgateway.go new file mode 100644 index 000000000..af7fabe5a --- /dev/null +++ b/cmd/atenet/internal/app/router/agentgateway.go @@ -0,0 +1,151 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package router + +import ( + "context" + "fmt" + "strings" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +type agentgatewayProvider struct { + cfg RouterConfig +} + +func (p agentgatewayProvider) Name() string { + return NetworkingModeAgentgateway +} + +func (p agentgatewayProvider) RequiresXDS() bool { + return false +} + +func (p agentgatewayProvider) ConfigMapData() map[string]string { + return map[string]string{"config.yaml": p.localConfig()} +} + +func (p agentgatewayProvider) Container() corev1.Container { + ports := []corev1.ContainerPort{ + {Name: "http", ContainerPort: int32(p.cfg.HttpPort)}, + {Name: "readiness", ContainerPort: 15021}, + {Name: "metrics", ContainerPort: 15020}, + } + if p.cfg.HttpsPort > 0 && tlsCertPath(p.cfg) != "" { + ports = append(ports, corev1.ContainerPort{Name: "https", ContainerPort: int32(p.cfg.HttpsPort)}) + } + + return corev1.Container{ + Name: "agentgateway", + Image: p.cfg.AgentgatewayImage, + Args: []string{"-f", "/etc/agentgateway/config.yaml"}, + Ports: ports, + VolumeMounts: []corev1.VolumeMount{ + {Name: "proxy-config", MountPath: "/etc/agentgateway"}, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/healthz/ready", + Port: intstr.FromInt32(15021), + }, + }, + PeriodSeconds: 10, + }, + } +} + +func (p agentgatewayProvider) ServicePorts() []corev1.ServicePort { + ports := []corev1.ServicePort{ + {Name: "http", Port: int32(p.cfg.HttpPort), TargetPort: intstr.FromString("http")}, + } + if p.cfg.HttpsPort > 0 && tlsCertPath(p.cfg) != "" { + ports = append(ports, corev1.ServicePort{Name: "https", Port: int32(p.cfg.HttpsPort), TargetPort: intstr.FromString("https")}) + } + return ports +} + +func (p agentgatewayProvider) CheckReady(ctx context.Context) (bool, string) { + return checkHTTPReady(ctx, "http://127.0.0.1:15021/healthz/ready", "") +} + +func (p agentgatewayProvider) localConfig() string { + httpRoute := 
p.routeBlock("substrate-http") + config := fmt.Sprintf(`# yaml-language-server: $schema=https://agentgateway.dev/schema/config +config: + adminAddr: "127.0.0.1:15000" + readinessAddr: "0.0.0.0:15021" + statsAddr: "0.0.0.0:15020" +binds: +- port: %d + listeners: + - name: http + protocol: HTTP + routes: +%s`, p.cfg.HttpPort, indent(httpRoute, 4)) + + if p.cfg.HttpsPort > 0 && tlsCertPath(p.cfg) != "" { + cert := tlsCertPath(p.cfg) + key := tlsKeyPath(p.cfg) + config += fmt.Sprintf(`- port: %d + listeners: + - name: https + protocol: HTTPS + tls: + cert: %q + key: %q + routes: +%s`, p.cfg.HttpsPort, cert, key, indent(p.routeBlock("substrate-https"), 4)) + } + + return config +} + +func (p agentgatewayProvider) routeBlock(name string) string { + extprocHost := fmt.Sprintf("%s:%d", p.cfg.ExtprocAddr, p.cfg.ExtprocPort) + // processingOptions limit ext_proc to request headers only; agentgateway defaults + // break WebSocket upgrades because this server only handles headers. + return fmt.Sprintf(`- name: %s + matches: + - path: + pathPrefix: / + policies: + extProc: + host: %q + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} +`, name, extprocHost) +} + +func indent(s string, spaces int) string { + prefix := strings.Repeat(" ", spaces) + lines := strings.Split(s, "\n") + for i, line := range lines { + if line != "" { + lines[i] = prefix + line + } + } + return strings.Join(lines, "\n") +} diff --git a/cmd/atenet/internal/app/router/controller.go b/cmd/atenet/internal/app/router/controller.go index 1b0a1ed18..d9a31d7eb 100644 --- a/cmd/atenet/internal/app/router/controller.go +++ b/cmd/atenet/internal/app/router/controller.go @@ -31,9 +31,10 @@ type Controller struct { cfg RouterConfig xdsSrv *XdsServer extprocSrv *ExtProcServer + provider proxyProvider atStore atStore - envoyRunner 
*envoyrunner + proxyRunner *proxyrunner } func NewController( @@ -42,8 +43,11 @@ func NewController( cfg RouterConfig, xdsSrv *XdsServer, extprocSrv *ExtProcServer, + provider proxyProvider, ) *Controller { - xdsSrv.SetConfig(cfg.HttpPort, cfg.ExtprocPort, cfg.ExtprocAddr) + if xdsSrv != nil { + xdsSrv.SetConfig(cfg.HttpPort, cfg.ExtprocPort, cfg.ExtprocAddr) + } var store atStore if cfg.TemplatesFile != "" { @@ -58,9 +62,10 @@ func NewController( cfg: cfg, xdsSrv: xdsSrv, extprocSrv: extprocSrv, + provider: provider, atStore: store, - envoyRunner: newEnvoyRunner(k8sClient, cfg), + proxyRunner: newProxyRunner(k8sClient, cfg, provider), } } @@ -92,16 +97,18 @@ func (c *Controller) reconcile(ctx context.Context) error { return err } - if err := c.xdsSrv.UpdateSnapshot(); err != nil { - slog.ErrorContext(ctx, "xDS Configuration generation problem", slog.String("err", err.Error())) - return err + if c.provider.RequiresXDS() { + if err := c.xdsSrv.UpdateSnapshot(); err != nil { + slog.ErrorContext(ctx, "xDS Configuration generation problem", slog.String("err", err.Error())) + return err + } } if !c.cfg.Standalone && c.cfg.TemplatesFile == "" { - // Reconcile Envoy router Deployment and Kubernetes cluster entities - err := c.envoyRunner.reconcile(ctx) + // Reconcile router proxy Deployment and Kubernetes cluster entities. + err := c.proxyRunner.reconcile(ctx) if err != nil { - slog.ErrorContext(ctx, "Error during Envoy router reconciliation", slog.String("err", err.Error())) + slog.ErrorContext(ctx, "Error during router proxy reconciliation", slog.String("err", err.Error())) return err } } diff --git a/cmd/atenet/internal/app/router/dashboard.html b/cmd/atenet/internal/app/router/dashboard.html index 41f3d93af..025ffa9dc 100644 --- a/cmd/atenet/internal/app/router/dashboard.html +++ b/cmd/atenet/internal/app/router/dashboard.html @@ -252,12 +252,16 @@

atenet Router Status

Namespace Context {{ .Namespace }} +
Component Network Allocation