converged-computing · vsoch · Jun 20, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml
@@ -36,7 +36,7 @@ jobs:
       - name: Create k8s Kind Cluster
         uses: helm/kind-action@v1.10.0
         with:
-          version: v0.32.0              # Define your custom KinD CLI version here
+          version: v0.32.0              # required for gang
           node_image: kindest/node:v1.36.1
           config: ./deploy/kind-config.yaml
 
@@ -78,6 +78,8 @@ jobs:
           done
           [ -n "$POD" ] || { echo "ERROR: no Running fluence pod found"; exit 1; }
           echo "Using pod: $POD"
+          # Brief sleep to let the container runtime stabilize before exec
+          sleep 5
           kubectl -n kube-system exec "$POD" -- ls /tmp/
           kubectl -n kube-system logs "$POD"
           kubectl -n kube-system exec "$POD" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
@@ -107,6 +109,8 @@ jobs:
           done
           [ -n "$POD" ] || { echo "ERROR: no Running fluence pod found after restart"; exit 1; }
           echo "Using pod: $POD"
+          # Brief sleep to let the container runtime stabilize before exec
+          sleep 5
           kubectl -n kube-system exec "$POD" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
 
       - name: Wait for webhook
@@ -129,6 +133,9 @@ jobs:
       #- name: E2E - restart recovery (no double-book)
       #  run: bash test/e2e/03-restart-recovery.sh
 
+      - name: E2E - sidecar ungate
+        run: bash test/e2e/04-sidecar-ungate.sh
+
       - name: Dump diagnostics on failure
         if: failure()
         run: |

diff --git a/.github/workflows/sidecar-build-deploy.yaml b/.github/workflows/sidecar-build-deploy.yaml
@@ -0,0 +1,71 @@
+name: sidecar-build-deploy
+
+on:
+  push:
+    branches: [main]
+    tags: ["v*"]
+    paths:
+      - "sidecars/**"
+      - ".github/workflows/sidecar-build-deploy.yaml"
+  pull_request:
+    branches: [main]
+    paths:
+      - "sidecars/**"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  REGISTRY: ghcr.io
+
+jobs:
+  build-deploy:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    strategy:
+      matrix:
+        sidecar:
+          - braket
+          # - qrmi   # uncomment when implemented
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Image metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ github.repository }}-sidecar-${{ matrix.sidecar }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=sha
+            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+
+      - name: Build and push ${{ matrix.sidecar }} sidecar
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./sidecars/${{ matrix.sidecar }}/Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/Makefile b/Makefile
@@ -25,6 +25,12 @@ build: ## Build all binaries (scheduler needs flux-sched; helpers are pure Go)
 	CGO_ENABLED=0 go build -o bin/fluence-deviceplugin ./cmd/deviceplugin
 	CGO_ENABLED=0 go build -o bin/fluence-webhook ./cmd/webhook
 
+.PHONY: sidecars
+sidecars:
+	docker build -f sidecars/braket/Dockerfile -t ghcr.io/converged-computing/fluence-sidecar-braket:latest .
+	docker push ghcr.io/converged-computing/fluence-sidecar-braket:latest
+	# kind load docker-image ghcr.io/converged-computing/fluence-sidecar-braket:latest
+
 .PHONY: test
 test:
 	CGO_ENABLED=1 CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" \

diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ native Kubernetes PodGroup (Gang) API — no sidecar, and no
 proven out first in
 [fluxion-quantum](https://github.com/converged-computing/fluxion-quantum).
 
-## How the pieces fit together
+## How does it work?
 
 ```console
  resources.yaml --+ 
@@ -80,8 +80,6 @@ virtual resource (`virtual=true`) cannot be co-selected in one match -- one woul
 prune the other. A pod needing both produces **two** match-allocate calls, held
 together all-or-nothing.
 
----
-
 ## Components
 
 ### `pkg/jgf` — JGF graph builder
@@ -193,6 +191,39 @@ admission. The real gating is Fluxion (and the backend's own limits); since a
 virtual backend is reachable from any node, each type is advertised at a large
 ceiling. Types come from the same config as the graph, so they can't drift.
 
+### `sidecars/` — quantum coordination sidecars
+
+Vendor-specific sidecar containers injected by the webhook into leader pods
+of quantum workflow groups. Each sidecar discovers the QPU task submitted by
+the leader, polls the vendor queue, and ungates worker pods when the task
+reaches position==1.
+
+```console
+sidecars/
+  lib/ungate.py          shared gate removal + ARN annotation logic
+  braket/
+    sidecar.py           AWS Braket sidecar (tag search, queue polling, ungate)
+    fluence_braket_intercept.py  AwsDevice.run() monkey-patch (PYTHONSTARTUP)
+    Dockerfile           build context is sidecars/ to include lib/
+    design.md            full design document
+    test/integration.sh  local integration test (requires AWS credentials)
+```
+
+The sidecar is injected automatically — users only need the group label:
+
+```yaml
+metadata:
+  labels:
+    fluence.flux-framework.org/group: my-workflow
+spec:
+  schedulerName: fluence
+```
+
+Fluence creates the PodGroup, injects the sidecar, creates per-namespace
+RBAC, and gates all non-leader pods. See `sidecars/braket/design.md` for
+the full design including the SDK interceptor, queue position polling, and
+the two-queue problem motivation.
+
 ### `pkg/webhook` — environment injection
 
 A mutating webhook that surfaces scheduler-chosen values to a workload. Container
@@ -210,7 +241,9 @@ workload reads these normalized names regardless of which backend it matched.
 - `cmd/deviceplugin` — the extended-resource DaemonSet.
 - `cmd/webhook` — the env-injection webhook.
 - `cmd/recovery-probe` — verifies allocation replay survives a graph rebuild
-  (what a restart does); see `make test-restore`. Note this was implemented but removed because the code in fluxion is only part of a PR branch, and I feel nervous about depending on it.
+  (what a restart does); see `make test-restore`. 
+
+Note that the recovery probe (and graph restore) was implemented but removed because the code in fluxion is only part of a PR branch, and I feel nervous about depending on it.
 
 ## Configuration
 
@@ -335,6 +368,38 @@ Submission is **not** done by the scheduler — the workload container holds the
 user's credentials and submits via qrmi-go. Fluence only schedules and hands off
 the backend. (When we control local quantum devices this will change.)
 
+### 3. Quantum workflow groups (leader + workers)
+
+For workflows where a leader pod submits quantum work and worker pods process
+the results, add the group label to all pods. Fluence gates the workers until
+the QPU task reaches position==1 in the vendor queue:
+
+```yaml
+# All pods in the group get the same label
+metadata:
+  labels:
+    fluence.flux-framework.org/group: my-qaoa-workflow
+spec:
+  schedulerName: fluence
+```
+
+The first pod admitted becomes the leader — Fluence injects the sidecar and
+creates a PodGroup with `minCount: 1`. All subsequent pods get a
+`quantum.braket/ready` scheduling gate and consume no node resources during
+the QPU queue wait. When the sidecar observes `queue_position == 1`, it
+patches the task ARN onto each worker pod's annotations and removes their
+gates atomically with setting `fluence-quantum-classical` priority class.
+
+Per-namespace RBAC (`fluence-sidecar` ServiceAccount/Role/RoleBinding) and
+the interceptor ConfigMap are created automatically by the webhook on first
+use — no manual setup required.
+
+```bash
+# Apply per-namespace RBAC is NOT needed — webhook creates it automatically.
+# Just apply your pods with the group label and schedulerName: fluence.
+kubectl apply -f my-quantum-workflow.yaml
+```
+
 ### Notes
 
 - **Deletion hangs.** A PodGroup can hang on delete via finalizers if the workload
@@ -350,4 +415,4 @@ See [LICENSE](LICENSE), [COPYRIGHT](COPYRIGHT), and [NOTICE](NOTICE).
 
 SPDX-License-Identifier: MIT
 
-LLNL-CODE-842614
+LLNL-CODE-842614
diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go
@@ -83,7 +83,11 @@ func main() {
 			log.Printf("no resources config at %s (%v); injecting FLUXION_BACKEND only", path, rerr)
 		}
 	}
-	mutator := &webhook.Mutator{AttributeKeys: attrKeys}
+	mutator := &webhook.Mutator{
+		AttributeKeys: attrKeys,
+		Client:        client,
+		SidecarImage:  env("FLUENCE_SIDECAR_IMAGE", ""),
+	}
 	log.Printf("[fluence-webhook] env contract injected into fluxion pods: %v", mutator.EnvVarNames())
 
 	mux := http.NewServeMux()

diff --git a/deploy/fluence-test.yaml b/deploy/fluence-test.yaml
@@ -58,7 +58,7 @@ metadata:
 rules:
   - apiGroups: ["scheduling.k8s.io"]
     resources: ["podgroups", "workloads", "podgroups/status", "workloads/status"]
-    verbs: ["get", "list", "watch", "update", "patch"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
   - apiGroups: ["coordination.k8s.io"]
     resources: ["leases"]
     verbs: ["create", "get", "update", "list", "watch"]
@@ -72,6 +72,17 @@ rules:
   - apiGroups: ["admissionregistration.k8s.io"]
     resources: ["mutatingwebhookconfigurations"]
     verbs: ["get", "list", "watch", "patch"]
+  # The webhook creates per-namespace sidecar RBAC on demand when a leader
+  # pod is admitted, so users do not need to apply RBAC manually.
+  - apiGroups: [""]
+    resources: ["serviceaccounts"]
+    verbs: ["get", "create"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["get", "create"]
+  - apiGroups: ["rbac.authorization.k8s.io"]
+    resources: ["roles", "rolebindings"]
+    verbs: ["get", "create"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
@@ -177,6 +188,11 @@ spec:
           # Allows for kind load
           imagePullPolicy: Never
           command: ["/bin/fluence-webhook"]
+          env:
+            # Use busybox as sidecar image in tests — avoids pulling the real
+            # sidecar image which is large and not cached in CI.
+            - name: FLUENCE_SIDECAR_IMAGE
+              value: "busybox:latest"
           ports:
             - containerPort: 8443
           readinessProbe:
@@ -222,3 +238,80 @@ webhooks:
         - key: kubernetes.io/metadata.name
           operator: NotIn
           values: ["kube-system"]
+---
+# fluence-sidecar.yaml
+#
+# RBAC and supporting resources for the Fluence quantum sidecar.
+#
+# The sidecar runs inside a leader pod and needs:
+#   - patch/annotate on pods in its own namespace (to ungate workers and
+#     propagate the task ARN annotation)
+#
+# The sidecar ServiceAccount is namespace-scoped — it only has permissions
+# in the namespace where the workflow runs. The webhook sets
+# spec.serviceAccountName on the leader pod to fluence-sidecar.
+#
+# The SDK interceptor ConfigMap holds fluence_braket_intercept.py which
+# the webhook mounts into user containers as a Python sitecustomize hook,
+# transparently tagging every device.run() call with the pod UID.
+#
+# Apply with:
+#   kubectl apply -f deploy/fluence-sidecar.yaml
+
+
+---
+# PriorityClass for classical pods paired with quantum work.
+# Applied to worker pods by the webhook when they are gated.
+# When ungated, high priority triggers preemption of lower-priority work
+# so workers get nodes immediately as the QPU result arrives.
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: fluence-quantum-classical
+  labels:
+    app: fluence
+value: 1000000
+globalDefault: false
+preemptionPolicy: PreemptLowerPriority
+description: "High priority for classical pods paired with quantum work. Set by Fluence webhook."
+---
+# SDK interceptor ConfigMap — holds the Python sitecustomize hook that
+# patches AwsDevice.run() to tag every quantum task with the pod UID.
+# The webhook mounts this into user containers at Python's site-packages
+# path so it runs automatically before any user code.
+#
+# Mounted at: /etc/fluence/fluence_braket_intercept.py
+# PYTHONSTARTUP is set to this path by the webhook so any Python version loads it.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fluence-braket-interceptor
+  namespace: kube-system
+  labels:
+    app: fluence
+data:
+  fluence_braket_intercept.py: |
+    # Injected by the Fluence webhook into every pod requesting a QPU resource.
+    # Patches AwsDevice.run() to automatically tag every quantum task submission
+    # with the pod UID, enabling the fluence-sidecar to find the task without
+    # any user application changes.
+    import os
+
+    def _install_interceptor():
+        try:
+            from braket.aws import AwsDevice
+            _original_run = AwsDevice.run
+
+            def _patched_run(self, task_specification, *args, **kwargs):
+                pod_uid = os.environ.get("FLUENCE_POD_UID", "")
+                if pod_uid:
+                    tags = kwargs.get("tags", {})
+                    tags["fluence-pod-uid"] = pod_uid
+                    kwargs["tags"] = tags
+                return _original_run(self, task_specification, *args, **kwargs)
+
+            AwsDevice.run = _patched_run
+        except ImportError:
+            pass
+
+    _install_interceptor()