From bbc4a079b47d40208c421b1e7595f2fc93fb5222 Mon Sep 17 00:00:00 2001 From: Michele Baldessari Date: Wed, 3 Jun 2026 14:04:00 +0200 Subject: [PATCH] Align custom healthcheck with clustergroup Tested with: ```sh oc get argocd -n vp-gitops vp-gitops -o jsonpath='{.spec.resourceHealthChecks}' | jq -r '.[].kind' Subscription PersistentVolumeClaim ``` --- templates/_helpers.tpl | 170 ++++++++++++---------- templates/policies/ocp-gitops-policy.yaml | 5 +- tests/ocp_gitops_policy_test.yaml | 36 +++-- 3 files changed, 123 insertions(+), 88 deletions(-) diff --git a/templates/_helpers.tpl b/templates/_helpers.tpl index 1e1e588..79f4b98 100644 --- a/templates/_helpers.tpl +++ b/templates/_helpers.tpl @@ -135,92 +135,112 @@ predicates: {{- end -}} {{- end -}} {{- /*acm.app.clusterSelector */}} -{{/* -Subscription health check Lua script for ArgoCD resource health checks -*/}} -{{- define "acm.subscription.healthcheck.lua" -}} -local health_status = {} -if obj.status ~= nil then - if obj.status.conditions ~= nil then - local numDegraded = 0 - local numPending = 0 - local msg = "" +{{/* Please make sure that these healthchecks are the same in the operator code */}} +{{- define "acm.default.healthchecks" -}} +- group: operators.coreos.com + kind: Subscription + check: | + local health_status = {} + if obj.status ~= nil then + if obj.status.conditions ~= nil then + local numDegraded = 0 + local numPending = 0 + local msg = "" - -- Check if this is a manual approval scenario where InstallPlanPending is expected - -- and the operator is already installed (upgrade pending, not initial install) - local isManualApprovalPending = false - if obj.spec ~= nil and obj.spec.installPlanApproval == "Manual" then - for _, condition in pairs(obj.status.conditions) do - if condition.type == "InstallPlanPending" and condition.status == "True" and condition.reason == "RequiresApproval" then - -- Only treat as expected healthy state if the operator is already installed - -- (installedCSV is present), meaning this is an upgrade pending approval - if obj.status.installedCSV ~= nil then - isManualApprovalPending = true + -- Check if this is a manual approval scenario where InstallPlanPending is expected + -- and the operator is already installed (upgrade pending, not initial install) + local isManualApprovalPending = false + if obj.spec ~= nil and obj.spec.installPlanApproval == "Manual" then + for _, condition in pairs(obj.status.conditions) do + if condition.type == "InstallPlanPending" and condition.status == "True" and condition.reason == "RequiresApproval" then + -- Only treat as expected healthy state if the operator is already installed + -- (installedCSV is present), meaning this is an upgrade pending approval + if obj.status.installedCSV ~= nil then + isManualApprovalPending = true + end + break + end end - break end - end - end - for i, condition in pairs(obj.status.conditions) do - -- Skip InstallPlanPending condition when manual approval is pending (expected behavior) - if isManualApprovalPending and condition.type == "InstallPlanPending" then - -- Do not include in message or count as pending - else - msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n" - if condition.type == "InstallPlanPending" and condition.status == "True" then + for i, condition in pairs(obj.status.conditions) do + -- Skip InstallPlanPending condition when manual approval is pending (expected behavior) + if isManualApprovalPending and condition.type == "InstallPlanPending" then + -- Do not include in message or count as pending + else + msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n" + if condition.type == "InstallPlanPending" and condition.status == "True" then + numPending = numPending + 1 + elseif (condition.type == "InstallPlanMissing" and condition.reason ~= "ReferencedInstallPlanNotFound") then + numDegraded = numDegraded + 1 + elseif (condition.type == "CatalogSourcesUnhealthy" or condition.type == "InstallPlanFailed" or condition.type == "ResolutionFailed") and condition.status == "True" then + numDegraded = numDegraded + 1 + end + end + end + + -- Available states: undef/nil, UpgradeAvailable, UpgradePending, UpgradeFailed, AtLatestKnown + -- Source: https://github.com/openshift/operator-framework-olm/blob/5e2c73b7663d0122c9dc3e59ea39e515a31e2719/staging/api/pkg/operators/v1alpha1/subscription_types.go#L17-L23 + if obj.status.state == nil then numPending = numPending + 1 - elseif (condition.type == "InstallPlanMissing" and condition.reason ~= "ReferencedInstallPlanNotFound") then - numDegraded = numDegraded + 1 - elseif (condition.type == "CatalogSourcesUnhealthy" or condition.type == "InstallPlanFailed" or condition.type == "ResolutionFailed") and condition.status == "True" then + msg = msg .. ".status.state not yet known\n" + elseif obj.status.state == "" or obj.status.state == "UpgradeAvailable" then + numPending = numPending + 1 + msg = msg .. ".status.state is '" .. obj.status.state .. "'\n" + elseif obj.status.state == "UpgradePending" then + -- UpgradePending with manual approval is expected behavior, treat as healthy + if isManualApprovalPending then + msg = msg .. ".status.state is 'AtLatestKnown'\n" + else + numPending = numPending + 1 + msg = msg .. ".status.state is '" .. obj.status.state .. "'\n" + end + elseif obj.status.state == "UpgradeFailed" then numDegraded = numDegraded + 1 + msg = msg .. ".status.state is '" .. obj.status.state .. "'\n" + else + -- Last possiblity of .status.state: AtLatestKnown + msg = msg .. ".status.state is '" .. obj.status.state .. "'\n" end - end - end - -- Available states: undef/nil, UpgradeAvailable, UpgradePending, UpgradeFailed, AtLatestKnown - -- Source: https://github.com/openshift/operator-framework-olm/blob/5e2c73b7663d0122c9dc3e59ea39e515a31e2719/staging/api/pkg/operators/v1alpha1/subscription_types.go#L17-L23 - if obj.status.state == nil then - numPending = numPending + 1 - msg = msg .. ".status.state not yet known\n" - elseif obj.status.state == "" or obj.status.state == "UpgradeAvailable" then - numPending = numPending + 1 - msg = msg .. ".status.state is '" .. obj.status.state .. "'\n" - elseif obj.status.state == "UpgradePending" then - -- UpgradePending with manual approval is expected behavior, treat as healthy - if isManualApprovalPending then - msg = msg .. ".status.state is 'AtLatestKnown'\n" - else - numPending = numPending + 1 - msg = msg .. ".status.state is '" .. obj.status.state .. "'\n" + if numDegraded == 0 and numPending == 0 then + health_status.status = "Healthy" + health_status.message = msg + return health_status + elseif numPending > 0 and numDegraded == 0 then + health_status.status = "Progressing" + health_status.message = msg + return health_status + else + health_status.status = "Degraded" + health_status.message = msg + return health_status + end end - elseif obj.status.state == "UpgradeFailed" then - numDegraded = numDegraded + 1 - msg = msg .. ".status.state is '" .. obj.status.state .. "'\n" - else - -- Last possiblity of .status.state: AtLatestKnown - msg = msg .. ".status.state is '" .. obj.status.state .. "'\n" end - - if numDegraded == 0 and numPending == 0 then - health_status.status = "Healthy" - health_status.message = msg - return health_status - elseif numPending > 0 and numDegraded == 0 then - health_status.status = "Progressing" - health_status.message = msg - return health_status - else - health_status.status = "Degraded" - health_status.message = msg - return health_status + health_status.status = "Progressing" + health_status.message = "An install plan for a subscription is pending installation" + return health_status +- kind: PersistentVolumeClaim + check: | + hs = {} + if obj.status ~= nil then + if obj.status.phase ~= nil then + if obj.status.phase == "Pending" then + hs.status = "Healthy" + hs.message = obj.status.phase + return hs + elseif obj.status.phase == "Bound" then + hs.status = "Healthy" + hs.message = obj.status.phase + return hs + end + end end - end -end -health_status.status = "Progressing" -health_status.message = "An install plan for a subscription is pending installation" -return health_status -{{- end }} {{- /*acm.subscription.healthcheck.lua */}} + hs.status = "Progressing" + hs.message = "Waiting for PVC" + return hs +{{- end }} {{- /*acm.default.healthchecks */}} {{/* Determines if the current cluster is a hub cluster. diff --git a/templates/policies/ocp-gitops-policy.yaml b/templates/policies/ocp-gitops-policy.yaml index 34c810c..8f61d88 100644 --- a/templates/policies/ocp-gitops-policy.yaml +++ b/templates/policies/ocp-gitops-policy.yaml @@ -267,10 +267,7 @@ spec: - PipelineRun # Custom Subscription healthcheck to handle manual approval scenarios resourceHealthChecks: - - group: operators.coreos.com - kind: Subscription - check: | -{{- include "acm.subscription.healthcheck.lua" . | nindent 24 }} +{{- include "acm.default.healthchecks" . | nindent 20 }} {{- range $.Values.acm.extraResourceHealthChecks }} - group: {{ .group }} kind: {{ .kind }} diff --git a/tests/ocp_gitops_policy_test.yaml b/tests/ocp_gitops_policy_test.yaml index ed9bf93..5b165f5 100644 --- a/tests/ocp_gitops_policy_test.yaml +++ b/tests/ocp_gitops_policy_test.yaml @@ -96,7 +96,7 @@ tests: path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.channel value: gitops-priority - - it: Should have only the default Subscription healthcheck when no extra healthchecks are configured + - it: Should have the default Subscription and PVC healthchecks when no extra healthchecks are configured values: - ./clusterselector_values.yaml asserts: @@ -105,7 +105,7 @@ tests: value: group-one-gitops-policy-argocd lengthEqual: path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks - count: 1 + count: 2 - documentSelector: path: metadata.name value: group-one-gitops-policy-argocd @@ -118,6 +118,12 @@ tests: equal: path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[0].kind value: Subscription + - documentSelector: + path: metadata.name + value: group-one-gitops-policy-argocd + equal: + path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind + value: PersistentVolumeClaim - it: Should append a single extra resource healthcheck values: @@ -137,7 +143,7 @@ tests: value: group-one-gitops-policy-argocd lengthEqual: path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks - count: 2 + count: 3 - documentSelector: path: metadata.name value: group-one-gitops-policy-argocd @@ -148,13 +154,19 @@ tests: path: metadata.name value: group-one-gitops-policy-argocd equal: - path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].group + path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind + value: PersistentVolumeClaim + - documentSelector: + path: metadata.name + value: group-one-gitops-policy-argocd + equal: + path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].group value: argoproj.io - documentSelector: path: metadata.name value: group-one-gitops-policy-argocd equal: - path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind + path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].kind value: Application - it: Should append multiple extra resource healthchecks @@ -183,22 +195,28 @@ tests: value: group-one-gitops-policy-argocd lengthEqual: path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks - count: 3 + count: 4 - documentSelector: path: metadata.name value: group-one-gitops-policy-argocd equal: - path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].group - value: argoproj.io + path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind + value: PersistentVolumeClaim - documentSelector: path: metadata.name value: group-one-gitops-policy-argocd equal: path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].group + value: argoproj.io + - documentSelector: + path: metadata.name + value: group-one-gitops-policy-argocd + equal: + path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[3].group value: batch - documentSelector: path: metadata.name value: group-one-gitops-policy-argocd equal: - path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].kind + path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[3].kind value: Job