Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 95 additions & 75 deletions templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -135,92 +135,112 @@ predicates:
{{- end -}}
{{- end -}} {{- /*acm.app.clusterSelector */}}

{{/*
Subscription health check Lua script for ArgoCD resource health checks
*/}}
{{- define "acm.subscription.healthcheck.lua" -}}
local health_status = {}
if obj.status ~= nil then
if obj.status.conditions ~= nil then
local numDegraded = 0
local numPending = 0
local msg = ""
{{/* Please make sure that these healthchecks are the same in the operator code */}}
{{- define "acm.default.healthchecks" -}}
- group: operators.coreos.com
kind: Subscription
check: |
local health_status = {}
if obj.status ~= nil then
if obj.status.conditions ~= nil then
local numDegraded = 0
local numPending = 0
local msg = ""

-- Check if this is a manual approval scenario where InstallPlanPending is expected
-- and the operator is already installed (upgrade pending, not initial install)
local isManualApprovalPending = false
if obj.spec ~= nil and obj.spec.installPlanApproval == "Manual" then
for _, condition in pairs(obj.status.conditions) do
if condition.type == "InstallPlanPending" and condition.status == "True" and condition.reason == "RequiresApproval" then
-- Only treat as expected healthy state if the operator is already installed
-- (installedCSV is present), meaning this is an upgrade pending approval
if obj.status.installedCSV ~= nil then
isManualApprovalPending = true
-- Check if this is a manual approval scenario where InstallPlanPending is expected
-- and the operator is already installed (upgrade pending, not initial install)
local isManualApprovalPending = false
if obj.spec ~= nil and obj.spec.installPlanApproval == "Manual" then
for _, condition in pairs(obj.status.conditions) do
if condition.type == "InstallPlanPending" and condition.status == "True" and condition.reason == "RequiresApproval" then
-- Only treat as expected healthy state if the operator is already installed
-- (installedCSV is present), meaning this is an upgrade pending approval
if obj.status.installedCSV ~= nil then
isManualApprovalPending = true
end
break
end
end
break
end
end
end

for i, condition in pairs(obj.status.conditions) do
-- Skip InstallPlanPending condition when manual approval is pending (expected behavior)
if isManualApprovalPending and condition.type == "InstallPlanPending" then
-- Do not include in message or count as pending
else
msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n"
if condition.type == "InstallPlanPending" and condition.status == "True" then
for i, condition in pairs(obj.status.conditions) do
-- Skip InstallPlanPending condition when manual approval is pending (expected behavior)
if isManualApprovalPending and condition.type == "InstallPlanPending" then
-- Do not include in message or count as pending
else
msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n"
if condition.type == "InstallPlanPending" and condition.status == "True" then
numPending = numPending + 1
elseif (condition.type == "InstallPlanMissing" and condition.reason ~= "ReferencedInstallPlanNotFound") then
numDegraded = numDegraded + 1
elseif (condition.type == "CatalogSourcesUnhealthy" or condition.type == "InstallPlanFailed" or condition.type == "ResolutionFailed") and condition.status == "True" then
numDegraded = numDegraded + 1
end
end
end

-- Available states: undef/nil, UpgradeAvailable, UpgradePending, UpgradeFailed, AtLatestKnown
-- Source: https://github.com/openshift/operator-framework-olm/blob/5e2c73b7663d0122c9dc3e59ea39e515a31e2719/staging/api/pkg/operators/v1alpha1/subscription_types.go#L17-L23
if obj.status.state == nil then
numPending = numPending + 1
elseif (condition.type == "InstallPlanMissing" and condition.reason ~= "ReferencedInstallPlanNotFound") then
numDegraded = numDegraded + 1
elseif (condition.type == "CatalogSourcesUnhealthy" or condition.type == "InstallPlanFailed" or condition.type == "ResolutionFailed") and condition.status == "True" then
msg = msg .. ".status.state not yet known\n"
elseif obj.status.state == "" or obj.status.state == "UpgradeAvailable" then
numPending = numPending + 1
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
elseif obj.status.state == "UpgradePending" then
-- UpgradePending with manual approval is expected behavior, treat as healthy
if isManualApprovalPending then
msg = msg .. ".status.state is 'AtLatestKnown'\n"
else
numPending = numPending + 1
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
end
elseif obj.status.state == "UpgradeFailed" then
numDegraded = numDegraded + 1
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
else
-- Last possiblity of .status.state: AtLatestKnown
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
end
end
end

-- Available states: undef/nil, UpgradeAvailable, UpgradePending, UpgradeFailed, AtLatestKnown
-- Source: https://github.com/openshift/operator-framework-olm/blob/5e2c73b7663d0122c9dc3e59ea39e515a31e2719/staging/api/pkg/operators/v1alpha1/subscription_types.go#L17-L23
if obj.status.state == nil then
numPending = numPending + 1
msg = msg .. ".status.state not yet known\n"
elseif obj.status.state == "" or obj.status.state == "UpgradeAvailable" then
numPending = numPending + 1
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
elseif obj.status.state == "UpgradePending" then
-- UpgradePending with manual approval is expected behavior, treat as healthy
if isManualApprovalPending then
msg = msg .. ".status.state is 'AtLatestKnown'\n"
else
numPending = numPending + 1
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
if numDegraded == 0 and numPending == 0 then
health_status.status = "Healthy"
health_status.message = msg
return health_status
elseif numPending > 0 and numDegraded == 0 then
health_status.status = "Progressing"
health_status.message = msg
return health_status
else
health_status.status = "Degraded"
health_status.message = msg
return health_status
end
end
elseif obj.status.state == "UpgradeFailed" then
numDegraded = numDegraded + 1
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
else
-- Last possiblity of .status.state: AtLatestKnown
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
end

if numDegraded == 0 and numPending == 0 then
health_status.status = "Healthy"
health_status.message = msg
return health_status
elseif numPending > 0 and numDegraded == 0 then
health_status.status = "Progressing"
health_status.message = msg
return health_status
else
health_status.status = "Degraded"
health_status.message = msg
return health_status
health_status.status = "Progressing"
health_status.message = "An install plan for a subscription is pending installation"
return health_status
- kind: PersistentVolumeClaim
check: |
hs = {}
if obj.status ~= nil then
if obj.status.phase ~= nil then
if obj.status.phase == "Pending" then
hs.status = "Healthy"
hs.message = obj.status.phase
return hs
elseif obj.status.phase == "Bound" then
hs.status = "Healthy"
hs.message = obj.status.phase
return hs
end
end
end
end
end
health_status.status = "Progressing"
health_status.message = "An install plan for a subscription is pending installation"
return health_status
{{- end }} {{- /*acm.subscription.healthcheck.lua */}}
hs.status = "Progressing"
hs.message = "Waiting for PVC"
return hs
{{- end }} {{- /*acm.default.healthchecks */}}

{{/*
Determines if the current cluster is a hub cluster.
Expand Down
5 changes: 1 addition & 4 deletions templates/policies/ocp-gitops-policy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -267,10 +267,7 @@ spec:
- PipelineRun
# Custom Subscription healthcheck to handle manual approval scenarios
resourceHealthChecks:
- group: operators.coreos.com
kind: Subscription
check: |
{{- include "acm.subscription.healthcheck.lua" . | nindent 24 }}
{{- include "acm.default.healthchecks" . | nindent 20 }}
{{- range $.Values.acm.extraResourceHealthChecks }}
- group: {{ .group }}
kind: {{ .kind }}
Expand Down
36 changes: 27 additions & 9 deletions tests/ocp_gitops_policy_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ tests:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.channel
value: gitops-priority

- it: Should have only the default Subscription healthcheck when no extra healthchecks are configured
- it: Should have the default Subscription and PVC healthchecks when no extra healthchecks are configured
values:
- ./clusterselector_values.yaml
asserts:
Expand All @@ -105,7 +105,7 @@ tests:
value: group-one-gitops-policy-argocd
lengthEqual:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks
count: 1
count: 2
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
Expand All @@ -118,6 +118,12 @@ tests:
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[0].kind
value: Subscription
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind
value: PersistentVolumeClaim

- it: Should append a single extra resource healthcheck
values:
Expand All @@ -137,7 +143,7 @@ tests:
value: group-one-gitops-policy-argocd
lengthEqual:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks
count: 2
count: 3
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
Expand All @@ -148,13 +154,19 @@ tests:
path: metadata.name
value: group-one-gitops-policy-argocd
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].group
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind
value: PersistentVolumeClaim
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].group
value: argoproj.io
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].kind
value: Application

- it: Should append multiple extra resource healthchecks
Expand Down Expand Up @@ -183,22 +195,28 @@ tests:
value: group-one-gitops-policy-argocd
lengthEqual:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks
count: 3
count: 4
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].group
value: argoproj.io
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind
value: PersistentVolumeClaim
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].group
value: argoproj.io
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[3].group
value: batch
- documentSelector:
path: metadata.name
value: group-one-gitops-policy-argocd
equal:
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].kind
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[3].kind
value: Job