Improve platform detection to reduce unknown telemetry values#1655
Improve platform detection to reduce unknown telemetry values#1655hors wants to merge 2 commits into
Conversation
Replace CRD-based detection (which relied on optional addons being installed) with discovery API group checks and API server URL patterns. Add detection for AKS, DOKS, OKE, ACK, NKP, Platform9, Tanzu, and Rancher alongside the existing GKE, EKS, and OpenShift detections.
There was a problem hiding this comment.
Pull request overview
This PR updates the operator’s platform/managed-Kubernetes detection used for telemetry, moving away from CRD-based heuristics toward API discovery group checks and API server host pattern checks to reduce “unknown” platform values.
Changes:
- Added a
hasAPIGrouphelper that detects platforms by checking for the presence of API groups via the discovery API. - Updated existing platform detection (notably GKE/EKS) to use API groups and API server host patterns rather than optional addon CRDs.
- Added detection and telemetry labels for AKS, DOKS, OKE, ACK, NKP, Platform9, Tanzu, and Rancher.
| // hasAPIGroup returns true if the cluster exposes the given API group name. | ||
| // Uses the discovery API which is available to all authenticated users with no extra RBAC. | ||
| func hasAPIGroup(ctx context.Context, cfg *rest.Config, groupName string) bool { | ||
| client, err := discovery.NewDiscoveryClientForConfig(cfg) | ||
| assertNoError(err) | ||
|
|
||
| if err != nil { | ||
| return false | ||
| } | ||
| groups, err := client.ServerGroups() | ||
| if err != nil { | ||
| assertNoError(err) | ||
| return false |
| func isEKS(ctx context.Context, cfg *rest.Config) bool { | ||
| log := logging.FromContext(ctx) | ||
| // EKS API server hostnames always end with .eks.amazonaws.com. | ||
| if strings.Contains(cfg.Host, ".eks.amazonaws.com") { | ||
| logging.FromContext(ctx).Info("detected EKS environment") | ||
| return true | ||
| } | ||
| return false | ||
| } |
| case isTanzu(ctx, cfg): | ||
| return "tanzu" | ||
| case isRancher(ctx, cfg): | ||
| return "rancher" |
44406c3 to
6cbdb1e
Compare
| func isEKS(ctx context.Context, cfg *rest.Config) bool { | ||
| log := logging.FromContext(ctx) | ||
| // crd.k8s.amazonaws.com is registered on all EKS clusters by the AWS controllers. | ||
| if hasAPIGroup(ctx, cfg, "crd.k8s.amazonaws.com") { | ||
| logging.FromContext(ctx).Info("detected EKS environment") | ||
| return true |
| client, err := discovery.NewDiscoveryClientForConfig(cfg) | ||
| assertNoError(err) | ||
|
|
||
| if err != nil { | ||
| log.V(1).Info("platform detection: could not create discovery client", "error", err.Error()) | ||
| return false | ||
| } |
| // hasAPIGroup returns true if the cluster exposes the given API group name. | ||
| // Uses the discovery API; note that hardened clusters may restrict discovery access. | ||
| func hasAPIGroup(ctx context.Context, cfg *rest.Config, groupName string) bool { |
| func isACK(ctx context.Context, cfg *rest.Config) bool { | ||
| // ACK API server hostnames always end with .aliyuncs.com. | ||
| if strings.Contains(cfg.Host, ".aliyuncs.com") { | ||
| logging.FromContext(ctx).Info("detected ACK environment") | ||
| return true | ||
| } | ||
| return false | ||
| } |
There was a problem hiding this comment.
I'm hesitating a little to add these without ensuring that the API group method works for them
| case isAKS(ctx, cfg): | ||
| return "aks" | ||
| case isDOKS(ctx, cfg): | ||
| return "doks" | ||
| case isOKE(ctx, cfg): | ||
| return "oke" | ||
| case isACK(ctx, cfg): | ||
| return "ack" | ||
| case isNKP(ctx, cfg): | ||
| return "nkp" | ||
| case isPlatform9(ctx, cfg): | ||
| return "platform9" | ||
| case isTanzu(ctx, cfg): | ||
| return "tanzu" | ||
| case isRancher(ctx, cfg): | ||
| return "rancher" |
There was a problem hiding this comment.
Maybe we can also avoid this repetition here cause every time we will have to add a function and a call here. We could have a generic logic for calling the hasAPIGroup(ctx, ...) and then iterate on the platforms that can be defined like this:
var platformProbes = []platformProbe{
{name: "gke", label: "GKE", apiGroups: []string{"networking.gke.io"}},
{name: "eks", label: "EKS", apiGroups: []string{"crd.k8s.amazonaws.com"}},
{name: "aks", label: "AKS", hosts: []string{".azmk8s.io"}},....
6cbdb1e to
ef39113
Compare
commit: ef39113 |
Replace CRD-based detection (which relied on optional addons being installed) with discovery API group checks and API server URL patterns.
Add detection for AKS, DOKS, OKE, ACK, NKP, Platform9, Tanzu, and Rancher alongside the existing GKE, EKS, and OpenShift detections.
CHANGE DESCRIPTION
Problem:
Short explanation of the problem.
Cause:
Short explanation of the root cause of the issue if applicable.
Solution:
Short explanation of the solution we are providing with this PR.
CHECKLIST
Jira
Needs Doc) and QA (Needs QA)?Tests
Config/Logging/Testability