fix(tls): cilium-gateway-cert STAGING/PROD issuer selectable via tofu (#1481)
clusters/_template/sovereign-tls/cilium-gateway-cert.yaml hardcoded letsencrypt-dns01-prod-powerdns regardless of qa_test_session_enabled. On high-cadence QA reprov cycles this hits the LE PROD 5/168h rate limit (caught on prov #76 at 13:45 UTC, retry-after 16:49 UTC) and the wildcard Certificate sticks at Ready=False — Cilium Gateway has no valid TLS secret → envoy listener never binds → public TLS handshake to console.<fqdn> dies with SSL_ERROR_SYSCALL. Add tofu local.wildcard_cert_issuer = qa_test_session_enabled ? staging : prod. Thread WILDCARD_CERT_ISSUER through the sovereign-tls Kustomization postBuild.substitute. cilium-gateway-cert.yaml references it as ${WILDCARD_CERT_ISSUER}. Default behaviour unchanged for non-QA (production) Sovereigns — they still resolve to letsencrypt-dns01-prod-powerdns. Co-authored-by: e3mrah <catalyst@openova.io> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6edb8b4635
commit
a88e132be9
@ -60,7 +60,14 @@ metadata:
|
||||
spec:
|
||||
secretName: sovereign-wildcard-tls
|
||||
issuerRef:
|
||||
name: letsencrypt-dns01-prod-powerdns
|
||||
# Resolved by Flux postBuild to either
|
||||
# `letsencrypt-dns01-prod-powerdns` (default) or
|
||||
# `letsencrypt-dns01-staging-powerdns` (qaTestEnabled=true) per
|
||||
# tofu local.wildcard_cert_issuer. PROD has a 5/168h rate limit
|
||||
# per exact set of identifiers — high-cadence QA reprovs hit it
|
||||
# within hours and pin the Cilium Gateway listener to a Ready=False
|
||||
# Certificate; STAGING is rate-limit-free for QA iteration.
|
||||
name: ${WILDCARD_CERT_ISSUER}
|
||||
kind: ClusterIssuer
|
||||
commonName: "*.${SOVEREIGN_FQDN}"
|
||||
dnsNames:
|
||||
|
||||
@ -1017,6 +1017,13 @@ write_files:
|
||||
metadata:
|
||||
name: sovereign-tls
|
||||
namespace: flux-system
|
||||
annotations:
|
||||
# WILDCARD_CERT_ISSUER selector (Fix #176 — qa-loop iter-1 LE
|
||||
# rate-limit unblock for the cilium-gateway-cert.yaml path).
|
||||
# When wildcard_cert_use_staging=true the issuer string below
|
||||
# routes the Certificate to LE STAGING (no 5/168h rate limit);
|
||||
# default false → real-trusted production certs.
|
||||
openova.io/wildcard-cert-issuer-tag: "${wildcard_cert_use_staging}"
|
||||
spec:
|
||||
# Carries the cert-manager Certificate that backs Cilium Gateway's
|
||||
# wildcard-TLS listener. Split out of bootstrap-kit so its
|
||||
@ -1060,6 +1067,13 @@ write_files:
|
||||
# bp-catalyst-platform into clusters/_template/sovereign-tls/
|
||||
# has access to the parent-zone list without a config copy.
|
||||
PARENT_DOMAINS_YAML: '${parent_domains_yaml}'
|
||||
# WILDCARD_CERT_ISSUER (Fix #176 — qa-loop iter-1 LE
|
||||
# rate-limit unblock). cilium-gateway-cert.yaml references
|
||||
# this via ${WILDCARD_CERT_ISSUER}. When
|
||||
# wildcard_cert_use_staging=true → STAGING ClusterIssuer
|
||||
# (no 5/168h limit); default → PROD. Locals in main.tf
|
||||
# render the final string so this template stays declarative.
|
||||
WILDCARD_CERT_ISSUER: "${wildcard_cert_issuer}"
|
||||
---
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
|
||||
@ -115,6 +115,19 @@ resource "hcloud_ssh_key" "main" {
|
||||
locals {
|
||||
control_plane_count = var.ha_enabled ? 3 : 1
|
||||
|
||||
# Wildcard cert ClusterIssuer selector (Fix #176 — qa-loop iter-1 LE
|
||||
# PROD rate-limit unblock for clusters/_template/sovereign-tls/cilium-
|
||||
# gateway-cert.yaml). The sovereign-tls Kustomization's
|
||||
# postBuild.substitute WILDCARD_CERT_ISSUER below resolves to:
|
||||
# - letsencrypt-dns01-staging-powerdns when var.wildcard_cert_use_staging
|
||||
# is "true" (the QA-session path) → fast iteration, no rate limit
|
||||
# - letsencrypt-dns01-prod-powerdns otherwise (any value other than "true") → real-trusted cert
|
||||
# Both ClusterIssuers are shipped by bp-cert-manager-powerdns-webhook
|
||||
# (bootstrap-kit slot 49). Without this, cilium-gateway-cert.yaml
|
||||
# always hits PROD even on qaTestEnabled Sovereigns, and the 5/168h
|
||||
# rate limit pins the Gateway to a `Ready=False` Certificate.
|
||||
wildcard_cert_issuer = var.wildcard_cert_use_staging == "true" ? "letsencrypt-dns01-staging-powerdns" : "letsencrypt-dns01-prod-powerdns"
|
||||
|
||||
# ── Effective singular-path SKU selection (Fix #157) ─────────────────────
|
||||
# When qa_fixtures_enabled='true', the Sovereign is a QA-loop matrix
|
||||
# consumer carrying the full bp-* stack PLUS qaFixtures (Continuum +
|
||||
@ -364,6 +377,7 @@ locals {
|
||||
qa_fixtures_namespace = var.qa_fixtures_namespace
|
||||
qa_organization = var.qa_organization
|
||||
wildcard_cert_use_staging = var.wildcard_cert_use_staging
|
||||
wildcard_cert_issuer = local.wildcard_cert_issuer
|
||||
cluster_mesh_name = var.cluster_mesh_name
|
||||
cluster_mesh_id = var.cluster_mesh_id
|
||||
|
||||
@ -879,6 +893,7 @@ locals {
|
||||
qa_fixtures_namespace = var.qa_fixtures_namespace
|
||||
qa_organization = var.qa_organization
|
||||
wildcard_cert_use_staging = var.wildcard_cert_use_staging
|
||||
wildcard_cert_issuer = local.wildcard_cert_issuer
|
||||
# Per-secondary-region ClusterMesh anchors. id is incremented per
|
||||
# peer index so each secondary region gets a unique slot in the
|
||||
# mesh registry; primary region keeps var.cluster_mesh_id.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user