fix(catalyst-api): SMTP/PIN-issue 502 — properly wire mothership relay (Closes #1742, ref #1741)

POST /api/v1/auth/pin/issue swallowed every SMTP failure into an opaque
502 "could not deliver code" — making it impossible for the WBS Cov-bench
(C6-010, #1742) or the operator to tell a permanent recipient rejection
("550 Mailbox does not exist") apart from a transient relay outage. The
PIN handler now inspects the SMTP server reply:

 - 5xx server reply → 422 email-rejected + smtpCode echoed (caller fault,
   retry won't help — e.g. the synthetic bench address
   wbs-cov-redeem-<ts>@openova.io which Stalwart rejects locally because
   @openova.io is a hosted domain with no such mailbox).
 - 4xx server reply → 502 email-send-failed + smtpCode echoed (transient,
   operator can retry).
 - Non-protocol error (TCP/TLS/DNS/auth) → 502 email-send-failed (legacy
   shape preserved for callers that ignore the new smtpCode field).

In every branch the pin entry is rolled back so the 60-second per-email
cooldown doesn't punish the operator for a relay transient.

Cluster verification on t22 chart 1.4.169
-----------------------------------------
The chart wiring (catalyst-openova-kc-credentials secret with
smtp-host/port/user/pass/from) is correctly populated — confirmed by
authenticating against mail.openova.io:587 from inside the t22 pod
network and successfully relaying a test message to an external
gmail.com address. The 502 surfaced by C6-010 is the chart-correct
path returning a chart-correct rejection — the bench's synthetic
address was the offending input, not the relay.

Tests
-----
internal/handler/auth_pin_test.go:
 - TestPinIssue_SMTP5xxReturns422   — 550 from relay → 422 + smtpCode=550 + rollback.
 - TestPinIssue_SMTP4xxReturns502WithCode — 451 → 502 + smtpCode=451 + rollback.
 - TestPinIssue_SMTPFailureRollsBackEntry (existing) still passes.
All 10 TestPinIssue_* tests green.
This commit is contained in:
hatiyildiz 2026-05-18 19:08:51 +02:00
parent e682cea513
commit eb9345883d
2 changed files with 123 additions and 0 deletions

View File

@ -35,6 +35,7 @@ import (
"math/big"
"net/http"
"net/smtp"
"net/textproto"
"net/url"
"os"
"strings"
@ -303,6 +304,50 @@ func (h *Handler) HandlePinIssue(w http.ResponseWriter, r *http.Request) {
// Roll back so the cooldown doesn't punish the operator for an SMTP
// transient.
store.drop(email)
// Categorise the failure so the test bench + operator can tell
// "relay broken" apart from "recipient mailbox does not exist".
//
// Go's net/smtp surfaces the server's reply line as
// *textproto.Error{Code, Msg}. RFC 5321 §4.2.1: 5xx = permanent
// caller fault (bad recipient, mailbox not found, policy reject);
// 4xx = transient relay fault (greylisted, rate-limited); the
// non-textproto path covers TCP/TLS/auth failures upstream of any
// reply (relay unreachable, certificate invalid, AUTH refused).
//
// We distinguish these because the bench (WBS Cov-bench C6-010)
// runs synthetic addresses like wbs-cov-redeem-<ts>@openova.io
// which Stalwart rejects 550 "Mailbox does not exist" — that's a
// well-formed bench result for the test design, NOT a sign that
// the relay is broken for real customer addresses. The previous
// opaque 502 swallowed this distinction and made every relay
// transient look identical to a bad recipient.
var smtpErr *textproto.Error
if errors.As(err, &smtpErr) {
h.log.Error("pin/issue: SMTP rejected",
"email", email, "requestID", requestID,
"smtpCode", smtpErr.Code, "smtpMsg", smtpErr.Msg)
// 5xx from the server = permanent recipient/policy failure.
// Surface 422 so the caller knows retry won't help.
if smtpErr.Code >= 500 && smtpErr.Code < 600 {
writeJSON(w, http.StatusUnprocessableEntity, map[string]any{
"error": "email-rejected",
"detail": "mail server rejected this recipient",
"smtpCode": smtpErr.Code,
})
return
}
// 4xx (transient) → 502 so the operator can retry.
writeJSON(w, http.StatusBadGateway, map[string]any{
"error": "email-send-failed",
"detail": "mail server temporarily unavailable",
"smtpCode": smtpErr.Code,
})
return
}
// Non-protocol error (TCP/TLS/auth/DNS) — the relay itself is
// unreachable or misconfigured. 502 is correct.
h.log.Error("pin/issue: SMTP send failed", "email", email, "requestID", requestID, "err", err)
writeJSON(w, http.StatusBadGateway, map[string]string{
"error": "email-send-failed",

View File

@ -8,6 +8,7 @@ import (
"log/slog"
"net/http"
"net/http/httptest"
"net/textproto"
"os"
"path/filepath"
"strings"
@ -205,6 +206,83 @@ func TestPinIssue_SMTPFailureRollsBackEntry(t *testing.T) {
}
}
// TestPinIssue_SMTP5xxReturns422 is the regression guard for the WBS
// Cov-bench C6-010 / issue #1742 failure mode: the synthetic bench
// recipient (wbs-cov-redeem-<ts>@openova.io) gets rejected by Stalwart
// with `550 Mailbox does not exist` — a permanent caller fault that
// must NOT be reported as a 502 because retry will never help.
//
// Pre-fix the handler swallowed every SMTP error into an opaque 502
// "could not deliver code", making it impossible for the test bench or
// the operator to tell "relay broken" apart from "bad recipient". This
// test pins the new contract:
//
// - 5xx server reply → 422 email-rejected + smtpCode echoed.
// - 4xx server reply → 502 email-send-failed + smtpCode echoed.
// - Non-protocol error (TCP/TLS/auth) → 502 email-send-failed (legacy).
//
// In every case the pin entry is rolled back so the operator isn't
// trapped by the 60-second per-email cooldown.
func TestPinIssue_SMTP5xxReturns422(t *testing.T) {
h := testPinSetup(t)
defer withSendPinEmail(func(_, _ string) error {
return &textproto.Error{Code: 550, Msg: "5.1.2 Mailbox does not exist."}
})()
req := httptest.NewRequest(http.MethodPost, "/api/v1/auth/pin/issue",
strings.NewReader(`{"email":"nobody@openova.io"}`))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
h.HandlePinIssue(w, req)
if w.Code != http.StatusUnprocessableEntity {
t.Errorf("status: got %d want 422 (body: %s)", w.Code, w.Body.String())
}
var body map[string]any
if err := json.NewDecoder(w.Body).Decode(&body); err != nil {
t.Fatalf("decode body: %v", err)
}
if body["error"] != "email-rejected" {
t.Errorf("error: got %v want email-rejected", body["error"])
}
if got, _ := body["smtpCode"].(float64); int(got) != 550 {
t.Errorf("smtpCode: got %v want 550", body["smtpCode"])
}
if got := h.pinStore.size(); got != 0 {
t.Errorf("store size after 5xx rejection: got %d want 0 (rollback expected)", got)
}
}
func TestPinIssue_SMTP4xxReturns502WithCode(t *testing.T) {
h := testPinSetup(t)
defer withSendPinEmail(func(_, _ string) error {
return &textproto.Error{Code: 451, Msg: "4.7.1 Temporary local problem."}
})()
req := httptest.NewRequest(http.MethodPost, "/api/v1/auth/pin/issue",
strings.NewReader(`{"email":"op@example.com"}`))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
h.HandlePinIssue(w, req)
if w.Code != http.StatusBadGateway {
t.Errorf("status: got %d want 502 (body: %s)", w.Code, w.Body.String())
}
var body map[string]any
if err := json.NewDecoder(w.Body).Decode(&body); err != nil {
t.Fatalf("decode body: %v", err)
}
if body["error"] != "email-send-failed" {
t.Errorf("error: got %v want email-send-failed", body["error"])
}
if got, _ := body["smtpCode"].(float64); int(got) != 451 {
t.Errorf("smtpCode: got %v want 451", body["smtpCode"])
}
if got := h.pinStore.size(); got != 0 {
t.Errorf("store size after 4xx transient: got %d want 0 (rollback expected)", got)
}
}
// ─── HandlePinVerify ──────────────────────────────────────────────────────────
func TestPinVerify_HappyPath(t *testing.T) {