feat: mesh peer sync with health tracking and separate peer credentials 🔗

- Separate peer_sync.auth_username/password from the DA-facing credentials
  so /internal/* uses its own basic auth; a compromised peer cannot push
  zones or access the admin API
- Per-peer health tracking: consecutive failure count, degraded/recovered
  log events at FAILURE_THRESHOLD (3) and on first successful contact after
  degradation
- Gossip-lite mesh discovery: each sync pass calls /internal/peers on every
  known peer and adds newly discovered node URLs automatically; a linear
  chain of initial connections is sufficient to form a full mesh
- /internal/peers endpoint returns the node's live peer URL list
- Support DADNS_PEER_SYNC_PEER_N_URL/USERNAME/PASSWORD numbered env vars
  for multi-peer env-var-only deployments (up to 9); original single-peer
  DADNS_PEER_SYNC_PEER_URL retained for backward compatibility
This commit is contained in:
2026-02-25 16:08:26 +13:00
parent 0b31b75789
commit 3f6a061ffe
5 changed files with 293 additions and 23 deletions

View File

@@ -79,6 +79,118 @@ def test_env_peer_not_duplicated_when_also_in_config(monkeypatch):
assert urls.count("http://ddo-2:2222") == 1
def test_numbered_env_peers(monkeypatch):
    """Numbered DADNS_PEER_SYNC_PEER_<N>_* variables each contribute a peer.

    Peer 1 is given a URL, username and password; peer 2 only a URL. Both
    URLs must show up on the worker, and no other peer may be present.
    """
    numbered_env = {
        "DADNS_PEER_SYNC_PEER_1_URL": "http://node-a:2222",
        "DADNS_PEER_SYNC_PEER_1_USERNAME": "peersync",
        "DADNS_PEER_SYNC_PEER_1_PASSWORD": "s3cr3t",
        "DADNS_PEER_SYNC_PEER_2_URL": "http://node-b:2222",
    }
    for name, value in numbered_env.items():
        monkeypatch.setenv(name, value)

    worker = PeerSyncWorker({"enabled": True})
    peer_urls = [peer["url"] for peer in worker.peers]

    for expected_url in ("http://node-a:2222", "http://node-b:2222"):
        assert expected_url in peer_urls
    # Exactly the two env-provided peers, nothing else.
    assert len(peer_urls) == 2
def test_numbered_env_peers_not_duplicated(monkeypatch):
    """A numbered env peer whose URL is also in the config is listed once."""
    shared_url = "http://ddo-2:2222"
    monkeypatch.setenv("DADNS_PEER_SYNC_PEER_1_URL", shared_url)

    worker = PeerSyncWorker(BASE_CONFIG)

    occurrences = sum(1 for peer in worker.peers if peer["url"] == shared_url)
    assert occurrences == 1
def test_get_peer_urls():
    """get_peer_urls() returns the URL list for the configured peers."""
    expected_urls = ["http://ddo-2:2222"]
    assert PeerSyncWorker(BASE_CONFIG).get_peer_urls() == expected_urls
# ---------------------------------------------------------------------------
# Health tracking
# ---------------------------------------------------------------------------
def test_peer_health_starts_healthy():
    """A freshly constructed worker reports its peer healthy with no failures."""
    health = PeerSyncWorker(BASE_CONFIG)._health("http://ddo-2:2222")

    assert health["consecutive_failures"] == 0
    assert health["healthy"] is True
def test_record_failure_increments_count():
    """One recorded failure bumps the counter to 1 but leaves the peer healthy."""
    peer_url = "http://ddo-2:2222"
    worker = PeerSyncWorker(BASE_CONFIG)

    worker._record_failure(peer_url, ConnectionError("down"))

    assert worker._health(peer_url)["consecutive_failures"] == 1
    # A single failure is not enough to flip the healthy flag.
    assert worker._health(peer_url)["healthy"] is True
def test_record_failure_marks_degraded_at_threshold():
    """FAILURE_THRESHOLD consecutive failures flip the peer to unhealthy."""
    from directdnsonly.app.peer_sync import FAILURE_THRESHOLD

    peer_url = "http://ddo-2:2222"
    worker = PeerSyncWorker(BASE_CONFIG)

    failures_recorded = 0
    while failures_recorded < FAILURE_THRESHOLD:
        worker._record_failure(peer_url, ConnectionError("down"))
        failures_recorded += 1

    assert worker._health(peer_url)["healthy"] is False
def test_record_success_resets_health():
    """A success after degradation restores healthy=True and zeroes the count."""
    from directdnsonly.app.peer_sync import FAILURE_THRESHOLD

    peer_url = "http://ddo-2:2222"
    worker = PeerSyncWorker(BASE_CONFIG)

    # Drive the peer into the degraded state first.
    for _attempt in range(FAILURE_THRESHOLD):
        worker._record_failure(peer_url, ConnectionError("down"))
    assert not worker._health(peer_url)["healthy"]

    worker._record_success(peer_url)

    # Health is re-read after the success rather than reusing an earlier lookup.
    assert worker._health(peer_url)["healthy"] is True
    assert worker._health(peer_url)["consecutive_failures"] == 0
# ---------------------------------------------------------------------------
# Peer discovery (_discover_peers_from)
# ---------------------------------------------------------------------------
def test_discover_peers_adds_new_peer(monkeypatch):
    """A new URL returned by /internal/peers ends up in the worker's peer list."""
    worker = PeerSyncWorker(BASE_CONFIG)
    discovered_url = "http://node-c:2222"

    def mock_get(url, auth=None, timeout=10, params=None):
        # Stand-in for requests.get: HTTP 200 whose JSON body is one new URL.
        response = MagicMock()
        response.json.return_value = [discovered_url]
        response.status_code = 200
        return response

    monkeypatch.setattr("directdnsonly.app.peer_sync.requests.get", mock_get)
    seed_peer = BASE_CONFIG["peers"][0]
    worker._discover_peers_from(seed_peer)

    known_urls = [peer["url"] for peer in worker.peers]
    assert discovered_url in known_urls
def test_discover_peers_skips_known(monkeypatch):
    """A URL the worker already knows is not added a second time."""
    worker = PeerSyncWorker(BASE_CONFIG)

    def mock_get(url, auth=None, timeout=10, params=None):
        # Stand-in for requests.get: HTTP 200 echoing the already-known peer.
        response = MagicMock()
        response.json.return_value = ["http://ddo-2:2222"]
        response.status_code = 200
        return response

    monkeypatch.setattr("directdnsonly.app.peer_sync.requests.get", mock_get)
    seed_peer = BASE_CONFIG["peers"][0]
    worker._discover_peers_from(seed_peer)

    # Peer list size is unchanged by the discovery pass.
    assert len(worker.peers) == 1
def test_discover_peers_tolerates_failure(monkeypatch):
    """A network error during discovery neither propagates nor changes peers.

    requests.get is replaced with a stub that always raises ConnectionError.
    The discovery call must return normally, and the worker's peer list must
    be exactly what it was before the call.
    """
    worker = PeerSyncWorker(BASE_CONFIG)
    urls_before = [peer["url"] for peer in worker.peers]

    def mock_get(*args, **kwargs):
        raise ConnectionError("peer down")

    monkeypatch.setattr("directdnsonly.app.peer_sync.requests.get", mock_get)

    # Should not raise.
    worker._discover_peers_from(BASE_CONFIG["peers"][0])

    # Previously the test ended here with no assertion, so a regression that
    # corrupted the peer list on failure would have gone unnoticed.
    assert [peer["url"] for peer in worker.peers] == urls_before
def test_start_skips_when_disabled(caplog):
worker = PeerSyncWorker({"enabled": False})
worker.start()