You've already forked directdnsonly
feat: operational status endpoint + reconciler/peer state tracking 📊
- ReconciliationWorker._last_run stores per-pass stats (da_servers_polled, zones_in_da/db, orphans_found/queued, hostnames_backfilled/migrated, zones_healed, duration_seconds, dry_run flag)
- ReconciliationWorker.get_status() exposes state for API/UI consumption
- _heal_backends() now returns healed count
- PeerSyncWorker.get_peer_status() serialises _peer_health to JSON-safe dict (url, healthy, consecutive_failures, last_seen) with summary totals
- WorkerManager tracks dead-letter count; queue_status() now returns nested reconciler/peer_sync dicts replacing flat reconciler_alive/peer_syncer_alive
- New GET /status endpoint (StatusAPI) aggregates queue depths, worker liveness, reconciler last-run, peer health, and live zone count; computes ok/degraded/error
- .gitignore: exclude .claude/, .vscode/, .env (always local)
- app.yml: add documented datastore section (SQLite default + MySQL commented)
- 164 tests passing (23 new tests added)
This commit is contained in:
@@ -38,4 +38,5 @@ def patch_connect(db_session, monkeypatch):
|
||||
monkeypatch.setattr("directdnsonly.app.utils.connect", _factory)
|
||||
monkeypatch.setattr("directdnsonly.app.reconciler.connect", _factory)
|
||||
monkeypatch.setattr("directdnsonly.app.peer_sync.connect", _factory)
|
||||
monkeypatch.setattr("directdnsonly.app.api.status.connect", _factory)
|
||||
return db_session
|
||||
|
||||
@@ -394,3 +394,53 @@ def test_sync_empty_peer_list(patch_connect, monkeypatch):
|
||||
monkeypatch.setattr("directdnsonly.app.peer_sync.requests.get", mock_get)
|
||||
|
||||
worker._sync_from_peer(_make_peer())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_peer_status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_get_peer_status_no_contact_yet():
    """A worker that has not contacted its peer yet reports it as healthy.

    Checks both the summary counters and the single per-peer entry, whose
    ``last_seen`` must still be ``None``.
    """
    snapshot = PeerSyncWorker(BASE_CONFIG).get_peer_status()

    # Summary totals.
    assert snapshot["enabled"] is True
    assert snapshot["total"] == 1
    assert snapshot["healthy"] == 1
    assert snapshot["degraded"] == 0

    # Per-peer entry.
    first_peer = snapshot["peers"][0]
    assert first_peer["url"] == "http://ddo-2:2222"
    assert first_peer["healthy"] is True
    assert first_peer["last_seen"] is None
|
||||
|
||||
|
||||
def test_get_peer_status_after_success():
    """After one recorded success the peer stays healthy and has a ``last_seen``."""
    syncer = PeerSyncWorker(BASE_CONFIG)
    syncer._record_success("http://ddo-2:2222")

    report = syncer.get_peer_status()

    assert report["healthy"] == 1
    assert report["degraded"] == 0
    first_peer = report["peers"][0]
    assert first_peer["last_seen"] is not None
|
||||
|
||||
|
||||
def test_get_peer_status_after_degraded():
    """FAILURE_THRESHOLD recorded failures move the peer into the degraded count."""
    from directdnsonly.app.peer_sync import FAILURE_THRESHOLD

    peer_url = "http://ddo-2:2222"
    syncer = PeerSyncWorker(BASE_CONFIG)

    # Record exactly FAILURE_THRESHOLD failures, each with a fresh exception.
    attempts = 0
    while attempts < FAILURE_THRESHOLD:
        syncer._record_failure(peer_url, Exception("timeout"))
        attempts += 1

    report = syncer.get_peer_status()
    assert report["healthy"] == 0
    assert report["degraded"] == 1
    assert report["peers"][0]["healthy"] is False
|
||||
|
||||
|
||||
def test_get_peer_status_disabled():
    """With an empty config the worker reports peer sync disabled and no peers."""
    report = PeerSyncWorker({}).get_peer_status()

    assert report["enabled"] is False
    assert report["total"] == 0
    assert report["peers"] == []
|
||||
|
||||
@@ -317,3 +317,83 @@ def test_heal_skipped_when_no_registry(delete_queue, patch_connect):
|
||||
w._reconcile_all()
|
||||
|
||||
assert save_queue.empty()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_status — last-run state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_get_status_before_any_run(worker):
    """Before any reconcile pass: enabled, not alive, and an empty ``last_run``."""
    snapshot = worker.get_status()

    assert snapshot["enabled"] is True
    assert snapshot["alive"] is False
    assert snapshot["last_run"] == {}
|
||||
|
||||
|
||||
def test_get_status_after_run(worker, patch_connect):
    """One reconcile pass against an empty DA zone set populates ``last_run``."""
    with _patch_da(set()):
        worker._reconcile_all()

    snapshot = worker.get_status()
    assert snapshot["enabled"] is True

    last_run = snapshot["last_run"]
    assert last_run["status"] == "ok"

    # Timing fields only need to be present; their values are not pinned.
    for timing_key in ("started_at", "completed_at", "duration_seconds"):
        assert timing_key in last_run

    assert last_run["da_servers_polled"] == 1
    assert last_run["da_servers_unreachable"] == 0
    assert last_run["dry_run"] is False
|
||||
|
||||
|
||||
def test_get_status_counts_unreachable_server(worker, patch_connect):
    """A pass run under ``_patch_da(None)`` counts the server as polled and unreachable."""
    # NOTE(review): None presumably makes the DA fetch look like a failure -
    # confirm against the _patch_da helper.
    with _patch_da(None):
        worker._reconcile_all()

    last_run = worker.get_status()["last_run"]
    assert last_run["da_servers_polled"] == 1
    assert last_run["da_servers_unreachable"] == 1
|
||||
|
||||
|
||||
def test_get_status_counts_orphans(worker, delete_queue, patch_connect):
    """A DB zone missing from the DA zone set is counted as found and queued."""
    orphan = Domain(domain="orphan.com", hostname="da1.example.com", username="admin")
    patch_connect.add(orphan)
    patch_connect.commit()

    # DA reports no zones at all, so the stored domain has no counterpart.
    with _patch_da(set()):
        worker._reconcile_all()

    last_run = worker.get_status()["last_run"]
    assert last_run["orphans_found"] == 1
    assert last_run["orphans_queued"] == 1
|
||||
|
||||
|
||||
def test_get_status_dry_run_orphans_not_queued_in_stats(dry_run_worker, patch_connect):
    """In dry-run mode an orphan is counted as found but not as queued."""
    orphan = Domain(domain="orphan.com", hostname="da1.example.com", username="admin")
    patch_connect.add(orphan)
    patch_connect.commit()

    with _patch_da(set()):
        dry_run_worker._reconcile_all()

    last_run = dry_run_worker.get_status()["last_run"]
    assert last_run["dry_run"] is True
    assert last_run["orphans_found"] == 1
    assert last_run["orphans_queued"] == 0
|
||||
|
||||
|
||||
def test_get_status_zones_in_db_counted(worker, patch_connect):
    """Three zones present in both DB and DA give matching counts and no orphans."""
    zone_names = ("a.com", "b.com", "c.com")

    for zone_name in zone_names:
        record = Domain(domain=zone_name, hostname="da1.example.com", username="admin")
        patch_connect.add(record)
    patch_connect.commit()

    # DA reports exactly the same zones that were stored above.
    with _patch_da(set(zone_names)):
        worker._reconcile_all()

    last_run = worker.get_status()["last_run"]
    assert last_run["zones_in_db"] == 3
    assert last_run["zones_in_da"] == 3
    assert last_run["orphans_found"] == 0
|
||||
|
||||
162
tests/test_status_api.py
Normal file
162
tests/test_status_api.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""Tests for directdnsonly.app.api.status — StatusAPI."""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import cherrypy
|
||||
import pytest
|
||||
|
||||
from directdnsonly.app.api.status import StatusAPI
|
||||
from directdnsonly.app.db.models import Domain
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Canned "reconciler" section of a queue_status() payload: enabled and alive,
# with no completed pass recorded yet.
_RECONCILER_OK = {
    "enabled": True, "alive": True,
    "dry_run": False, "interval_minutes": 60,
    "last_run": {},
}

# Canned "peer_sync" section of a queue_status() payload: feature disabled,
# no peers, all counters at zero.
_PEER_SYNC_OFF = {
    "enabled": False, "alive": False,
    "peers": [],
    "total": 0, "healthy": 0, "degraded": 0,
}
|
||||
|
||||
|
||||
def _qs(**overrides):
    """Build a healthy queue-status payload, with ``overrides`` applied on top.

    The defaults describe empty queues, no dead letters, all three workers
    alive, the ``_RECONCILER_OK`` reconciler section and the ``_PEER_SYNC_OFF``
    peer-sync section. Any keyword argument replaces (or adds) the key of the
    same name.
    """
    defaults = {
        # Queue depths.
        "save_queue_size": 0,
        "delete_queue_size": 0,
        "retry_queue_size": 0,
        "dead_letters": 0,
        # Worker liveness flags.
        "save_worker_alive": True,
        "delete_worker_alive": True,
        "retry_worker_alive": True,
        # Nested sections (shared module-level dicts, not copies).
        "reconciler": _RECONCILER_OK,
        "peer_sync": _PEER_SYNC_OFF,
    }
    return {**defaults, **overrides}
|
||||
|
||||
|
||||
def _api(qs=None):
    """Return a ``StatusAPI`` wired to a mocked worker manager.

    Args:
        qs: Payload the mocked ``queue_status()`` should return. When omitted
            (``None``) the healthy default from ``_qs()`` is used.

    Returns:
        A ``StatusAPI`` whose worker manager is a ``MagicMock``.
    """
    wm = MagicMock()
    # Compare against None explicitly: `qs or _qs()` would silently swap an
    # intentionally empty payload ({}) for the healthy defaults.
    wm.queue_status.return_value = _qs() if qs is None else qs
    return StatusAPI(wm)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _compute_overall
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_overall_ok_all_healthy():
    """The default healthy payload is rated ``"ok"``."""
    verdict = StatusAPI._compute_overall(_qs())
    assert verdict == "ok"
|
||||
|
||||
|
||||
def test_overall_error_save_worker_dead():
    """A dead save worker is rated ``"error"``."""
    payload = _qs(save_worker_alive=False)
    assert StatusAPI._compute_overall(payload) == "error"
|
||||
|
||||
|
||||
def test_overall_error_delete_worker_dead():
    """A dead delete worker is rated ``"error"``."""
    payload = _qs(delete_worker_alive=False)
    assert StatusAPI._compute_overall(payload) == "error"
|
||||
|
||||
|
||||
def test_overall_degraded_retries_pending():
    """A non-empty retry queue is rated ``"degraded"``."""
    payload = _qs(retry_queue_size=3)
    assert StatusAPI._compute_overall(payload) == "degraded"
|
||||
|
||||
|
||||
def test_overall_degraded_dead_letters():
    """Any dead letter is rated ``"degraded"``."""
    payload = _qs(dead_letters=1)
    assert StatusAPI._compute_overall(payload) == "degraded"
|
||||
|
||||
|
||||
def test_overall_degraded_peer_unhealthy():
    """A peer-sync section reporting one degraded peer is rated ``"degraded"``."""
    # Work on a copy so the shared _PEER_SYNC_OFF fixture stays untouched.
    unhealthy_peers = dict(_PEER_SYNC_OFF)
    unhealthy_peers["degraded"] = 1

    verdict = StatusAPI._compute_overall(_qs(peer_sync=unhealthy_peers))
    assert verdict == "degraded"
|
||||
|
||||
|
||||
def test_overall_error_takes_priority_over_degraded():
    """With both an error and a degraded condition present, ``"error"`` wins."""
    payload = _qs(save_worker_alive=False, retry_queue_size=5)
    verdict = StatusAPI._compute_overall(payload)
    assert verdict == "error"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _build — structure and zone count
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_build_structure(patch_connect):
    """``_build()`` returns every expected top-level section."""
    document = _api()._build()

    expected_sections = (
        "status",
        "queues",
        "workers",
        "reconciler",
        "peer_sync",
        "zones",
    )
    for section in expected_sections:
        assert section in document
|
||||
|
||||
|
||||
def test_build_zone_count_zero(patch_connect):
    """With no domains stored, the zone total is zero."""
    zones = _api()._build()["zones"]
    assert zones["total"] == 0
|
||||
|
||||
|
||||
def test_build_zone_count_with_domains(patch_connect):
    """The zone total equals the number of stored ``Domain`` rows."""
    for zone_name in ("a.com", "b.com", "c.com"):
        record = Domain(domain=zone_name, hostname="da1.example.com", username="admin")
        patch_connect.add(record)
    patch_connect.commit()

    document = _api()._build()
    assert document["zones"]["total"] == 3
|
||||
|
||||
|
||||
def test_build_queues_forwarded(patch_connect):
    """Queue depths from ``queue_status()`` appear under ``queues`` by short name."""
    payload = _qs(
        save_queue_size=2,
        delete_queue_size=1,
        retry_queue_size=3,
        dead_letters=1,
    )
    queues = _api(payload)._build()["queues"]

    assert queues["save"] == 2
    assert queues["delete"] == 1
    assert queues["retry"] == 3
    assert queues["dead_letters"] == 1
|
||||
|
||||
|
||||
def test_build_workers_forwarded(patch_connect):
    """Worker liveness flags appear under ``workers`` by short name."""
    workers = _api()._build()["workers"]

    for worker_name in ("save", "delete", "retry_drain"):
        assert workers[worker_name] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# index — JSON encoding
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_index_returns_valid_json(patch_connect):
    """``index()`` returns a JSON document with an ``ok`` status and an int zone total."""
    from unittest.mock import patch

    api = _api()

    # Swap cherrypy.response for a stub with a plain dict of headers, for the
    # duration of the call only. patch.object restores the real attribute on
    # exit, so the stub no longer leaks into tests that run afterwards (the
    # previous direct assignment to cherrypy.response was never undone).
    fake_response = MagicMock(headers={})
    with patch.object(cherrypy, "response", fake_response):
        body = api.index()

    data = json.loads(body)
    assert data["status"] == "ok"
    assert isinstance(data["zones"]["total"], int)
|
||||
Reference in New Issue
Block a user