From 3a80fd77de98af31fb59508f1fc64dc5babbb450 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:09:58 -0500 Subject: [PATCH 01/26] feat(admin): authentication gate for the admin console (26.18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The admin console serves the full host/port inventory, /export.{json,csv}, the /api/v1/* query API, and the POST /scan trigger, but had no auth and no off-loopback guard — unlike the health server. Binding admin.addr off-loopback (e.g. 0.0.0.0, or via INVENTORY_ADMIN_ADDR) exposed everything on the network. - Add admin.auth_token (env INVENTORY_ADMIN_TOKEN): a shared secret gating every route. Accepts Authorization: Bearer or HTTP Basic auth with the token as the password (browsers get a native prompt). Constant-time compare; no-op when unset so the loopback default stays credential-free. - Add admin.ServerOptions{AuthToken} threaded through NewServer + runtime. - Validation refuses off-loopback admin binds without a token, mirroring the existing health.addr rule; chmod-600 enforcement extended to the new token. - Tests cover Bearer/Basic accept+reject, export & POST /scan gating (auth precedes CSRF), the loopback no-token regression, and config validation. 
Co-Authored-By: Claude Opus 4.8 --- cmd/internal/runtime/runtime.go | 1 + internal/admin/auth_test.go | 115 ++++++++++++++++++++++++++++++++ internal/admin/middleware.go | 34 +++++++++- internal/admin/server.go | 13 ++++ internal/admin/server_test.go | 6 +- internal/config/config.go | 20 +++++- internal/config/config_test.go | 50 +++++++++++++- 7 files changed, 230 insertions(+), 9 deletions(-) create mode 100644 internal/admin/auth_test.go diff --git a/cmd/internal/runtime/runtime.go b/cmd/internal/runtime/runtime.go index ce77024..8002fc2 100644 --- a/cmd/internal/runtime/runtime.go +++ b/cmd/internal/runtime/runtime.go @@ -148,6 +148,7 @@ func Run(opts Options) int { cfg.Admin.Addr, opts.Name, db.Hosts(), db.Ports(), db.Scans(), tracker.Get, a.Trigger, + admin.ServerOptions{AuthToken: cfg.Admin.AuthToken}, ) if err != nil { slog.Error("failed to create admin server", "err", err) diff --git a/internal/admin/auth_test.go b/internal/admin/auth_test.go new file mode 100644 index 0000000..438602d --- /dev/null +++ b/internal/admin/auth_test.go @@ -0,0 +1,115 @@ +package admin_test + +import ( + "context" + "net/http" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/Ronin48/NetworkInventoryAgent/internal/admin" +) + +const testAdminToken = "s3cr3t-admin-token" + +// newAuthedServer starts an admin server gated by testAdminToken. A non-nil +// trigger is wired so POST /scan exercises the auth gate (which must run before +// the CSRF check). 
+func newAuthedServer(t *testing.T) *admin.Server { + t.Helper() + srv, err := admin.NewServer(":0", "test-agent", + &mockHostStore{}, &mockPortStore{}, &mockScanStore{}, + healthyStatus, func() bool { return true }, + admin.ServerOptions{AuthToken: testAdminToken}, + ) + require.NoError(t, err) + require.NoError(t, srv.Start()) + t.Cleanup(func() { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + _ = srv.Shutdown(ctx) + }) + return srv +} + +// doReq issues a request to the server with optional mutation (headers/auth). +func doReq(t *testing.T, srv *admin.Server, method, path string, mut func(*http.Request)) *http.Response { + t.Helper() + req, err := http.NewRequest(method, "http://"+srv.Addr()+path, nil) + require.NoError(t, err) + if mut != nil { + mut(req) + } + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + return resp +} + +func TestAuth_NoCredentials_401WithChallenge(t *testing.T) { + srv := newAuthedServer(t) + resp := doReq(t, srv, http.MethodGet, "/", nil) + defer func() { _ = resp.Body.Close() }() + + assert.Equal(t, http.StatusUnauthorized, resp.StatusCode) + assert.Contains(t, resp.Header.Get("WWW-Authenticate"), "Basic") +} + +func TestAuth_CorrectBearer_200(t *testing.T) { + srv := newAuthedServer(t) + resp := doReq(t, srv, http.MethodGet, "/", func(r *http.Request) { + r.Header.Set("Authorization", "Bearer "+testAdminToken) + }) + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusOK, resp.StatusCode) +} + +func TestAuth_CorrectBasicPassword_200(t *testing.T) { + srv := newAuthedServer(t) + resp := doReq(t, srv, http.MethodGet, "/", func(r *http.Request) { + r.SetBasicAuth("anyuser", testAdminToken) + }) + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusOK, resp.StatusCode) +} + +func TestAuth_WrongBearer_401(t *testing.T) { + srv := newAuthedServer(t) + resp := doReq(t, srv, http.MethodGet, "/", func(r *http.Request) { + 
r.Header.Set("Authorization", "Bearer wrong") + }) + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusUnauthorized, resp.StatusCode) +} + +func TestAuth_WrongBasicPassword_401(t *testing.T) { + srv := newAuthedServer(t) + resp := doReq(t, srv, http.MethodGet, "/", func(r *http.Request) { + r.SetBasicAuth("anyuser", "wrong") + }) + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusUnauthorized, resp.StatusCode) +} + +// Export and the scan trigger must be behind the same gate. The unauthenticated +// POST /scan must be rejected by auth (401) before CSRF runs (403). +func TestAuth_GatesExportAndScanTrigger(t *testing.T) { + srv := newAuthedServer(t) + + exp := doReq(t, srv, http.MethodGet, "/export.json", nil) + _ = exp.Body.Close() + assert.Equal(t, http.StatusUnauthorized, exp.StatusCode, "export must require auth") + + scan := doReq(t, srv, http.MethodPost, "/scan", nil) + _ = scan.Body.Close() + assert.Equal(t, http.StatusUnauthorized, scan.StatusCode, "auth must precede CSRF on POST /scan") +} + +// Regression guard: the loopback default (no token) stays credential-free. 
+func TestAuth_EmptyToken_NoGate(t *testing.T) { + srv := newTestServer(t, &mockHostStore{}, &mockPortStore{}, &mockScanStore{}) + resp := doReq(t, srv, http.MethodGet, "/", nil) + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusOK, resp.StatusCode) +} diff --git a/internal/admin/middleware.go b/internal/admin/middleware.go index be2400c..ec37235 100644 --- a/internal/admin/middleware.go +++ b/internal/admin/middleware.go @@ -4,13 +4,16 @@ import ( "crypto/subtle" "log/slog" "net/http" + "strings" "time" ) -// middleware wraps the mux with three cross-cutting concerns: +// middleware wraps the mux with four cross-cutting concerns: // - per-request access logging (one slog record per response) // - baseline security headers (defence-in-depth for the loopback console; // non-trivial once operators bind it to 0.0.0.0) +// - shared-secret authentication (no-op when no token is configured, so the +// loopback default stays credential-free) // - CSRF protection on state-changing methods (POST/PUT/PATCH/DELETE) // // CSP keeps 'unsafe-inline' for styles because the templates embed a single @@ -31,6 +34,12 @@ func (s *Server) middleware(next http.Handler) http.Handler { "form-action 'self'; "+ "frame-ancestors 'none'") + if !s.checkAuth(r) { + w.Header().Set("WWW-Authenticate", `Basic realm="inventory-admin"`) + http.Error(w, "unauthorized", http.StatusUnauthorized) + return + } + if !s.checkCSRF(r) { http.Error(w, "csrf token mismatch", http.StatusForbidden) return @@ -70,6 +79,29 @@ func (s *Server) checkCSRF(r *http.Request) bool { return subtle.ConstantTimeCompare([]byte(got), []byte(s.csrfToken)) == 1 } +// checkAuth enforces the shared-secret gate on every request. It is a no-op +// when the server was constructed without an auth token, so the loopback +// default needs no credentials. 
+// +// The console is a browser UI, so two credential carriers are accepted: an +// `Authorization: Bearer <token>` header (curl, the JSON API, exports) and +// HTTP Basic auth with the token as the password and any username (so a +// browser shows its native login dialog). Both are compared in constant time +// to deny timing-based bisection of the token. +func (s *Server) checkAuth(r *http.Request) bool { + if s.authToken == "" { + return true + } + want := []byte(s.authToken) + if got, ok := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer "); ok { + return subtle.ConstantTimeCompare([]byte(got), want) == 1 + } + if _, pass, ok := r.BasicAuth(); ok { + return subtle.ConstantTimeCompare([]byte(pass), want) == 1 + } + return false +} + // statusRecorder lets the access-log middleware capture the response status // that handlers chose. Defaults to 200 because http.ResponseWriter only // records WriteHeader when an explicit status is set. diff --git a/internal/admin/server.go b/internal/admin/server.go index 1743900..71bedd1 100644 --- a/internal/admin/server.go +++ b/internal/admin/server.go @@ -30,6 +30,16 @@ var templateFS embed.FS // already pending. Pass a nil Trigger to omit the endpoint. type Trigger func() bool +// ServerOptions carries the optional configuration for NewServer. The zero +// value produces the loopback-friendly default: no auth, so a browser or curl +// reaches the console without credentials. +type ServerOptions struct { + // AuthToken, when non-empty, gates every route. Clients present it as + // `Authorization: Bearer <token>` or via HTTP Basic auth using the token + // as the password (any username). Mismatches return 401 in constant time. + AuthToken string +} + // Server is the admin web console HTTP server. 
type Server struct { agentName string @@ -39,6 +49,7 @@ type Server struct { status func() health.Status trigger Trigger csrfToken string + authToken string srv *http.Server tmpl *template.Template } @@ -54,6 +65,7 @@ func NewServer( scans store.ScanStore, status func() health.Status, trigger Trigger, + opts ServerOptions, ) (*Server, error) { tmpl, err := template.New("").Funcs(funcMap).ParseFS(templateFS, "templates/*.html") if err != nil { @@ -73,6 +85,7 @@ func NewServer( status: status, trigger: trigger, csrfToken: csrf, + authToken: opts.AuthToken, tmpl: tmpl, } diff --git a/internal/admin/server_test.go b/internal/admin/server_test.go index 03d6298..fca1f89 100644 --- a/internal/admin/server_test.go +++ b/internal/admin/server_test.go @@ -141,7 +141,7 @@ func healthyStatus() health.Status { func newTestServer(t *testing.T, hosts *mockHostStore, ports *mockPortStore, scans *mockScanStore) *admin.Server { t.Helper() - srv, err := admin.NewServer(":0", "test-agent", hosts, ports, scans, healthyStatus, nil) + srv, err := admin.NewServer(":0", "test-agent", hosts, ports, scans, healthyStatus, nil, admin.ServerOptions{}) require.NoError(t, err) require.NoError(t, srv.Start()) t.Cleanup(func() { @@ -162,7 +162,7 @@ func get(t *testing.T, srv *admin.Server, path string) *http.Response { // --- tests --- func TestNewServer_ParsesTemplates(t *testing.T) { - _, err := admin.NewServer(":0", "agent", &mockHostStore{}, &mockPortStore{}, &mockScanStore{}, healthyStatus, nil) + _, err := admin.NewServer(":0", "agent", &mockHostStore{}, &mockPortStore{}, &mockScanStore{}, healthyStatus, nil, admin.ServerOptions{}) require.NoError(t, err, "template parsing should succeed on a clean build") } @@ -373,7 +373,7 @@ func TestAllPages_ContentType(t *testing.T) { } func TestServer_Shutdown(t *testing.T) { - srv, err := admin.NewServer(":0", "agent", &mockHostStore{}, &mockPortStore{}, &mockScanStore{}, healthyStatus, nil) + srv, err := admin.NewServer(":0", "agent", 
&mockHostStore{}, &mockPortStore{}, &mockScanStore{}, healthyStatus, nil, admin.ServerOptions{}) require.NoError(t, err) require.NoError(t, srv.Start()) diff --git a/internal/config/config.go b/internal/config/config.go index c823c30..e26bda2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -308,9 +308,17 @@ type HealthConfig struct { type AdminConfig struct { // Addr is the address the admin web console listens on. - // Default is 127.0.0.1:9090 (loopback only). Bind to 0.0.0.0 only in - // trusted network environments, as the console is unauthenticated (OWASP A01/A05). + // Default is 127.0.0.1:9090 (loopback only). When bound off-loopback the + // agent refuses to start unless AuthToken is also set, because the console + // exposes the full inventory, exports, the JSON API, and POST /scan + // (OWASP A01/A05). Addr string `json:"addr"` + // AuthToken is the shared secret required to reach the admin console when + // Addr is not a loopback bind. Clients authenticate with either + // `Authorization: Bearer <token>` (curl/API) or HTTP Basic auth using the + // token as the password (so browsers get a native login prompt). Leave + // empty for the loopback-only default; when set, keep the file chmod 600. + AuthToken string `json:"auth_token,omitempty"` } type WatchdogConfig struct { @@ -429,7 +437,7 @@ func Load(path string) (*Config, error) { // token is readable by group or other. The SECURITY.md advice is chmod 600; // catching this at startup beats discovering it after a token leak. 
func (c *Config) checkSecretsPerm(path string, mode os.FileMode) error { - hasSecret := c.Health.AuthToken != "" || c.Watchdog.PeerToken != "" + hasSecret := c.Health.AuthToken != "" || c.Watchdog.PeerToken != "" || c.Admin.AuthToken != "" if !hasSecret { return nil } @@ -459,6 +467,9 @@ func (c *Config) validate() error { if !isLoopbackBind(c.Health.Addr) && c.Health.AuthToken == "" { return fmt.Errorf("health.addr %q is not loopback; set health.auth_token to gate /health and /status (the endpoints expose host counts; binding off-loopback without a token is OWASP A01/A05)", c.Health.Addr) } + if !isLoopbackBind(c.Admin.Addr) && c.Admin.AuthToken == "" { + return fmt.Errorf("admin.addr %q is not loopback; set admin.auth_token (or INVENTORY_ADMIN_TOKEN) to gate the console, exports, JSON API, and POST /scan (binding off-loopback without a token is OWASP A01/A05)", c.Admin.Addr) + } return nil } @@ -518,6 +529,9 @@ func applyEnv(cfg *Config) { if v := os.Getenv("INVENTORY_PEER_TOKEN"); v != "" { cfg.Watchdog.PeerToken = v } + if v := os.Getenv("INVENTORY_ADMIN_TOKEN"); v != "" { + cfg.Admin.AuthToken = v + } // Listener addresses also come from env so containerised deployments // can repoint without rewriting the JSON file (e.g. // INVENTORY_HEALTH_ADDR=0.0.0.0:18080 + INVENTORY_AUTH_TOKEN=... 
in diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 8011435..02384c2 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -3,6 +3,7 @@ package config_test import ( "encoding/json" "os" + "path/filepath" "testing" "time" @@ -67,8 +68,10 @@ func TestLoad_ValidFile(t *testing.T) { func TestLoad_AdminConfig(t *testing.T) { data := map[string]any{ - "log": map[string]any{"level": "info", "format": "text"}, - "admin": map[string]any{"addr": "0.0.0.0:9090"}, + "log": map[string]any{"level": "info", "format": "text"}, + // An off-loopback bind now requires a token (see the off-loopback + // validation tests below); include one so this parse check passes. + "admin": map[string]any{"addr": "0.0.0.0:9090", "auth_token": "tok"}, } cfg, err := config.Load(writeTempConfig(t, data)) require.NoError(t, err) @@ -197,6 +200,49 @@ func TestLoad_InvalidPeerAddr_NoHost(t *testing.T) { assert.Contains(t, err.Error(), "peer_addr") } +func TestLoad_AdminOffLoopbackWithoutToken_Error(t *testing.T) { + data := map[string]any{ + "log": map[string]any{"level": "info", "format": "text"}, + "admin": map[string]any{"addr": "0.0.0.0:9090"}, + } + _, err := config.Load(writeTempConfig(t, data)) + require.Error(t, err) + assert.Contains(t, err.Error(), "admin.addr") +} + +func TestLoad_AdminOffLoopbackWithToken_OK(t *testing.T) { + data := map[string]any{ + "log": map[string]any{"level": "info", "format": "text"}, + "admin": map[string]any{"addr": "0.0.0.0:9090", "auth_token": "tok"}, + } + cfg, err := config.Load(writeTempConfig(t, data)) + require.NoError(t, err) + assert.Equal(t, "tok", cfg.Admin.AuthToken) +} + +func TestLoad_AdminToken_EnvOverride(t *testing.T) { + t.Setenv("INVENTORY_ADMIN_ADDR", "0.0.0.0:9090") + t.Setenv("INVENTORY_ADMIN_TOKEN", "env-tok") + + cfg, err := config.Load("/nonexistent/config.json") + require.NoError(t, err, "off-loopback admin bind is satisfied by the env token") + assert.Equal(t, "env-tok", 
cfg.Admin.AuthToken) +} + +func TestLoad_AdminTokenWorldReadable_Error(t *testing.T) { + b, err := json.Marshal(map[string]any{ + "log": map[string]any{"level": "info", "format": "text"}, + "admin": map[string]any{"addr": "127.0.0.1:9090", "auth_token": "tok"}, + }) + require.NoError(t, err) + path := filepath.Join(t.TempDir(), "config.json") + require.NoError(t, os.WriteFile(path, b, 0o644)) + + _, err = config.Load(path) + require.Error(t, err) + assert.Contains(t, err.Error(), "chmod 600") +} + func TestDuration_UnmarshalJSON_String(t *testing.T) { var d config.Duration require.NoError(t, json.Unmarshal([]byte(`"5m"`), &d)) From 543b99fb3cab6c103033827fea52ba631488b1d6 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:09:58 -0500 Subject: [PATCH 02/26] =?UTF-8?q?docs:=20admin=20auth=20gate=20=E2=80=94?= =?UTF-8?q?=20README,=20SECURITY,=20ChangeLog=2026.18?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document admin.auth_token / INVENTORY_ADMIN_TOKEN in the config and env tables, revise the admin-console endpoint note, update the OWASP A01/A07 rows and operator guidance, and add the 26.18 ChangeLog entry. Co-Authored-By: Claude Opus 4.8 --- ChangeLog.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +++- SECURITY.md | 8 +++++--- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 86bb4fc..f73e229 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,60 @@ _No unreleased changes._ --- +## 26.18 — 2026-06-05 + +Admin console authentication gate. The admin console — which serves the +full host/port inventory, `/export.{json,csv}`, the `/api/v1/*` query API, +and the `POST /scan` trigger — previously had no authentication and no +off-loopback guard, unlike the health server. Binding `admin.addr` to a +non-loopback address (e.g. `0.0.0.0` for Docker, or via +`INVENTORY_ADMIN_ADDR`) exposed everything to anyone on the network. 
This +closes that asymmetry. Post-backlog security hardening (no `Planning.md` +number). + +### Added + +- **`admin.auth_token` config field** (env: `INVENTORY_ADMIN_TOKEN`) — a + shared secret that gates every admin route. When set, clients + authenticate with either `Authorization: Bearer <token>` (curl, the JSON + API, exports, scripts) or HTTP Basic auth using the token as the + password and any username (so browsers show a native login prompt). + Tokens are compared in constant time. Empty/unset is a no-op, so the + loopback default stays credential-free. +- **`admin.ServerOptions`** — carries `AuthToken` into `admin.NewServer`, + mirroring `health.ServerOptions`. + +### Changed + +- **`admin.NewServer` takes a trailing `ServerOptions` argument.** In-tree + callers (runtime + tests) updated. The admin package has no external + callers, so no compatibility shim was added. +- **Off-loopback admin binds now require a token.** Config validation + refuses to start when `admin.addr` is non-loopback and no + `admin.auth_token` is set — the same rule already enforced for + `health.addr`. +- **`chmod 600` enforcement extended** — a config file carrying + `admin.auth_token` must not be group/world-readable, matching the + existing check for `health.auth_token` and `watchdog.peer_token`. + +### Tests + +- `internal/admin/auth_test.go` — no-creds → 401 + `WWW-Authenticate`; + correct Bearer → 200; correct Basic password → 200; wrong Bearer/Basic + → 401; exports and `POST /scan` gated (auth precedes CSRF); empty token + → ungated (loopback regression guard). +- `internal/config/config_test.go` — off-loopback admin without token → + error; with token → ok; `INVENTORY_ADMIN_TOKEN` override satisfies the + rule; world-readable file carrying an admin token → refused. + +### Notes + +- `go test ./...`, `go vet ./...`, and `golangci-lint run ./...` all green + (0 issues). 
The shipped `configs/*.json` use loopback binds, so no token + is required for the default local/paired deployments. + +--- + ## 26.17 — 2026-05-27 Documentation catch-up. No behaviour change — closes the gap between diff --git a/README.md b/README.md index 60f0c6a..162dc0c 100644 --- a/README.md +++ b/README.md @@ -366,6 +366,7 @@ Each agent reads a JSON config file and then applies environment variable overri | `health.client_ca_path` | — | When set, requires mTLS (clients must present a cert signed by this CA). | | **Admin console** | | | | `admin.addr` | `127.0.0.1:9090` | Listen address for the admin console + `/api/v1/*`. | +| `admin.auth_token` | — | Shared secret gating the whole console. Required when `admin.addr` is off-loopback. Clients send `Authorization: Bearer <token>` or HTTP Basic with the token as the password. | | **Watchdog** | | | | `watchdog.peer_addr` | — | Base URL of the partner agent's health server. | | `watchdog.peer_token` | — | Bearer token sent to the peer. Must match peer's `health.auth_token`. | @@ -421,6 +422,7 @@ fails fast if both are set. | `INVENTORY_ADMIN_ADDR` | `admin.addr` | | `INVENTORY_AUTH_TOKEN` | `health.auth_token` | | `INVENTORY_PEER_TOKEN` | `watchdog.peer_token` | +| `INVENTORY_ADMIN_TOKEN` | `admin.auth_token` | ## Health endpoints @@ -434,7 +436,7 @@ Both agents expose two HTTP endpoints used by the watchdog and for external moni | `/status` | GET | JSON-encoded status snapshot (see below) | | `/metrics` | GET | Prometheus text exposition format — counters for scans, probes, DB, watchdog, alerts; gauges for host count + peer-up state | -**Admin console** (default `127.0.0.1:9090`, unauthenticated — keep loopback unless on a trusted segment): +**Admin console** (default `127.0.0.1:9090`). Unauthenticated on the loopback default; set `admin.auth_token` (or `INVENTORY_ADMIN_TOKEN`) to gate every route below. A token is **required** when binding off-loopback — the agent refuses to start otherwise. 
Authenticate with `Authorization: Bearer <token>` or HTTP Basic auth using the token as the password (browsers get a native login prompt): | Endpoint | Method | Response | |----------|--------|----------| diff --git a/SECURITY.md b/SECURITY.md index afd0cf8..ee19855 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -43,13 +43,13 @@ The following table documents the project's posture against the [OWASP Top 10 (2 | # | Category | Status | Notes | |---|----------|--------|-------| -| A01 | Broken Access Control | ⚠️ Partial | `/health` and `/status` are intentionally unauthenticated for simplicity. The default bind address is `127.0.0.1` (loopback only). Operators who expose these endpoints on a wider interface accept responsibility for network-level access control. | +| A01 | Broken Access Control | ⚠️ Partial | `/health` and `/status` are unauthenticated on the loopback default but require `health.auth_token` when bound off-loopback (enforced at startup). The admin console (full inventory, exports, JSON API, `POST /scan`) likewise requires `admin.auth_token` when bound off-loopback — the agent refuses to start otherwise. Both default to `127.0.0.1` (loopback only). | | A02 | Cryptographic Failures | ✅ Pass | Peer-to-peer watchdog traffic supports TLS (with optional mTLS) — set `watchdog.tls.ca_cert_path` and `health.tls_cert_path`/`tls_key_path` in the configs. TLS 1.2+ enforced. Database is stored unencrypted; operators should apply filesystem-level encryption where needed. | | A03 | Injection | ✅ Pass | All SQL queries use parameterized `?` placeholders. No shell commands are invoked; the scanner uses `net.Dialer` directly. | | A04 | Insecure Design | ✅ Pass | Health server binds to loopback by default. `peer_addr` is validated to `http`/`https` schemes only, preventing SSRF via alternate URI schemes. No user-controlled input reaches internal APIs without validation. | | A05 | Security Misconfiguration | ✅ Pass | Default `health.addr` is `127.0.0.1:8080` (loopback only). 
HTTP server has explicit read, write, and idle timeouts. Response bodies from peers are capped at 1 MiB. | | A06 | Vulnerable Components | ✅ Pass | All dependencies are pure Go (no C libraries). `go.sum` is committed and verified on every build. `govulncheck` is required before dependency PRs (see CONTRIBUTING.md). | -| A07 | Auth Failures | ⚠️ Partial | No authentication on health endpoints by design. Mitigated by loopback-only default and operator guidance in this document. | +| A07 | Auth Failures | ⚠️ Partial | Loopback-only defaults are unauthenticated by design. Off-loopback binds of both the health server and the admin console require a shared bearer/Basic token, enforced at startup; tokens are compared in constant time. | | A08 | Data Integrity | ✅ Pass | `go.sum` provides cryptographic verification of all module downloads. Config validation rejects malformed or unexpected values at startup. | | A09 | Logging & Monitoring | ✅ Pass | Structured `log/slog` output in text or JSON format. All three watchdog failure conditions (liveness, freshness, consistency) are logged at `WARN` or `ERROR` level with structured fields. | | A10 | SSRF | ✅ Pass | `peer_addr` is validated to `http`/`https` only at config load time. Response bodies from external HTTP calls are limited to 1 MiB via `io.LimitReader`. Scanner targets come from operator-controlled config, not external input. | @@ -62,7 +62,9 @@ The OWASP AI Top 10 is **not applicable** to this project. NetworkInventoryAgent NetworkInventoryAgent is designed to run on a trusted internal network. Before deploying, consider the following: -**Health endpoints are unauthenticated.** The `/health` and `/status` endpoints expose agent name, scan counts, host counts, and timestamps to anyone who can reach the listening address. The default bind address is `127.0.0.1` (loopback only). Do not change this to `0.0.0.0` unless the network segment is trusted or access is controlled at the firewall. 
+**Health endpoints are unauthenticated on loopback.** The `/health` and `/status` endpoints expose agent name, scan counts, host counts, and timestamps to anyone who can reach the listening address. The default bind address is `127.0.0.1` (loopback only). Binding off-loopback requires `health.auth_token` (or `INVENTORY_AUTH_TOKEN`); the agent refuses to start without it. + +**The admin console is gated off-loopback.** The console at `admin.addr` (default `127.0.0.1:9090`) serves the full host/port inventory, `/export.json|csv`, the `/api/v1/*` query API, and the `POST /scan` trigger. On the loopback default it is unauthenticated for convenience; binding it off-loopback (e.g. `0.0.0.0` for Docker, or via `INVENTORY_ADMIN_ADDR`) requires `admin.auth_token` (or `INVENTORY_ADMIN_TOKEN`) and the agent refuses to start without it. Clients authenticate with `Authorization: Bearer <token>` or HTTP Basic auth using the token as the password. **Peer communication can use TLS.** Watchdog checks between Wintermute and Neuromancer default to plain HTTP for the loopback case. For off-loopback deployments, switch `watchdog.peer_addr` to `https://…`, set `watchdog.tls.ca_cert_path` to the CA that signs the peer's cert, and set `health.tls_cert_path` / `health.tls_key_path` on the peer. For full mutual auth, set `health.client_ca_path` on both sides and `watchdog.tls.client_cert_path` / `client_key_path` on the dialer side. Bearer tokens stack on top of TLS. From ff34b407b79f14c4f04d0bb5fc127477971efdfd Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:19:45 -0500 Subject: [PATCH 03/26] feat(config): scheme-validate alert sink URLs (26.19) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit watchdog.peer_addr was scheme-validated at config load, but alerts.webhook.url reached the HTTP client with no validation — a config value like file:///etc/passwd or gopher://… was used verbatim. 
Syslog addresses were only validated when the sink dialed. - Add validateSinkURL helper; validate alerts.webhook.url (http/https) and alerts.syslog.addr (udp/tcp) at config load, mirroring validatePeerAddr. - Fail-fast at boot on an invalid value instead of failing on first event. - Private/internal hosts are intentionally not blocked (internal receivers are legitimate, consistent with peer_addr allowing loopback); the guard targets scheme confusion (OWASP A10), not egress policy. - Tests cover webhook/syslog bad-scheme, no-host, and valid cases. Co-Authored-By: Claude Opus 4.8 --- internal/config/config.go | 33 ++++++++++++++++++++++ internal/config/config_test.go | 50 ++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/internal/config/config.go b/internal/config/config.go index e26bda2..da77721 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -470,6 +470,16 @@ func (c *Config) validate() error { if !isLoopbackBind(c.Admin.Addr) && c.Admin.AuthToken == "" { return fmt.Errorf("admin.addr %q is not loopback; set admin.auth_token (or INVENTORY_ADMIN_TOKEN) to gate the console, exports, JSON API, and POST /scan (binding off-loopback without a token is OWASP A01/A05)", c.Admin.Addr) } + if c.Alerts.Webhook.URL != "" { + if err := validateSinkURL(c.Alerts.Webhook.URL, "http", "https"); err != nil { + return fmt.Errorf("alerts.webhook.url: %w", err) + } + } + if c.Alerts.Syslog.Addr != "" { + if err := validateSinkURL(c.Alerts.Syslog.Addr, "udp", "tcp"); err != nil { + return fmt.Errorf("alerts.syslog.addr: %w", err) + } + } return nil } @@ -510,6 +520,29 @@ func validatePeerAddr(raw string) error { return nil } +// validateSinkURL rejects alert-sink targets (webhook.url, syslog.addr) whose +// scheme is not in the allowed set, or that carry no host. 
This is the same +// scheme-confusion guard applied to watchdog.peer_addr (OWASP A10): without it +// a webhook URL like file:///etc/passwd or gopher://… reaches the HTTP client +// verbatim. It does not block private/internal hosts — internal receivers +// (an in-cluster collector, localhost) are a legitimate, common deployment. +func validateSinkURL(raw string, schemes ...string) error { + u, err := url.Parse(raw) + if err != nil { + return fmt.Errorf("invalid URL %q: %w", raw, err) + } + scheme := strings.ToLower(u.Scheme) + for _, allowed := range schemes { + if scheme == allowed { + if u.Host == "" { + return fmt.Errorf("missing host in %q", raw) + } + return nil + } + } + return fmt.Errorf("scheme %q not allowed; must be one of %v", scheme, schemes) +} + func applyEnv(cfg *Config) { if v := os.Getenv("INVENTORY_DB_PATH"); v != "" { cfg.Database.Path = v diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 02384c2..b50cc93 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -243,6 +243,56 @@ func TestLoad_AdminTokenWorldReadable_Error(t *testing.T) { assert.Contains(t, err.Error(), "chmod 600") } +func TestLoad_WebhookURL_BadScheme(t *testing.T) { + data := map[string]any{ + "log": map[string]any{"level": "info", "format": "text"}, + "alerts": map[string]any{"webhook": map[string]any{"url": "file:///etc/passwd"}}, + } + _, err := config.Load(writeTempConfig(t, data)) + require.Error(t, err) + assert.Contains(t, err.Error(), "alerts.webhook.url") +} + +func TestLoad_WebhookURL_NoHost(t *testing.T) { + data := map[string]any{ + "log": map[string]any{"level": "info", "format": "text"}, + "alerts": map[string]any{"webhook": map[string]any{"url": "https://"}}, + } + _, err := config.Load(writeTempConfig(t, data)) + require.Error(t, err) + assert.Contains(t, err.Error(), "alerts.webhook.url") +} + +func TestLoad_WebhookURL_Valid(t *testing.T) { + data := map[string]any{ + "log": map[string]any{"level": 
"info", "format": "text"}, + "alerts": map[string]any{"webhook": map[string]any{"url": "https://hook.example.com/events"}}, + } + cfg, err := config.Load(writeTempConfig(t, data)) + require.NoError(t, err) + assert.Equal(t, "https://hook.example.com/events", cfg.Alerts.Webhook.URL) +} + +func TestLoad_SyslogAddr_BadScheme(t *testing.T) { + data := map[string]any{ + "log": map[string]any{"level": "info", "format": "text"}, + "alerts": map[string]any{"syslog": map[string]any{"addr": "http://syslog.example:514"}}, + } + _, err := config.Load(writeTempConfig(t, data)) + require.Error(t, err) + assert.Contains(t, err.Error(), "alerts.syslog.addr") +} + +func TestLoad_SyslogAddr_Valid(t *testing.T) { + data := map[string]any{ + "log": map[string]any{"level": "info", "format": "text"}, + "alerts": map[string]any{"syslog": map[string]any{"addr": "udp://syslog.example:514"}}, + } + cfg, err := config.Load(writeTempConfig(t, data)) + require.NoError(t, err) + assert.Equal(t, "udp://syslog.example:514", cfg.Alerts.Syslog.Addr) +} + func TestDuration_UnmarshalJSON_String(t *testing.T) { var d config.Duration require.NoError(t, json.Unmarshal([]byte(`"5m"`), &d)) From e6859ba92fec8bc98e02324b0621387ba571f708 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:19:45 -0500 Subject: [PATCH 04/26] =?UTF-8?q?docs:=20alert-sink=20URL=20validation=20?= =?UTF-8?q?=E2=80=94=20README,=20SECURITY,=20ChangeLog=2026.19?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note scheme validation on alerts.webhook.url / alerts.syslog.addr in the config table, update the OWASP A10 row to cover all outbound sink targets, and add the 26.19 ChangeLog entry. 
Co-Authored-By: Claude Opus 4.8 --- ChangeLog.md | 36 ++++++++++++++++++++++++++++++++++++ README.md | 4 ++-- SECURITY.md | 2 +- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index f73e229..ec59aa9 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,42 @@ _No unreleased changes._ --- +## 26.19 — 2026-06-05 + +Alert-sink URL validation. `watchdog.peer_addr` was scheme-validated at +config load, but `alerts.webhook.url` was passed to the HTTP client with +no validation at all — a typo or hostile config value like +`file:///etc/passwd` or `gopher://…` reached the client verbatim. Syslog +addresses were validated only when the sink dialed. This extends the +existing peer-address guard to every outbound sink target (OWASP A10). +Post-backlog security hardening (no `Planning.md` number). + +### Added + +- **`validateSinkURL`** in `internal/config` — a shared scheme/host check + used for both alert sinks. + +### Changed + +- **`alerts.webhook.url` is now scheme-validated at config load** — + rejected unless `http` or `https` with a non-empty host. The agent + refuses to start on an invalid value instead of failing silently on the + first event. +- **`alerts.syslog.addr` is now scheme-validated at config load** too + (`udp`/`tcp` + host), giving a clear boot-time error before any network + dial is attempted. The eager-dial check in `NewSyslogSink` remains as + defence in depth. + +### Notes + +- Private/internal hosts are deliberately **not** blocked — internal + webhook receivers and localhost syslog are legitimate, common + deployments, consistent with `peer_addr` allowing loopback. The guard + targets scheme confusion, not network egress policy. +- `go test ./...`, `go vet ./...`, and `golangci-lint run ./...` all green. + +--- + ## 26.18 — 2026-06-05 Admin console authentication gate. 
The admin console — which serves the diff --git a/README.md b/README.md index 162dc0c..2213a2f 100644 --- a/README.md +++ b/README.md @@ -380,9 +380,9 @@ Each agent reads a JSON config file and then applies environment variable overri | **Tracing** | | | | `tracing.endpoint` | — | OTLP/HTTP collector URL. Empty = no-op exporter (instrumentation active, spans discarded). | | **Alerts** | | | -| `alerts.webhook.url` | — | HTTP POST target for host.discovered / host.vanished events. | +| `alerts.webhook.url` | — | HTTP POST target for host.discovered / host.vanished events. Must be `http`/`https`; scheme-validated at startup. | | `alerts.webhook.auth_header` | — | Verbatim `Authorization` header (e.g. `Bearer abc123`). | -| `alerts.syslog.addr` | — | `udp://host:514` or `tcp://host:514`. RFC 5424. | +| `alerts.syslog.addr` | — | `udp://host:514` or `tcp://host:514`. RFC 5424. Scheme-validated at startup. | | `alerts.syslog.tag` | `network-inventory` | APP-NAME field. | | `alerts.syslog.facility` | `16` (local0) | RFC 5424 facility number 0..23. | diff --git a/SECURITY.md b/SECURITY.md index ee19855..acac634 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -52,7 +52,7 @@ The following table documents the project's posture against the [OWASP Top 10 (2 | A07 | Auth Failures | ⚠️ Partial | Loopback-only defaults are unauthenticated by design. Off-loopback binds of both the health server and the admin console require a shared bearer/Basic token, enforced at startup; tokens are compared in constant time. | | A08 | Data Integrity | ✅ Pass | `go.sum` provides cryptographic verification of all module downloads. Config validation rejects malformed or unexpected values at startup. | | A09 | Logging & Monitoring | ✅ Pass | Structured `log/slog` output in text or JSON format. All three watchdog failure conditions (liveness, freshness, consistency) are logged at `WARN` or `ERROR` level with structured fields. 
| -| A10 | SSRF | ✅ Pass | `peer_addr` is validated to `http`/`https` only at config load time. Response bodies from external HTTP calls are limited to 1 MiB via `io.LimitReader`. Scanner targets come from operator-controlled config, not external input. | +| A10 | SSRF | ✅ Pass | All outbound targets are scheme-validated at config load: `watchdog.peer_addr` and `alerts.webhook.url` to `http`/`https`, `alerts.syslog.addr` to `udp`/`tcp`. This blocks scheme-confusion vectors (`file://`, `gopher://`, …) before the URL reaches a client. Response bodies from external HTTP calls are limited to 1 MiB via `io.LimitReader`. Scanner targets come from operator-controlled config, not external input. | ## OWASP AI Top 10 From b137e72382c6d0f83c5c83a7af93ec20dd6fc7b9 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:27:54 -0500 Subject: [PATCH 05/26] feat(agent): scan-history retention via scanner.scan_history_ttl (26.20) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scans table grew without bound — hosts had host_ttl pruning, but scan records accumulated forever (~105K rows/year at the 5m default), bloating the DB and the unbounded /scans view. - Add store.ScanStore.DeleteBefore(ctx, cutoff) + SQLite impl (single bounded DELETE returning the row count). - Add scanner.scan_history_ttl config; zero (default) keeps full history. - Agent.pruneScans runs each cycle after the host prune; the Agent retains the ScanStore already passed to New (no signature change). - Add inventory_scans_pruned_total metric. - Tests: sqlite DeleteBefore (match + no-match), agent prune on/off, config parse + default. Mock ScanStores gain DeleteBefore. 
Co-Authored-By: Claude Opus 4.8 --- internal/admin/server_test.go | 19 ++++++++ internal/agent/agent.go | 27 ++++++++++++ internal/agent/agent_test.go | 74 ++++++++++++++++++++++++++++++++ internal/config/config.go | 6 +++ internal/config/config_test.go | 15 +++++++ internal/metrics/metrics.go | 1 + internal/scanner/scanner_test.go | 13 ++++++ internal/sqlite/scan.go | 14 ++++++ internal/sqlite/scan_test.go | 40 +++++++++++++++++ internal/store/store.go | 5 +++ 10 files changed, 214 insertions(+) diff --git a/internal/admin/server_test.go b/internal/admin/server_test.go index fca1f89..fa8ad77 100644 --- a/internal/admin/server_test.go +++ b/internal/admin/server_test.go @@ -129,6 +129,25 @@ func (m *mockScanStore) List(_ context.Context) ([]*models.Scan, error) { return m.scans, m.err } +func (m *mockScanStore) DeleteBefore(_ context.Context, cutoff time.Time) (int64, error) { + m.mu.Lock() + defer m.mu.Unlock() + if m.err != nil { + return 0, m.err + } + kept := m.scans[:0] + var deleted int64 + for _, s := range m.scans { + if s.StartedAt.Before(cutoff) { + deleted++ + continue + } + kept = append(kept, s) + } + m.scans = kept + return deleted, nil +} + // --- helpers --- func healthyStatus() health.Status { diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 32dd4a3..0fb3c2f 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -22,6 +22,7 @@ type Agent struct { name string cfg config.ScannerConfig hosts store.HostStore + scans store.ScanStore scanner *scanner.Scanner tracker *health.Tracker alerts alerts.Emitter @@ -64,6 +65,7 @@ func New( name: name, cfg: cfg, hosts: hosts, + scans: scans, scanner: scanner.New(scanner.Options{ Hosts: hosts, Ports: ports, @@ -205,6 +207,11 @@ func (a *Agent) runCycle(ctx context.Context, log *slog.Logger, forceAll bool) { log.Info("pruned stale hosts", "count", pruned) } + if pruned := a.pruneScans(ctx, log, started); pruned > 0 { + metrics.ScansPrunedTotal.Add(pruned) + log.Info("pruned old 
scan history", "count", pruned) + } + // Diff and fire events. Only meaningful when the cycle didn't // itself fail mid-way — declaring hosts "vanished" because of a // transient DB error would be alert spam. @@ -277,6 +284,26 @@ func (a *Agent) pruneStale(ctx context.Context, log *slog.Logger, now time.Time) return pruned } +// pruneScans deletes scan-history rows older than the configured +// ScanHistoryTTL and returns the number removed. Disabled when +// ScanHistoryTTL is 0 (the default), so existing deployments keep full +// history. Unlike host pruning this is a single bounded DELETE, not a +// list-then-delete loop. +func (a *Agent) pruneScans(ctx context.Context, log *slog.Logger, now time.Time) int64 { + ttl := a.cfg.ScanHistoryTTL.Duration + if ttl <= 0 { + return 0 + } + cutoff := now.Add(-ttl) + n, err := a.scans.DeleteBefore(ctx, cutoff) + if err != nil { + metrics.DBErrorsTotal.Inc() + log.Warn("prune: delete old scans failed", "err", err) + return 0 + } + return n +} + // snapshotByIP lists the current host inventory keyed by IP. Used pre- // cycle so the change-detection diff has a stable view to compare // against. 
A List failure logs and returns nil — the diff will then diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go index 48b3440..cd9224d 100644 --- a/internal/agent/agent_test.go +++ b/internal/agent/agent_test.go @@ -170,6 +170,19 @@ func (m *mockScanStore) List(_ context.Context) ([]*models.Scan, error) { return out, nil } +func (m *mockScanStore) DeleteBefore(_ context.Context, cutoff time.Time) (int64, error) { + m.mu.Lock() + defer m.mu.Unlock() + var deleted int64 + for id, s := range m.scans { + if s.StartedAt.Before(cutoff) { + delete(m.scans, id) + deleted++ + } + } + return deleted, nil +} + // --- tests --- // TestAgent_TriggerCoalesces verifies the buffered trigger channel: the first @@ -322,6 +335,67 @@ func TestAgent_PruneDisabledWithoutTTL(t *testing.T) { assert.Len(t, remaining, 1, "with HostTTL=0 no pruning should happen") } +// TestAgent_PrunesOldScans verifies the ScanHistoryTTL retention logic. +func TestAgent_PrunesOldScans(t *testing.T) { + scans := newMockScanStore() + _, err := scans.Create(context.Background(), &models.Scan{Subnet: "10.0.0.0/24", StartedAt: time.Now()}) + require.NoError(t, err) + _, err = scans.Create(context.Background(), &models.Scan{Subnet: "10.0.0.0/24", StartedAt: time.Now().Add(-24 * time.Hour)}) + require.NoError(t, err) + + a, err := agent.New( + "test", + config.ScannerConfig{ + Subnets: nil, + ScanInterval: config.Duration{Duration: 50 * time.Millisecond}, + ScanHistoryTTL: config.Duration{Duration: 1 * time.Hour}, + }, + newMockHostStore(), + mockPortStore{}, + scans, + health.NewTracker("test"), + nil, + ) + require.NoError(t, err) + + ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) + defer cancel() + a.Run(ctx) + + remaining, err := scans.List(context.Background()) + require.NoError(t, err) + assert.Len(t, remaining, 1, "the scan older than ScanHistoryTTL should have been pruned") +} + +// TestAgent_ScanPruneDisabledWithoutTTL verifies scan retention is off by 
default. +func TestAgent_ScanPruneDisabledWithoutTTL(t *testing.T) { + scans := newMockScanStore() + _, _ = scans.Create(context.Background(), &models.Scan{Subnet: "10.0.0.0/24", StartedAt: time.Now().Add(-24 * time.Hour)}) + + a, err := agent.New( + "test", + config.ScannerConfig{ + Subnets: nil, + ScanInterval: config.Duration{Duration: 50 * time.Millisecond}, + // ScanHistoryTTL left zero + }, + newMockHostStore(), + mockPortStore{}, + scans, + health.NewTracker("test"), + nil, + ) + require.NoError(t, err) + + ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) + defer cancel() + a.Run(ctx) + + remaining, err := scans.List(context.Background()) + require.NoError(t, err) + assert.Len(t, remaining, 1, "with ScanHistoryTTL=0 no scan pruning should happen") +} + // TestAgent_EmitsHostVanishedOnPrune verifies that a host pruned via // the HostTTL path produces a host.vanished alert. The discovered path // is exercised indirectly: a host present in the pre-cycle snapshot but diff --git a/internal/config/config.go b/internal/config/config.go index da77721..17478f4 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -125,6 +125,12 @@ type ScannerConfig struct { // stale. Zero disables pruning. Pruning runs at the end of each scan // cycle and DELETEs rows where last_seen < now - HostTTL*ScanInterval. HostTTL Duration `json:"host_ttl,omitempty"` + // ScanHistoryTTL bounds how long completed scan records are retained. + // Zero (the default) keeps history forever. When set, the end of each + // cycle DELETEs scan rows whose started_at is older than now - + // ScanHistoryTTL, so the scans table and the /scans view stay bounded + // on long-running deployments. + ScanHistoryTTL Duration `json:"scan_history_ttl,omitempty"` // DeepProbe enables a second-pass scan of DeepProbePorts on every host // confirmed alive by the liveness probe. 
Disabled by default — operators // must opt in because the worst case wall-clock budget per host grows diff --git a/internal/config/config_test.go b/internal/config/config_test.go index b50cc93..dec3f93 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -243,6 +243,21 @@ func TestLoad_AdminTokenWorldReadable_Error(t *testing.T) { assert.Contains(t, err.Error(), "chmod 600") } +func TestLoad_ScanHistoryTTL(t *testing.T) { + data := map[string]any{ + "log": map[string]any{"level": "info", "format": "text"}, + "scanner": map[string]any{"scan_history_ttl": "168h"}, + } + cfg, err := config.Load(writeTempConfig(t, data)) + require.NoError(t, err) + assert.Equal(t, 168*time.Hour, cfg.Scanner.ScanHistoryTTL.Duration) +} + +func TestDefault_ScanHistoryTTL_DisabledByDefault(t *testing.T) { + cfg := config.Default() + assert.Equal(t, time.Duration(0), cfg.Scanner.ScanHistoryTTL.Duration, "retention off by default") +} + func TestLoad_WebhookURL_BadScheme(t *testing.T) { data := map[string]any{ "log": map[string]any{"level": "info", "format": "text"}, diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index e86bb71..d499981 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -162,6 +162,7 @@ var ( WatchdogFailuresTotal = Default.Counter("inventory_watchdog_failures_total", "Watchdog ticks where the peer was unreachable") WatchdogPeerDownTotal = Default.Counter("inventory_watchdog_peer_down_total", "Times the peer has been declared DOWN") HostsPrunedTotal = Default.Counter("inventory_hosts_pruned_total", "Hosts deleted by the staleness pruner") + ScansPrunedTotal = Default.Counter("inventory_scans_pruned_total", "Scan-history rows deleted by the retention pruner") ScanTriggersTotal = Default.Counter("inventory_scan_triggers_total", "On-demand scans accepted via POST /scan") HostCount = Default.Gauge("inventory_host_count", "Current number of hosts in the inventory") diff --git 
a/internal/scanner/scanner_test.go b/internal/scanner/scanner_test.go index f6f30b2..8f4c32d 100644 --- a/internal/scanner/scanner_test.go +++ b/internal/scanner/scanner_test.go @@ -128,6 +128,19 @@ func (m *mockScanStore) List(_ context.Context) ([]*models.Scan, error) { return out, nil } +func (m *mockScanStore) DeleteBefore(_ context.Context, cutoff time.Time) (int64, error) { + m.mu.Lock() + defer m.mu.Unlock() + var deleted int64 + for id, s := range m.scans { + if s.StartedAt.Before(cutoff) { + delete(m.scans, id) + deleted++ + } + } + return deleted, nil +} + func (m *mockScanStore) get(id int64) *models.Scan { m.mu.Lock() defer m.mu.Unlock() diff --git a/internal/sqlite/scan.go b/internal/sqlite/scan.go index 1883d4e..179dd45 100644 --- a/internal/sqlite/scan.go +++ b/internal/sqlite/scan.go @@ -75,3 +75,17 @@ func (r *ScanRepo) List(ctx context.Context) ([]*models.Scan, error) { } return scans, rows.Err() } + +// DeleteBefore removes scans whose started_at is strictly older than cutoff +// and returns the number of rows deleted. +func (r *ScanRepo) DeleteBefore(ctx context.Context, cutoff time.Time) (int64, error) { + res, err := r.writer.ExecContext(ctx, `DELETE FROM scans WHERE started_at < ?`, cutoff) + if err != nil { + return 0, fmt.Errorf("delete scans before %s: %w", cutoff.Format(time.RFC3339), err) + } + n, err := res.RowsAffected() + if err != nil { + return 0, fmt.Errorf("delete scans rows affected: %w", err) + } + return n, nil +} diff --git a/internal/sqlite/scan_test.go b/internal/sqlite/scan_test.go index a364a6c..8a65dda 100644 --- a/internal/sqlite/scan_test.go +++ b/internal/sqlite/scan_test.go @@ -87,6 +87,46 @@ func TestScanRepo_List_OrderedNewestFirst(t *testing.T) { assert.Equal(t, "10.0.0.0/8", list[2].Subnet, "oldest scan should be last") } +func TestScanRepo_DeleteBefore(t *testing.T) { + db := openTestDB(t) + ctx := t.Context() + + base := time.Now().UTC().Truncate(time.Second) + // Three scans at base-2h, base-1h, base. 
+ for i, age := range []time.Duration{-2 * time.Hour, -1 * time.Hour, 0} { + s := newTestScan("10.0.0.0/8") + s.StartedAt = base.Add(age) + _, err := db.Scans().Create(ctx, s) + require.NoError(t, err, "seed %d", i) + } + + // Cutoff at base-90m removes only the base-2h row. + deleted, err := db.Scans().DeleteBefore(ctx, base.Add(-90*time.Minute)) + require.NoError(t, err) + assert.Equal(t, int64(1), deleted) + + list, err := db.Scans().List(ctx) + require.NoError(t, err) + assert.Len(t, list, 2, "two newer scans should remain") +} + +func TestScanRepo_DeleteBefore_NoMatch(t *testing.T) { + db := openTestDB(t) + ctx := t.Context() + + s := newTestScan("10.0.0.0/8") + _, err := db.Scans().Create(ctx, s) + require.NoError(t, err) + + deleted, err := db.Scans().DeleteBefore(ctx, time.Now().UTC().Add(-24*time.Hour)) + require.NoError(t, err) + assert.Equal(t, int64(0), deleted, "nothing older than the cutoff") + + list, err := db.Scans().List(ctx) + require.NoError(t, err) + assert.Len(t, list, 1) +} + func TestScanRepo_Finish(t *testing.T) { db := openTestDB(t) ctx := t.Context() diff --git a/internal/store/store.go b/internal/store/store.go index 023b522..734a299 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -55,4 +55,9 @@ type ScanStore interface { // List returns all scans ordered by started_at descending (newest first). List(ctx context.Context) ([]*models.Scan, error) + + // DeleteBefore removes scan records whose started_at is strictly older + // than cutoff and returns the number deleted. Used by the scan-history + // retention pruner; the disabled (zero-TTL) case is handled by the caller. + DeleteBefore(ctx context.Context, cutoff time.Time) (int64, error) } From d348ea3772f40568df95852cf96cfd0a15824d08 Mon Sep 17 00:00:00 2001 From: "Aaron K. 
Clark" Date: Fri, 5 Jun 2026 06:27:54 -0500 Subject: [PATCH 06/26] =?UTF-8?q?docs:=20scan-history=20retention=20?= =?UTF-8?q?=E2=80=94=20README=20+=20ChangeLog=2026.20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document scanner.scan_history_ttl in the config table and add the 26.20 ChangeLog entry. Co-Authored-By: Claude Opus 4.8 --- ChangeLog.md | 39 +++++++++++++++++++++++++++++++++++++++ README.md | 1 + 2 files changed, 40 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index ec59aa9..6556a49 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,45 @@ _No unreleased changes._ --- +## 26.20 — 2026-06-05 + +Scan-history retention. The `scans` table grew without bound: hosts had a +`host_ttl` pruner, but completed scan records accumulated forever (≈105K +rows/year at the 5-minute default), bloating the DB and the unbounded +`/scans` view. This adds an optional retention policy mirroring host +pruning. Post-backlog reliability work (no `Planning.md` number). + +### Added + +- **`scanner.scan_history_ttl` config** — when set, the end of each scan + cycle deletes scan rows whose `started_at` is older than `now - TTL`. + Zero (the default) keeps full history, so existing deployments are + unchanged. +- **`store.ScanStore.DeleteBefore(ctx, cutoff)`** + its SQLite + implementation (`DELETE FROM scans WHERE started_at < ?`, returning the + row count) — a single bounded DELETE, not a list-then-delete loop. +- **`inventory_scans_pruned_total`** Prometheus counter. + +### Changed + +- **The agent now runs a scan-history prune each cycle**, right after the + host prune (`Agent.pruneScans`). The `Agent` retains the `ScanStore` + passed to `New` for this; no constructor signature change. + +### Tests + +- `internal/sqlite/scan_test.go` — `DeleteBefore` removes only rows older + than the cutoff and reports the count; no-match returns 0. 
+- `internal/agent/agent_test.go` — old scans pruned when TTL is set; full + history kept when TTL is 0. +- `internal/config/config_test.go` — `scan_history_ttl` parses; default 0. + +### Notes + +- `go test ./...`, `go vet ./...`, and `golangci-lint run ./...` all green. + +--- + ## 26.19 — 2026-06-05 Alert-sink URL validation. `watchdog.peer_addr` was scheme-validated at diff --git a/README.md b/README.md index 2213a2f..71050dd 100644 --- a/README.md +++ b/README.md @@ -346,6 +346,7 @@ Each agent reads a JSON config file and then applies environment variable overri | `scanner.udp_ports` | `[]` | UDP ports to probe per live host. Empty disables UDP probing. | | `scanner.enrich_arp` | `false` | Populate Host.MACAddress + Vendor from `/proc/net/arp` (Linux). | | `scanner.host_ttl` | `0` (disabled) | Hosts not seen within this duration are deleted at the end of each cycle. | +| `scanner.scan_history_ttl` | `0` (disabled) | Scan-history rows older than this duration are deleted at the end of each cycle, bounding the `scans` table and `/scans` view. | | **Scanner — per-subnet profile (each item in `scanner.profiles`)** | | | | `subnet` | required | CIDR for this profile. Must be unique. | | `scan_interval` | inherits global | Per-profile scan cadence. | From 6bf905c82a8878f955c8ff954fa5265452ed7de4 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:30:52 -0500 Subject: [PATCH 07/26] feat(scanner): fingerprint PostgreSQL, Redis, Memcached (26.21) Ports 5432/6379/11211 were deep-probed and recorded as open, but fingerprint() had no handler for them, so Port.Service stayed empty. - postgresProbe: SSLRequest startup packet, read S/N reply -> "PostgreSQL" (reliable identification without authenticating). - redisInfo: send INFO server, parse redis_version: -> "Redis: "; auth-gated servers (-NOAUTH) -> "Redis (auth required)". - memcachedVersion: text "version" command -> "Memcached: ". - Dispatch the three ports in fingerprint(). 
- Tests for each via a new startRequestResponse helper (client speaks first). Co-Authored-By: Claude Opus 4.8 --- internal/scanner/banner.go | 100 ++++++++++++++++++++++++++++++++ internal/scanner/banner_test.go | 84 +++++++++++++++++++++++++++ internal/scanner/scanner.go | 6 ++ 3 files changed, 190 insertions(+) diff --git a/internal/scanner/banner.go b/internal/scanner/banner.go index 872a78e..c35316d 100644 --- a/internal/scanner/banner.go +++ b/internal/scanner/banner.go @@ -153,6 +153,106 @@ func mysqlGreeting(ctx context.Context, ip string, port int, timeout time.Durati return "MySQL: " + string(buf[5:end]) } +// redisInfo identifies a Redis/Valkey server and, when permitted, its +// version. Redis speaks RESP: we send `INFO server` and read the reply. +// An unauthenticated server returns a bulk string containing +// "redis_version:X"; a protected one returns "-NOAUTH …". Either reply +// shape identifies Redis without authenticating. +func redisInfo(ctx context.Context, ip string, port int, timeout time.Duration) string { + d := net.Dialer{Timeout: timeout} + conn, err := d.DialContext(ctx, "tcp", net.JoinHostPort(ip, strconv.Itoa(port))) + if err != nil { + return "" + } + defer func() { _ = conn.Close() }() + + _ = conn.SetDeadline(time.Now().Add(timeout)) + if _, err := conn.Write([]byte("INFO server\r\n")); err != nil { + return "" + } + buf := make([]byte, maxBannerBytes) + n, _ := conn.Read(buf) + if n == 0 { + return "" + } + resp := string(buf[:n]) + if i := strings.Index(resp, "redis_version:"); i >= 0 { + ver := resp[i+len("redis_version:"):] + if j := strings.IndexAny(ver, "\r\n"); j >= 0 { + ver = ver[:j] + } + if ver = strings.TrimSpace(ver); ver != "" { + return "Redis: " + ver + } + } + if strings.HasPrefix(resp, "-NOAUTH") { + return "Redis (auth required)" + } + // Any valid RESP reply (+, -, :, $, *) confirms Redis even without a + // parseable version. 
+ switch resp[0] { + case '+', '-', ':', '$', '*': + return "Redis" + } + return "" +} + +// memcachedVersion sends the text-protocol `version` command. Memcached +// replies "VERSION \r\n"; anything else means it isn't memcached. +func memcachedVersion(ctx context.Context, ip string, port int, timeout time.Duration) string { + d := net.Dialer{Timeout: timeout} + conn, err := d.DialContext(ctx, "tcp", net.JoinHostPort(ip, strconv.Itoa(port))) + if err != nil { + return "" + } + defer func() { _ = conn.Close() }() + + _ = conn.SetDeadline(time.Now().Add(timeout)) + if _, err := conn.Write([]byte("version\r\n")); err != nil { + return "" + } + line, err := bufio.NewReader(&capReader{r: conn, n: maxBannerBytes}).ReadString('\n') + if err != nil && line == "" { + return "" + } + line = strings.TrimRight(line, "\r\n") + ver, ok := strings.CutPrefix(line, "VERSION ") + if !ok || ver == "" { + return "" + } + return "Memcached: " + ver +} + +// postgresProbe identifies a PostgreSQL server with the SSLRequest startup +// packet (int32 length=8, int32 request code 80877103). Postgres answers +// with a single byte 'S' (SSL offered) or 'N' (not offered); no other +// common service responds this way, so it's a reliable identifier without +// authenticating. The server version needs a full startup handshake, which +// we deliberately avoid. +func postgresProbe(ctx context.Context, ip string, port int, timeout time.Duration) string { + d := net.Dialer{Timeout: timeout} + conn, err := d.DialContext(ctx, "tcp", net.JoinHostPort(ip, strconv.Itoa(port))) + if err != nil { + return "" + } + defer func() { _ = conn.Close() }() + + // SSLRequest: length=8, code=80877103 (0x04D2162F), big-endian. 
+ req := []byte{0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x16, 0x2f} + _ = conn.SetDeadline(time.Now().Add(timeout)) + if _, err := conn.Write(req); err != nil { + return "" + } + buf := make([]byte, 1) + if _, err := conn.Read(buf); err != nil { + return "" + } + if buf[0] == 'S' || buf[0] == 'N' { + return "PostgreSQL" + } + return "" +} + // capReader wraps an io.Reader with a hard byte cap, defended against a // peer that sends data without an end-of-line for longer than we'd want // to wait. Used by lineBanner. diff --git a/internal/scanner/banner_test.go b/internal/scanner/banner_test.go index 9cc755a..256e924 100644 --- a/internal/scanner/banner_test.go +++ b/internal/scanner/banner_test.go @@ -133,8 +133,92 @@ func TestTLSHTTPSFingerprint(t *testing.T) { } } +func TestRedisInfo_Version(t *testing.T) { + resp := "$80\r\n# Server\r\nredis_version:7.2.4\r\nredis_mode:standalone\r\n" + addr := startRequestResponse(t, []byte(resp)) + host, portStr, _ := net.SplitHostPort(addr) + got := redisInfo(context.Background(), host, atoi(t, portStr), 500*time.Millisecond) + if got != "Redis: 7.2.4" { + t.Errorf("got %q", got) + } +} + +func TestRedisInfo_NoAuth(t *testing.T) { + addr := startRequestResponse(t, []byte("-NOAUTH Authentication required.\r\n")) + host, portStr, _ := net.SplitHostPort(addr) + got := redisInfo(context.Background(), host, atoi(t, portStr), 500*time.Millisecond) + if got != "Redis (auth required)" { + t.Errorf("got %q", got) + } +} + +func TestMemcachedVersion(t *testing.T) { + addr := startRequestResponse(t, []byte("VERSION 1.6.21\r\n")) + host, portStr, _ := net.SplitHostPort(addr) + got := memcachedVersion(context.Background(), host, atoi(t, portStr), 500*time.Millisecond) + if got != "Memcached: 1.6.21" { + t.Errorf("got %q", got) + } +} + +func TestMemcachedVersion_NotMemcached(t *testing.T) { + addr := startRequestResponse(t, []byte("ERROR\r\n")) + host, portStr, _ := net.SplitHostPort(addr) + got := memcachedVersion(context.Background(), host, 
atoi(t, portStr), 500*time.Millisecond) + if got != "" { + t.Errorf("expected empty for non-memcached reply, got %q", got) + } +} + +func TestPostgresProbe_Identified(t *testing.T) { + for _, reply := range []byte{'S', 'N'} { + addr := startRequestResponse(t, []byte{reply}) + host, portStr, _ := net.SplitHostPort(addr) + got := postgresProbe(context.Background(), host, atoi(t, portStr), 500*time.Millisecond) + if got != "PostgreSQL" { + t.Errorf("reply %q: got %q", reply, got) + } + } +} + +func TestPostgresProbe_NotPostgres(t *testing.T) { + addr := startRequestResponse(t, []byte{'X'}) + host, portStr, _ := net.SplitHostPort(addr) + got := postgresProbe(context.Background(), host, atoi(t, portStr), 500*time.Millisecond) + if got != "" { + t.Errorf("expected empty for non-postgres reply, got %q", got) + } +} + // --- helpers --- +// startRequestResponse accepts one connection, consumes the client's +// request bytes, then writes the canned response and closes. Suited to +// request/response protocols (Redis, Memcached, Postgres) where the client +// speaks first. 
+func startRequestResponse(t *testing.T, response []byte) (addr string) { + t.Helper() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = ln.Close() }) + go func() { + for { + c, err := ln.Accept() + if err != nil { + return + } + _ = c.SetReadDeadline(time.Now().Add(time.Second)) + buf := make([]byte, 256) + _, _ = c.Read(buf) + _, _ = c.Write(response) + _ = c.Close() + } + }() + return ln.Addr().String() +} + func startLineGreeting(t *testing.T, line string) (addr string) { t.Helper() ln, err := net.Listen("tcp", "127.0.0.1:0") diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index a553dc4..13f93a5 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -541,6 +541,12 @@ func fingerprint(ctx context.Context, ip string, port int, timeout time.Duration return tlsHTTPSFingerprint(ctx, ip, port, timeout) case 3306: return mysqlGreeting(ctx, ip, port, timeout) + case 5432: + return postgresProbe(ctx, ip, port, timeout) + case 6379: + return redisInfo(ctx, ip, port, timeout) + case 11211: + return memcachedVersion(ctx, ip, port, timeout) default: return "" } From 2943a0ea8825cbe09b32c2f528b943664216a30b Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:30:52 -0500 Subject: [PATCH 08/26] =?UTF-8?q?docs:=20PostgreSQL/Redis/Memcached=20fing?= =?UTF-8?q?erprinting=20=E2=80=94=20README=20+=20ChangeLog=2026.21?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- ChangeLog.md | 37 +++++++++++++++++++++++++++++++++++++ README.md | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 6556a49..e4441ae 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,43 @@ _No unreleased changes._ --- +## 26.21 — 2026-06-05 + +Service fingerprinting for PostgreSQL, Redis, and Memcached. 
These ports +(5432, 6379, 11211) were already in the deep-probe list and recorded as +open, but `fingerprint()` had no handler for them, so `Port.Service` was +always empty — operators couldn't tell a Redis from a random open port. +Post-backlog feature work (no `Planning.md` number). + +### Added + +- **PostgreSQL identification** (`postgresProbe`) — issues the SSLRequest + startup packet and reads the single-byte `S`/`N` reply. Reliable + identification without authenticating; labelled `PostgreSQL`. +- **Redis/Valkey fingerprinting** (`redisInfo`) — sends `INFO server` and + parses `redis_version:` → `Redis: <version>`. An auth-gated server + (`-NOAUTH`) is still identified as `Redis (auth required)`. +- **Memcached fingerprinting** (`memcachedVersion`) — sends the text + `version` command → `Memcached: <version>`. + +### Changed + +- **`fingerprint()` now dispatches ports 5432/6379/11211** to the new + handlers; all other ports are unchanged. + +### Tests + +- `internal/scanner/banner_test.go` — Redis version + NOAUTH paths, + Memcached version + non-memcached reply, Postgres `S`/`N` identification + + non-postgres reply. Adds a `startRequestResponse` test helper for + client-speaks-first protocols. + +### Notes + +- `go test ./...`, `go vet ./...`, and `golangci-lint run ./...` all green. + +--- + ## 26.20 — 2026-06-05 Scan-history retention. The `scans` table grew without bound: hosts had a diff --git a/README.md b/README.md index 71050dd..113336a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ The system is designed to run as **two cooperating agent instances** — named * ## Features - **Active discovery** — concurrent TCP-probe scanning across configurable CIDR ranges to find live hosts. Optional deep TCP and UDP probe passes per profile. -- **Asset fingerprinting** — banner-grab on SSH, FTP, SMTP, POP3, IMAP, HTTP, HTTPS (with TLS cert peek), MySQL handshake, Telnet. Stored per-port in `Port.Service`.
+- **Asset fingerprinting** — banner-grab on SSH, FTP, SMTP, POP3, IMAP, HTTP, HTTPS (with TLS cert peek), MySQL handshake, PostgreSQL (SSLRequest probe), Redis (`INFO`), Memcached (`version`), Telnet. Stored per-port in `Port.Service`. - **Device-type classifier** — heuristic rules over (vendor, OS banner, open ports) tag hosts as printer / router / hypervisor / windows-host / windows-dc / database (mysql|postgres|…) / mail-server / linux-host / appliance / iot-broker / embedded. - **MAC + vendor enrichment** — `/proc/net/arp` lookup on Linux + embedded OUI prefix table for ~80 common vendors. - **Per-subnet scan profiles** — aggressive hourly deep scans on critical infra, lazy daily liveness on guest networks, all in one config. From fa094b980d3b07e6ae5177f9264bb1c477749e69 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:33:38 -0500 Subject: [PATCH 09/26] test: cover logging, health client, and admin watchdog/scan handlers (26.22) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fill the largest zero-coverage gaps: - internal/logging 0%->100%: level parsing, json/text selection, agent field. - internal/health/client.go: Ping (200/non-200/bearer/conn-error) and FetchStatus (decode ok/bad json) — the file had no test. - internal/admin: handleWatchdog (peer + no-peer) and handleScanTrigger (501/204/503), plus CSRF rejection on POST /scan (missing token -> 403). No behaviour change. 
Co-Authored-By: Claude Opus 4.8 --- internal/admin/handlers_extra_test.go | 112 ++++++++++++++++++++++++++ internal/health/client_test.go | 81 +++++++++++++++++++ internal/logging/logging_test.go | 80 ++++++++++++++++++ 3 files changed, 273 insertions(+) create mode 100644 internal/admin/handlers_extra_test.go create mode 100644 internal/health/client_test.go create mode 100644 internal/logging/logging_test.go diff --git a/internal/admin/handlers_extra_test.go b/internal/admin/handlers_extra_test.go new file mode 100644 index 0000000..96a8669 --- /dev/null +++ b/internal/admin/handlers_extra_test.go @@ -0,0 +1,112 @@ +package admin_test + +import ( + "context" + "net/http" + "regexp" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/Ronin48/NetworkInventoryAgent/internal/admin" + "github.com/Ronin48/NetworkInventoryAgent/internal/health" +) + +// newServerWithTrigger starts an admin server wired with the given trigger. +func newServerWithTrigger(t *testing.T, trigger admin.Trigger, status func() health.Status) *admin.Server { + t.Helper() + srv, err := admin.NewServer(":0", "test-agent", + &mockHostStore{}, &mockPortStore{}, &mockScanStore{}, + status, trigger, admin.ServerOptions{}, + ) + require.NoError(t, err) + require.NoError(t, srv.Start()) + t.Cleanup(func() { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + _ = srv.Shutdown(ctx) + }) + return srv +} + +var csrfRe = regexp.MustCompile(`name="csrf" value="([0-9a-f]+)"`) + +// scrapeCSRF GETs the dashboard and extracts the embedded CSRF token. 
+func scrapeCSRF(t *testing.T, srv *admin.Server) string { + t.Helper() + resp := get(t, srv, "/") + defer func() { _ = resp.Body.Close() }() + m := csrfRe.FindStringSubmatch(readBody(t, resp)) + require.Len(t, m, 2, "dashboard should embed a CSRF token") + return m[1] +} + +func postScan(t *testing.T, srv *admin.Server, csrf string) *http.Response { + t.Helper() + req, err := http.NewRequest(http.MethodPost, "http://"+srv.Addr()+"/scan", nil) + require.NoError(t, err) + if csrf != "" { + req.Header.Set("X-CSRF-Token", csrf) + } + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + return resp +} + +func TestHandleWatchdog_NoPeer(t *testing.T) { + srv := newTestServer(t, &mockHostStore{}, &mockPortStore{}, &mockScanStore{}) + resp := get(t, srv, "/watchdog") + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusOK, resp.StatusCode) + assert.Contains(t, resp.Header.Get("Content-Type"), "text/html") +} + +func TestHandleWatchdog_WithPeer(t *testing.T) { + status := func() health.Status { + return health.Status{ + Name: "test-agent", + Healthy: true, + Peer: &health.PeerStatus{ + Addr: "http://neuromancer:8081", + Reachable: true, + LastCheckedAt: time.Now(), + PeerHostCount: 12, + }, + } + } + srv := newServerWithTrigger(t, nil, status) + resp := get(t, srv, "/watchdog") + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusOK, resp.StatusCode) + assert.Contains(t, readBody(t, resp), "neuromancer") +} + +func TestHandleScanTrigger_NotWired(t *testing.T) { + srv := newTestServer(t, &mockHostStore{}, &mockPortStore{}, &mockScanStore{}) // nil trigger + resp := postScan(t, srv, scrapeCSRF(t, srv)) + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusNotImplemented, resp.StatusCode) +} + +func TestHandleScanTrigger_Success(t *testing.T) { + srv := newServerWithTrigger(t, func() bool { return true }, healthyStatus) + resp := postScan(t, srv, scrapeCSRF(t, srv)) + defer func() { _ = resp.Body.Close() 
}() + assert.Equal(t, http.StatusNoContent, resp.StatusCode) +} + +func TestHandleScanTrigger_AlreadyPending(t *testing.T) { + srv := newServerWithTrigger(t, func() bool { return false }, healthyStatus) + resp := postScan(t, srv, scrapeCSRF(t, srv)) + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) +} + +func TestHandleScanTrigger_MissingCSRF(t *testing.T) { + srv := newServerWithTrigger(t, func() bool { return true }, healthyStatus) + resp := postScan(t, srv, "") // no token + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusForbidden, resp.StatusCode, "POST without CSRF token must be rejected") +} diff --git a/internal/health/client_test.go b/internal/health/client_test.go new file mode 100644 index 0000000..8c06b1f --- /dev/null +++ b/internal/health/client_test.go @@ -0,0 +1,81 @@ +package health_test + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/Ronin48/NetworkInventoryAgent/internal/health" +) + +func TestClient_Ping_OK(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/health", r.URL.Path) + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + require.NoError(t, health.NewClient(srv.URL).Ping(context.Background())) +} + +func TestClient_Ping_Non200(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) + defer srv.Close() + + err := health.NewClient(srv.URL).Ping(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "503") +} + +func TestClient_Ping_SendsBearerToken(t *testing.T) { + var gotAuth string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotAuth = 
r.Header.Get("Authorization") + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + require.NoError(t, health.NewAuthedClient(srv.URL, "tok").Ping(context.Background())) + assert.Equal(t, "Bearer tok", gotAuth) +} + +func TestClient_Ping_ConnError(t *testing.T) { + // Nothing is listening on this address; Do() should fail. + err := health.NewClient("http://127.0.0.1:1").Ping(context.Background()) + require.Error(t, err) +} + +func TestClient_FetchStatus_OK(t *testing.T) { + want := health.Status{Name: "peer", Healthy: true, HostCount: 7, ScanCount: 3} + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/status", r.URL.Path) + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(want) + })) + defer srv.Close() + + got, err := health.NewClient(srv.URL).FetchStatus(context.Background()) + require.NoError(t, err) + assert.Equal(t, "peer", got.Name) + assert.True(t, got.Healthy) + assert.Equal(t, 7, got.HostCount) + assert.Equal(t, 3, got.ScanCount) +} + +func TestClient_FetchStatus_BadJSON(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte("not json")) + })) + defer srv.Close() + + _, err := health.NewClient(srv.URL).FetchStatus(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "decode status") +} diff --git a/internal/logging/logging_test.go b/internal/logging/logging_test.go new file mode 100644 index 0000000..c10c278 --- /dev/null +++ b/internal/logging/logging_test.go @@ -0,0 +1,80 @@ +package logging_test + +import ( + "context" + "io" + "log/slog" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/Ronin48/NetworkInventoryAgent/internal/config" + "github.com/Ronin48/NetworkInventoryAgent/internal/logging" +) + +func TestSetup_LevelParsing(t *testing.T) { + cases := []struct { + level 
string + debugEnabled bool + infoEnabled bool + warnEnabled bool + errorOnlyName string + }{ + {"debug", true, true, true, ""}, + {"info", false, true, true, ""}, + {"warn", false, false, true, ""}, + {"error", false, false, false, ""}, + {"bogus", false, true, true, ""}, // unknown falls back to info + } + ctx := context.Background() + for _, c := range cases { + t.Run(c.level, func(t *testing.T) { + logging.Setup(config.LogConfig{Level: c.level, Format: "text"}, "") + h := slog.Default().Handler() + assert.Equal(t, c.debugEnabled, h.Enabled(ctx, slog.LevelDebug), "debug") + assert.Equal(t, c.infoEnabled, h.Enabled(ctx, slog.LevelInfo), "info") + assert.Equal(t, c.warnEnabled, h.Enabled(ctx, slog.LevelWarn), "warn") + }) + } +} + +func TestSetup_JSONFormatAndAgentName(t *testing.T) { + out := captureStdout(t, func() { + logging.Setup(config.LogConfig{Level: "info", Format: "json"}, "wintermute") + slog.Info("hello", "k", "v") + }) + assert.True(t, strings.HasPrefix(strings.TrimSpace(out), "{"), "json output should be an object: %q", out) + assert.Contains(t, out, `"msg":"hello"`) + assert.Contains(t, out, `"agent":"wintermute"`) + assert.Contains(t, out, `"k":"v"`) +} + +func TestSetup_TextFormatNoName(t *testing.T) { + out := captureStdout(t, func() { + logging.Setup(config.LogConfig{Level: "info", Format: "text"}, "") + slog.Info("hello") + }) + assert.Contains(t, out, "hello") + assert.NotContains(t, out, "agent=", "no agent field when name is empty") +} + +// captureStdout redirects os.Stdout for the duration of fn and returns what +// was written. Setup captures os.Stdout at call time, so fn must call Setup +// (not just log) inside the capture window. 
+func captureStdout(t *testing.T, fn func()) string { + t.Helper() + orig := os.Stdout + r, w, err := os.Pipe() + require.NoError(t, err) + os.Stdout = w + defer func() { os.Stdout = orig }() + + fn() + _ = w.Close() + data, err := io.ReadAll(r) + require.NoError(t, err) + return string(data) +} From 82f24f38dcb7fa606106acc225dd14b8e54cb205 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:33:38 -0500 Subject: [PATCH 10/26] =?UTF-8?q?docs:=20ChangeLog=2026.22=20=E2=80=94=20t?= =?UTF-8?q?est-coverage=20fill?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- ChangeLog.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index e4441ae..00dfa2c 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,32 @@ _No unreleased changes._ --- +## 26.22 — 2026-06-05 + +Test-coverage fill for previously untested units. No behaviour change. +Post-backlog quality work (no `Planning.md` number). + +### Tests + +- **`internal/logging`** — `0% → 100%`. Level parsing (debug/info/warn/ + error + unknown→info fallback), JSON vs text handler selection, and the + `agent` field injection, via a captured-stdout helper. +- **`internal/health/client.go`** — the watchdog peer client had no test + file. Added `Ping` (200 / non-200 / bearer-token / connection error) + and `FetchStatus` (decode OK / bad JSON) coverage. +- **`internal/admin`** — covered the previously untested handlers: + `handleWatchdog` (with and without a peer) and `handleScanTrigger` + (not-wired → 501, success → 204, already-pending → 503), plus the CSRF + rejection path on `POST /scan` (missing token → 403). + +### Notes + +- `go test ./...`, `go vet ./...`, and `golangci-lint run ./...` all green. + Package coverage after this change: logging 100%, health 79.6%, admin + 74.1%, scanner 71.7%. 
+ +--- + ## 26.21 — 2026-06-05 Service fingerprinting for PostgreSQL, Redis, and Memcached. These ports From 3f94de0a078f346c25d3259a2c88b89cc1844817 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:37:21 -0500 Subject: [PATCH 11/26] feat(scanner): classify NAS, hypervisors, k8s, containers, cameras (26.23) Several common asset classes fell through to the generic appliance tag. Add five categories built only on signals already available (NIC OUI + open ports), no dead vendor strings: - nas: Synology/Western Digital OUI, or NFS(2049)+SMB(445); ordered before the Windows SMB rule so a NAS isn't mislabelled windows-host. - hypervisor: also QEMU/KVM, VirtualBox, Hyper-V by OUI; Proxmox via 8006. - kubernetes-node: apiserver 6443 / etcd 2379 / kubelet 10250. - container-host: Docker daemon 2375/2376. - camera: RTSP 554. Tests for each plus a regression that SMB alone stays windows-host. Co-Authored-By: Claude Opus 4.8 --- internal/scanner/classify.go | 39 ++++++++++++++++++++ internal/scanner/classify_test.go | 59 +++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/internal/scanner/classify.go b/internal/scanner/classify.go index e550df1..eddfaaf 100644 --- a/internal/scanner/classify.go +++ b/internal/scanner/classify.go @@ -41,6 +41,27 @@ func classify(vendor, osfp string, tcp, udp []int) string { if strings.Contains(vlow, "vmware") && (tcpSet[902] || tcpSet[5988] || tcpSet[5989]) { return "hypervisor" } + // Other virtualization stacks are identifiable by NIC OUI alone + // (QEMU/KVM, VirtualBox, Microsoft Hyper-V) or, for Proxmox VE, its + // 8006 management port. + if strings.Contains(vlow, "qemu") || strings.Contains(vlow, "kvm") || + strings.Contains(vlow, "virtualbox") || strings.Contains(vlow, "hyper-v") { + return "hypervisor" + } + if tcpSet[8006] { + return "hypervisor" + } + + // ── Container / orchestration platforms ─────────────────────── + // Unique control-plane ports. 
These sit outside the default + // deep-probe list, so they only fire when an operator probes them — + // but the labels are precise when the data is present. + if tcpSet[6443] || tcpSet[2379] || tcpSet[10250] { + return "kubernetes-node" + } + if tcpSet[2375] || tcpSet[2376] { + return "container-host" + } // ── Database servers ────────────────────────────────────────── // Database ports are uniquely strong signals — almost nothing @@ -60,6 +81,17 @@ func classify(vendor, osfp string, tcp, udp []int) string { return "database (memcached)" } + // ── Storage / NAS ───────────────────────────────────────────── + // Vendor OUI pins Synology / Western Digital; otherwise NFS (2049) + // alongside SMB (445) is the NAS signature. Must precede the Windows + // SMB rule below so a NAS isn't mislabelled windows-host. + if strings.Contains(vlow, "synology") || strings.Contains(vlow, "western digital") { + return "nas" + } + if tcpSet[2049] && tcpSet[445] { + return "nas" + } + // ── Active Directory / Windows DC ───────────────────────────── // Kerberos (88) + LDAP (389) is the signature; SMB and DNS // usually ride along but aren't required. Must fire BEFORE the @@ -110,6 +142,13 @@ func classify(vendor, osfp string, tcp, udp []int) string { return "embedded" } + // ── IP cameras / NVR ────────────────────────────────────────── + // RTSP (554) is the unambiguous video signal. Outside the default + // deep-probe list; fires when an operator probes it. + if tcpSet[554] { + return "camera" + } + // ── SSH-banner-driven OS hints ──────────────────────────────── // fingerprint() records the SSH greeting as the OS fingerprint // when port 22 answered. OpenSSH on Linux is the bulk of these. 
diff --git a/internal/scanner/classify_test.go b/internal/scanner/classify_test.go index 9088b63..1904357 100644 --- a/internal/scanner/classify_test.go +++ b/internal/scanner/classify_test.go @@ -141,6 +141,65 @@ func TestClassify(t *testing.T) { tcp: []int{22}, want: "linux-host", }, + { + name: "nas by synology vendor", + vendor: "Synology", + tcp: []int{80, 443, 5000}, + want: "nas", + }, + { + name: "nas by western digital vendor", + vendor: "Western Digital", + tcp: []int{80}, + want: "nas", + }, + { + name: "nas by nfs plus smb", + tcp: []int{445, 2049}, + want: "nas", + }, + { + name: "smb alone is still windows-host (not nas)", + tcp: []int{445}, + want: "windows-host", + }, + { + name: "hypervisor by qemu/kvm vendor", + vendor: "QEMU/KVM", + tcp: []int{22}, + want: "hypervisor", + }, + { + name: "hypervisor by virtualbox vendor", + vendor: "VirtualBox", + tcp: []int{22}, + want: "hypervisor", + }, + { + name: "hypervisor by proxmox 8006", + tcp: []int{22, 8006}, + want: "hypervisor", + }, + { + name: "kubernetes node by apiserver 6443", + tcp: []int{6443}, + want: "kubernetes-node", + }, + { + name: "kubernetes node by kubelet 10250", + tcp: []int{10250}, + want: "kubernetes-node", + }, + { + name: "container host by docker daemon 2375", + tcp: []int{2375}, + want: "container-host", + }, + { + name: "camera by rtsp 554", + tcp: []int{80, 554}, + want: "camera", + }, { name: "no match returns empty", tcp: []int{4242}, From 8289db06d1599b7569cfc293a77d12f461152800 Mon Sep 17 00:00:00 2001 From: "Aaron K. 
Clark" Date: Fri, 5 Jun 2026 06:37:21 -0500 Subject: [PATCH 12/26] =?UTF-8?q?docs:=20classifier=20expansion=20?= =?UTF-8?q?=E2=80=94=20README=20+=20ChangeLog=2026.23?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- ChangeLog.md | 36 ++++++++++++++++++++++++++++++++++++ README.md | 2 +- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 00dfa2c..5ed6924 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,42 @@ _No unreleased changes._ --- +## 26.23 — 2026-06-05 + +Device-classifier expansion. Several common asset classes previously fell +through to the generic `appliance` tag (or no tag). The classifier gains +five new categories, built only on signals already available (NIC OUI + +open ports) — no dead vendor strings. Post-backlog feature work (no +`Planning.md` number). + +### Added + +- **`nas`** — Synology / Western Digital by NIC OUI, or NFS (2049) + + SMB (445). Fires before the Windows SMB rule so a NAS isn't mislabelled + `windows-host`. +- **`hypervisor`** — now also matches QEMU/KVM, VirtualBox, and Microsoft + Hyper-V by OUI, and Proxmox VE by its 8006 management port (previously + only VMware-by-ports). +- **`kubernetes-node`** — kube-apiserver (6443), etcd (2379), or kubelet + (10250). +- **`container-host`** — Docker daemon (2375/2376). +- **`camera`** — RTSP (554). + +### Tests + +- `internal/scanner/classify_test.go` — cases for each new category plus a + regression that SMB alone is still `windows-host` (not `nas`). + +### Notes + +- The Kubernetes/Docker/Proxmox/RTSP ports sit outside the default + deep-probe list, so those labels fire when an operator adds the ports via + a per-subnet `deep_probe_ports` profile; NAS-by-NFS+SMB and the OUI-based + rules fire under the default configuration. +- `go test ./...`, `go vet ./...`, and `golangci-lint run ./...` all green. 
+ +--- + ## 26.22 — 2026-06-05 Test-coverage fill for previously untested units. No behaviour change. diff --git a/README.md b/README.md index 113336a..86fcae3 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ The system is designed to run as **two cooperating agent instances** — named * - **Active discovery** — concurrent TCP-probe scanning across configurable CIDR ranges to find live hosts. Optional deep TCP and UDP probe passes per profile. - **Asset fingerprinting** — banner-grab on SSH, FTP, SMTP, POP3, IMAP, HTTP, HTTPS (with TLS cert peek), MySQL handshake, PostgreSQL (SSLRequest probe), Redis (`INFO`), Memcached (`version`), Telnet. Stored per-port in `Port.Service`. -- **Device-type classifier** — heuristic rules over (vendor, OS banner, open ports) tag hosts as printer / router / hypervisor / windows-host / windows-dc / database (mysql|postgres|…) / mail-server / linux-host / appliance / iot-broker / embedded. +- **Device-type classifier** — heuristic rules over (vendor, OS banner, open ports) tag hosts as printer / router / hypervisor / windows-host / windows-server / windows-dc / nas / database (mysql|postgres|…) / mail-server / dns-server / kubernetes-node / container-host / camera / linux-host / appliance / iot-broker / embedded. - **MAC + vendor enrichment** — `/proc/net/arp` lookup on Linux + embedded OUI prefix table for ~80 common vendors. - **Per-subnet scan profiles** — aggressive hourly deep scans on critical infra, lazy daily liveness on guest networks, all in one config. - **Change detection + alerts** — diffs host inventory each cycle; fires `host.discovered` / `host.vanished` events to HTTP webhook and/or RFC 5424 syslog. From 575914509332c51092d4be268e8fcb2a312c55bc Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:41:46 -0500 Subject: [PATCH 13/26] feat(scanner): UDP service fingerprinting for DNS and NTP (26.24) udpScan recorded open UDP ports but left Port.Service empty. 
Confirmed-open UDP ports are now fingerprinted: - DNS (53): standard root A query; reply with QR set + matching txn ID identifies it -> "DNS" (REFUSED still proves DNS). - NTP (123): NTPv3 client request; server-mode reply -> "NTP", with stratum appended when valid ("NTP (stratum 2)"). - New udp_banner.go (udpFingerprint/udpExchange); helpers take the port so they're testable and work for DNS/NTP on non-standard probed ports. Tests use a UDP responder for both protocols plus negative cases. Co-Authored-By: Claude Opus 4.8 --- internal/scanner/scanner.go | 11 ++-- internal/scanner/udp_banner.go | 96 +++++++++++++++++++++++++++++ internal/scanner/udp_banner_test.go | 96 +++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+), 4 deletions(-) create mode 100644 internal/scanner/udp_banner.go create mode 100644 internal/scanner/udp_banner_test.go diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index 13f93a5..c37c459 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -449,10 +449,13 @@ func (s *Scanner) udpScan(ctx context.Context, hostID int64, ip string, ts time. if !ok { return } - // UDP banner-grabs are protocol-specific (DNS query - // for 53, SNMP get for 161, …). Out of scope here; - // Service stays empty for UDP for now. - s.upsertPort(ctx, hostID, ip, port, models.UDP, state, "", ts) + // Protocol-specific fingerprint for well-known UDP ports + // (DNS/NTP today); other ports record an empty Service. 
+ service := "" + if state == models.StateOpen { + service = udpFingerprint(ctx, ip, port, timeout) + } + s.upsertPort(ctx, hostID, ip, port, models.UDP, state, service, ts) if state == models.StateOpen { metrics.UDPProbeSuccessTotal.Inc() mu.Lock() diff --git a/internal/scanner/udp_banner.go b/internal/scanner/udp_banner.go new file mode 100644 index 0000000..eee2c5a --- /dev/null +++ b/internal/scanner/udp_banner.go @@ -0,0 +1,96 @@ +package scanner + +import ( + "context" + "net" + "strconv" + "time" +) + +// udpFingerprint sends a protocol-specific probe for well-known UDP ports +// and returns a Service label, or "" when the port isn't one we fingerprint +// or the peer didn't answer recognisably. Called only after the port is +// already known open (see udpScan), so the extra packet is cheap. +func udpFingerprint(ctx context.Context, ip string, port int, timeout time.Duration) string { + switch port { + case 53: + return dnsFingerprint(ctx, ip, port, timeout) + case 123: + return ntpFingerprint(ctx, ip, port, timeout) + default: + return "" + } +} + +// dnsFingerprint sends a standard DNS query (A record for the root, with a +// fixed transaction ID) and confirms the reply is a DNS response: a payload +// of at least the 12-byte header, the QR bit set, and the transaction ID +// echoed back. That identifies the service without depending on the answer +// contents (REFUSED still proves it's DNS). 
+func dnsFingerprint(ctx context.Context, ip string, port int, timeout time.Duration) string { + const id0, id1 = 0x13, 0x37 + query := []byte{ + id0, id1, // transaction ID + 0x01, 0x00, // flags: RD=1 + 0x00, 0x01, // QDCOUNT=1 + 0x00, 0x00, // ANCOUNT + 0x00, 0x00, // NSCOUNT + 0x00, 0x00, // ARCOUNT + 0x00, // QNAME = root (empty label) + 0x00, 0x01, // QTYPE=A + 0x00, 0x01, // QCLASS=IN + } + resp, ok := udpExchange(ctx, ip, port, timeout, query, 512) + if !ok || len(resp) < 12 { + return "" + } + if resp[0] != id0 || resp[1] != id1 { + return "" // ID mismatch — not a reply to our query + } + if resp[2]&0x80 == 0 { + return "" // QR bit not set — not a response + } + return "DNS" +} + +// ntpFingerprint sends an NTP v3 client request (mode 3) and inspects the +// reply: a 48-byte packet whose mode field is 4 (server) marks NTP. The +// stratum byte, when valid (1–15), is appended for a useful detail. +func ntpFingerprint(ctx context.Context, ip string, port int, timeout time.Duration) string { + req := make([]byte, 48) + req[0] = 0x1b // LI=0, VN=3, Mode=3 (client) + resp, ok := udpExchange(ctx, ip, port, timeout, req, 48) + if !ok || len(resp) < 48 { + return "" + } + if mode := resp[0] & 0x07; mode != 4 { + return "" // not a server reply + } + if stratum := resp[1]; stratum >= 1 && stratum <= 15 { + return "NTP (stratum " + strconv.Itoa(int(stratum)) + ")" + } + return "NTP" +} + +// udpExchange dials the UDP port, writes payload, and reads up to readCap +// bytes of the reply. Returns (reply, true) on a non-empty read. 
+func udpExchange(ctx context.Context, ip string, port int, timeout time.Duration, payload []byte, readCap int) ([]byte, bool) { + d := net.Dialer{Timeout: timeout} + conn, err := d.DialContext(ctx, "udp", net.JoinHostPort(ip, strconv.Itoa(port))) + if err != nil { + return nil, false + } + defer func() { _ = conn.Close() }() + + _ = conn.SetWriteDeadline(time.Now().Add(timeout)) + if _, err := conn.Write(payload); err != nil { + return nil, false + } + _ = conn.SetReadDeadline(time.Now().Add(timeout)) + buf := make([]byte, readCap) + n, err := conn.Read(buf) + if err != nil || n == 0 { + return nil, false + } + return buf[:n], true +} diff --git a/internal/scanner/udp_banner_test.go b/internal/scanner/udp_banner_test.go new file mode 100644 index 0000000..8a83699 --- /dev/null +++ b/internal/scanner/udp_banner_test.go @@ -0,0 +1,96 @@ +package scanner + +import ( + "context" + "net" + "testing" + "time" +) + +// startUDPResponder starts a UDP listener that replies to each datagram with +// handler(req). A nil handler return sends nothing (simulating silence). +func startUDPResponder(t *testing.T, handler func(req []byte) []byte) (host string, port int) { + t.Helper() + pc, err := net.ListenPacket("udp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = pc.Close() }) + go func() { + buf := make([]byte, 1024) + for { + n, addr, err := pc.ReadFrom(buf) + if err != nil { + return + } + if resp := handler(append([]byte(nil), buf[:n]...)); resp != nil { + _, _ = pc.WriteTo(resp, addr) + } + } + }() + h, p, _ := net.SplitHostPort(pc.LocalAddr().String()) + return h, atoi(t, p) +} + +func TestDNSFingerprint_Identified(t *testing.T) { + // Echo the query back with the QR (response) bit set. 
+ host, port := startUDPResponder(t, func(req []byte) []byte { + if len(req) < 12 { + return nil + } + req[2] |= 0x80 + return req + }) + got := dnsFingerprint(context.Background(), host, port, 500*time.Millisecond) + if got != "DNS" { + t.Errorf("got %q, want DNS", got) + } +} + +func TestDNSFingerprint_NotDNS(t *testing.T) { + // Reply that's too short / lacks the QR bit → not identified. + host, port := startUDPResponder(t, func([]byte) []byte { return []byte{0x00, 0x00} }) + got := dnsFingerprint(context.Background(), host, port, 500*time.Millisecond) + if got != "" { + t.Errorf("got %q, want empty", got) + } +} + +func TestNTPFingerprint_WithStratum(t *testing.T) { + host, port := startUDPResponder(t, func([]byte) []byte { + resp := make([]byte, 48) + resp[0] = 0x1c // LI=0, VN=3, Mode=4 (server) + resp[1] = 2 // stratum 2 + return resp + }) + got := ntpFingerprint(context.Background(), host, port, 500*time.Millisecond) + if got != "NTP (stratum 2)" { + t.Errorf("got %q, want NTP (stratum 2)", got) + } +} + +func TestNTPFingerprint_NotServerMode(t *testing.T) { + // Mode 3 (client) reply is not a server → not identified. + host, port := startUDPResponder(t, func([]byte) []byte { + resp := make([]byte, 48) + resp[0] = 0x1b + return resp + }) + got := ntpFingerprint(context.Background(), host, port, 500*time.Millisecond) + if got != "" { + t.Errorf("got %q, want empty", got) + } +} + +func TestUDPFingerprint_UnknownPortIsEmpty(t *testing.T) { + if got := udpFingerprint(context.Background(), "127.0.0.1", 9999, 50*time.Millisecond); got != "" { + t.Errorf("got %q, want empty for unfingerprinted port", got) + } +} + +func TestUDPFingerprint_NoResponderTimesOut(t *testing.T) { + host, port := startUDPResponder(t, func([]byte) []byte { return nil }) // silent peer on an ephemeral port (not a possibly-live 127.0.0.1:53): the read must time out and yield "". 
+ if got := dnsFingerprint(context.Background(), host, port, 100*time.Millisecond); got != "" { + t.Errorf("got %q, want empty when no DNS responder", got) + } +} From 9f486e39c6e88e9adff702d4176c4ea3488750dd Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Fri, 5 Jun 2026 06:41:46 -0500 Subject: [PATCH 14/26] =?UTF-8?q?docs:=20UDP=20DNS/NTP=20fingerprinting=20?= =?UTF-8?q?=E2=80=94=20README=20+=20ChangeLog=2026.24?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- ChangeLog.md | 36 ++++++++++++++++++++++++++++++++++++ README.md | 2 +- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 5ed6924..b78eb22 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,42 @@ _No unreleased changes._ --- +## 26.24 — 2026-06-05 + +UDP service fingerprinting. The scanner recorded open UDP ports but left +`Port.Service` empty (an in-code "out of scope" note); a DNS resolver and a +plain open UDP port were indistinguishable in the inventory. UDP ports +confirmed open are now fingerprinted for the two highest-value protocols. +Post-backlog feature work (no `Planning.md` number). + +### Added + +- **DNS fingerprint** — sends a standard A query for the root and confirms + the reply is a DNS response (QR bit set, transaction ID echoed) → + `DNS`. Identifies the service regardless of the answer (REFUSED still + proves DNS). +- **NTP fingerprint** — sends an NTPv3 client request and checks the reply + is server mode → `NTP`, with the stratum appended when valid + (`NTP (stratum 2)`). +- **`udpFingerprint` / `udpExchange`** helpers in a new `udp_banner.go`. + +### Changed + +- **`udpScan` now fingerprints open UDP ports** (ports 53/123 today; others + still record an empty Service). One extra datagram per matched open port. 
+ +### Tests + +- `internal/scanner/udp_banner_test.go` — DNS identified / not-DNS, NTP with + stratum / non-server mode, unfingerprinted port, and the no-responder + timeout, via a UDP test responder. + +### Notes + +- `go test ./...`, `go vet ./...`, and `golangci-lint run ./...` all green. + +--- + ## 26.23 — 2026-06-05 Device-classifier expansion. Several common asset classes previously fell diff --git a/README.md b/README.md index 86fcae3..5fe111e 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ The system is designed to run as **two cooperating agent instances** — named * ## Features - **Active discovery** — concurrent TCP-probe scanning across configurable CIDR ranges to find live hosts. Optional deep TCP and UDP probe passes per profile. -- **Asset fingerprinting** — banner-grab on SSH, FTP, SMTP, POP3, IMAP, HTTP, HTTPS (with TLS cert peek), MySQL handshake, PostgreSQL (SSLRequest probe), Redis (`INFO`), Memcached (`version`), Telnet. Stored per-port in `Port.Service`. +- **Asset fingerprinting** — banner-grab on SSH, FTP, SMTP, POP3, IMAP, HTTP, HTTPS (with TLS cert peek), MySQL handshake, PostgreSQL (SSLRequest probe), Redis (`INFO`), Memcached (`version`), Telnet, plus UDP DNS and NTP (stratum) probes. Stored per-port in `Port.Service`. - **Device-type classifier** — heuristic rules over (vendor, OS banner, open ports) tag hosts as printer / router / hypervisor / windows-host / windows-server / windows-dc / nas / database (mysql|postgres|…) / mail-server / dns-server / kubernetes-node / container-host / camera / linux-host / appliance / iot-broker / embedded. - **MAC + vendor enrichment** — `/proc/net/arp` lookup on Linux + embedded OUI prefix table for ~80 common vendors. - **Per-subnet scan profiles** — aggressive hourly deep scans on critical infra, lazy daily liveness on guest networks, all in one config. From 026dca5f46f90c7a37a6fadc9504553e7c2b06a0 Mon Sep 17 00:00:00 2001 From: "Aaron K. 
Clark" Date: Fri, 5 Jun 2026 06:59:15 -0500 Subject: [PATCH 15/26] feat(admin): paginate the /hosts and /scans list pages (26.25) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both pages rendered every row in one response — tens of thousands of hosts meant a multi-megabyte HTML page. They now paginate with the same ?limit=/?offset= convention as the JSON API (default 100, cap 1000), via the shared parsePagination. - pager type + newPager/pageSlice helpers; a shared "pager" template partial renders Prev/Next + "Showing X-Y of N", hidden when it all fits. - Host-inventory subtitle reports the full total, not the page size. - Tests: page windowing, prev/next targets, total, invalid limit -> 400. Bounds the rendered page; store.List still loads the full slice (pushing LIMIT into the store interface is a larger, later change). Co-Authored-By: Claude Opus 4.8 --- internal/admin/handlers.go | 16 +++++++- internal/admin/handlers_extra_test.go | 52 +++++++++++++++++++++++++ internal/admin/render.go | 55 +++++++++++++++++++++++++++ internal/admin/templates/base.html | 16 ++++++++ internal/admin/templates/hosts.html | 3 +- internal/admin/templates/scans.html | 1 + 6 files changed, 140 insertions(+), 3 deletions(-) diff --git a/internal/admin/handlers.go b/internal/admin/handlers.go index cdf9cdf..061362b 100644 --- a/internal/admin/handlers.go +++ b/internal/admin/handlers.go @@ -52,9 +52,15 @@ func (s *Server) handleHosts(w http.ResponseWriter, r *http.Request) { http.Error(w, "failed to load hosts", http.StatusInternalServerError) return } + limit, offset, err := parsePagination(r.URL.Query()) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } s.render(w, "hosts", hostsData{ pageData: s.basePage("Hosts"), - Hosts: hosts, + Hosts: pageSlice(hosts, offset, limit), + Pager: newPager("/hosts", len(hosts), limit, offset), }) } @@ -88,9 +94,15 @@ func (s *Server) handleScans(w http.ResponseWriter, r 
*http.Request) { http.Error(w, "failed to load scans", http.StatusInternalServerError) return } + limit, offset, err := parsePagination(r.URL.Query()) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } s.render(w, "scans", scansData{ pageData: s.basePage("Scans"), - Scans: scans, + Scans: pageSlice(scans, offset, limit), + Pager: newPager("/scans", len(scans), limit, offset), }) } diff --git a/internal/admin/handlers_extra_test.go b/internal/admin/handlers_extra_test.go index 96a8669..36afe1a 100644 --- a/internal/admin/handlers_extra_test.go +++ b/internal/admin/handlers_extra_test.go @@ -2,6 +2,7 @@ package admin_test import ( "context" + "fmt" "net/http" "regexp" "testing" @@ -12,6 +13,7 @@ import ( "github.com/Ronin48/NetworkInventoryAgent/internal/admin" "github.com/Ronin48/NetworkInventoryAgent/internal/health" + "github.com/Ronin48/NetworkInventoryAgent/models" ) // newServerWithTrigger starts an admin server wired with the given trigger. @@ -110,3 +112,53 @@ func TestHandleScanTrigger_MissingCSRF(t *testing.T) { defer func() { _ = resp.Body.Close() }() assert.Equal(t, http.StatusForbidden, resp.StatusCode, "POST without CSRF token must be rejected") } + +func TestHandleHosts_Pagination(t *testing.T) { + var hosts []*models.Host + for i := 1; i <= 5; i++ { + hosts = append(hosts, &models.Host{ + ID: int64(i), IPAddress: fmt.Sprintf("10.0.0.%d", i), LastSeen: time.Now(), + }) + } + srv := newTestServer(t, &mockHostStore{hosts: hosts}, &mockPortStore{}, &mockScanStore{}) + + // First page: two rows, a Next link, the true total in the subtitle. 
+ resp := get(t, srv, "/hosts?limit=2&offset=0") + defer func() { _ = resp.Body.Close() }() + body := readBody(t, resp) + assert.Equal(t, http.StatusOK, resp.StatusCode) + assert.Contains(t, body, "of 5", "subtitle/pager should report the full total") + assert.Contains(t, body, "10.0.0.1") + assert.Contains(t, body, "10.0.0.2") + assert.NotContains(t, body, "10.0.0.3", "row beyond the page must not render") + assert.Contains(t, body, "/hosts?limit=2&offset=2", "Next link to the second page") + + // Last page: final row, no Next target. + resp2 := get(t, srv, "/hosts?limit=2&offset=4") + defer func() { _ = resp2.Body.Close() }() + body2 := readBody(t, resp2) + assert.Contains(t, body2, "10.0.0.5") + assert.Contains(t, body2, "/hosts?limit=2&offset=2", "Prev link back to page two") + assert.NotContains(t, body2, "offset=6", "no Next link past the end") +} + +func TestHandleHosts_InvalidLimit(t *testing.T) { + srv := newTestServer(t, &mockHostStore{}, &mockPortStore{}, &mockScanStore{}) + resp := get(t, srv, "/hosts?limit=0") + defer func() { _ = resp.Body.Close() }() + assert.Equal(t, http.StatusBadRequest, resp.StatusCode) +} + +func TestHandleScans_Pagination(t *testing.T) { + var scans []*models.Scan + for i := 1; i <= 3; i++ { + scans = append(scans, &models.Scan{ID: int64(i), Subnet: fmt.Sprintf("10.0.%d.0/24", i), StartedAt: time.Now()}) + } + srv := newTestServer(t, &mockHostStore{}, &mockPortStore{}, &mockScanStore{scans: scans}) + resp := get(t, srv, "/scans?limit=2&offset=0") + defer func() { _ = resp.Body.Close() }() + body := readBody(t, resp) + assert.Equal(t, http.StatusOK, resp.StatusCode) + assert.Contains(t, body, "of 3") + assert.Contains(t, body, "/scans?limit=2&offset=2", "Next link present") +} diff --git a/internal/admin/render.go b/internal/admin/render.go index 8fe153b..2ca62b9 100644 --- a/internal/admin/render.go +++ b/internal/admin/render.go @@ -59,6 +59,7 @@ type dashboardData struct { type hostsData struct { pageData Hosts []*models.Host 
+ Pager pager } type hostDetailData struct { @@ -70,6 +71,60 @@ type hostDetailData struct { type scansData struct { pageData Scans []*models.Scan + Pager pager +} + +// pager carries pagination state for list templates. PagePath is the route +// the prev/next links target (e.g. "/hosts"). From/To are 1-based indices of +// the rows shown (both 0 when the page is empty). +type pager struct { + PagePath string + Total int + Limit int + Offset int + From int + To int + HasPrev bool + HasNext bool + PrevOffset int + NextOffset int +} + +// newPager computes display + link state for a window [offset, offset+limit) +// over a list of `total` items. +func newPager(path string, total, limit, offset int) pager { + p := pager{PagePath: path, Total: total, Limit: limit, Offset: offset} + if offset < total { + p.From = offset + 1 + end := offset + limit + if end > total { + end = total + } + p.To = end + } + if offset > 0 { + p.HasPrev = true + if p.PrevOffset = offset - limit; p.PrevOffset < 0 { + p.PrevOffset = 0 + } + } + if offset+limit < total { + p.HasNext = true + p.NextOffset = offset + limit + } + return p +} + +// pageSlice returns the [offset, offset+limit) window of s, clamped to bounds. 
+func pageSlice[T any](s []T, offset, limit int) []T { + if offset >= len(s) { + return nil + } + end := offset + limit + if end > len(s) { + end = len(s) + } + return s[offset:end] } type watchdogData struct { diff --git a/internal/admin/templates/base.html b/internal/admin/templates/base.html index c7b885d..eee0b51 100644 --- a/internal/admin/templates/base.html +++ b/internal/admin/templates/base.html @@ -61,10 +61,26 @@ .btn:hover{background:#2ea043} .btn-link{display:inline-block;background:#21262d;color:#c9d1d9;border:1px solid #30363d;border-radius:6px;padding:6px 12px;font-size:13px;font-weight:600;text-decoration:none} .btn-link:hover{background:#30363d;text-decoration:none} +.btn-link.disabled{opacity:.4;pointer-events:none} +.pager{display:flex;align-items:center;justify-content:space-between;margin-top:16px} +.pager-info{color:#8b949e;font-size:12px} +.pager-links{display:flex;gap:8px} {{end}} +{{define "pager"}} +{{if gt .Total .Limit}} +<div class="pager">
+ <span class="pager-info">Showing {{.From}}–{{.To}} of {{.Total}}</span> + <span class="pager-links"> + {{if .HasPrev}}<a class="btn-link" href="{{.PagePath}}?limit={{.Limit}}&offset={{.PrevOffset}}">‹ Prev</a>{{else}}<span class="btn-link disabled">‹ Prev</span>{{end}} + {{if .HasNext}}<a class="btn-link" href="{{.PagePath}}?limit={{.Limit}}&offset={{.NextOffset}}">Next ›</a>{{else}}<span class="btn-link disabled">Next ›</span>{{end}} + </span> +</div>
+{{end}} +{{end}} + {{define "nav"}}