/**
 * Monitoring Data Collector
 * HTTP health checks for services + staleness detection for WP sites
 * Runs every 5 minutes via scheduler in server.ts
 * @author Descomplicar® | @link descomplicar.pt | @copyright 2026
 */
// Provenance: patch 153a1577 (Emanuel Almeida, 2026-02-23) —
// "feat: add monitoring-collector.ts - HTTP health checks for 11 services"
import db from '../db.js'

/** Health classification produced by a service check. */
type HealthStatus = 'up' | 'down' | 'warning'

/** One HTTP endpoint to probe. */
interface ServiceCheck {
  name: string
  url: string
  okStatuses?: number[] // Additional HTTP codes to treat as 'up' (e.g. 403 for gateway)
}

/** Outcome of probing a single URL. */
interface CheckResult {
  status: HealthStatus
  /** HTTP status code, or 0 when no response was received. */
  http_code: number
  /** Elapsed wall-clock time in milliseconds. */
  response_time: number
  /** Present only when the request itself failed (network error or timeout). */
  error?: string
}

/**
 * Services to monitor via HTTP health check.
 * Each entry maps to a record in tbl_eal_monitoring (category='service').
 */
const SERVICES: ServiceCheck[] = [
  { name: 'Desk CRM', url: 'https://desk.descomplicar.pt' },
  { name: 'NextCloud', url: 'https://cloud.descomplicar.pt' },
  { name: 'Gitea', url: 'https://git.descomplicar.pt' },
  { name: 'Wiki.js', url: 'https://wiki.descomplicar.pt' },
  { name: 'Syncthing', url: 'https://sync.descomplicar.pt' },
  { name: 'Authentik', url: 'https://auth.descomplicar.pt' },
  { name: 'Metabase', url: 'https://bi.descomplicar.pt' },
  { name: 'N8N', url: 'https://automator.descomplicar.pt' },
  { name: 'Outline', url: 'https://hub.descomplicar.pt' },
  { name: 'WhatSMS', url: 'https://whatsms.pt' },
  { name: 'MCP Gateway', url: 'http://gateway.descomplicar.pt', okStatuses: [403] },
]

/**
 * Check a single URL and return health status.
 * Uses redirect: 'manual' so 302 (auth redirects) count as 'up'.
 *
 * @param url - Absolute URL to request with GET.
 * @param timeoutMs - Abort the request after this many milliseconds.
 * @returns 'up' for 2xx/3xx, 'warning' for 4xx, 'down' for anything else
 *   (including network errors and timeouts, which report http_code 0).
 *   Never rejects: request failures are folded into the result.
 */
async function checkUrl(url: string, timeoutMs = 10000): Promise<CheckResult> {
  const start = Date.now()
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)

  try {
    const response = await fetch(url, {
      method: 'GET',
      redirect: 'manual',
      signal: controller.signal,
    })

    const response_time = Date.now() - start
    const http_code = response.status

    // Only the status code is used, so discard the unread body instead of
    // leaving it pending; a failure to cancel is irrelevant to the result.
    void response.body?.cancel().catch(() => undefined)

    // 2xx or 3xx = service is responding
    if (http_code >= 200 && http_code < 400) {
      return { status: 'up', http_code, response_time }
    }
    // 4xx = service responds but with client error
    if (http_code >= 400 && http_code < 500) {
      return { status: 'warning', http_code, response_time }
    }
    // 5xx (or anything unexpected) = server error
    return { status: 'down', http_code, response_time }
  } catch (error: unknown) {
    const response_time = Date.now() - start

    // The controller is aborted only by the timer above, so its state is an
    // exact timeout signal (unlike matching on the error message text).
    if (controller.signal.aborted) {
      return { status: 'down', http_code: 0, response_time, error: 'Timeout' }
    }

    const message = error instanceof Error ? error.message : 'Unknown error'
    return { status: 'down', http_code: 0, response_time, error: message }
  } finally {
    // Always release the timer, including when fetch rejects.
    clearTimeout(timeout)
  }
}

/**
 * Update or insert a monitoring record.
 * Tries UPDATE first; if no row matches, does INSERT.
 *
 * NOTE(review): the two statements are not atomic, so concurrent callers could
 * both INSERT. If tbl_eal_monitoring has a unique key on (category, name),
 * INSERT ... ON DUPLICATE KEY UPDATE would be safer — confirm against the schema.
 *
 * @param category - Record category (e.g. 'service').
 * @param name - Record name within the category.
 * @param status - Status value to store.
 * @param details - Arbitrary details, stored as a JSON string.
 */
async function upsertMonitoring(category: string, name: string, status: string, details: object): Promise<void> {
  const detailsJson = JSON.stringify(details)

  const [result] = await db.query(
    `UPDATE tbl_eal_monitoring SET status = ?, details = ?, last_check = NOW() WHERE category = ? AND name = ?`,
    [status, detailsJson, category, name]
  )

  const { affectedRows } = result as { affectedRows?: number }
  if (affectedRows === 0) {
    await db.query(
      `INSERT INTO tbl_eal_monitoring (category, name, status, details, last_check) VALUES (?, ?, ?, ?, NOW())`,
      [category, name, status, detailsJson]
    )
  }
}

/**
 * Check all services via HTTP and update DB.
 * Runs all checks in parallel for speed.
 *
 * A service whose check or DB write throws is counted as 'down' and the
 * failure is logged; the remaining services are still processed.
 *
 * @returns Number of services checked and how many fell into each status.
 */
export async function checkAllServices(): Promise<{ checked: number; up: number; down: number; warning: number }> {
  const outcomes = await Promise.allSettled(
    SERVICES.map(async (service): Promise<HealthStatus> => {
      const result = await checkUrl(service.url)

      // Any HTTP code listed in the service's okStatuses counts as 'up'.
      const status: HealthStatus = service.okStatuses?.includes(result.http_code) ? 'up' : result.status

      await upsertMonitoring('service', service.name, status, {
        url: service.url,
        http_code: result.http_code,
        response_time: `${result.response_time}ms`,
        ...(result.error ? { error: result.error } : {}),
      })

      return status
    })
  )

  const tally: Record<HealthStatus, number> = { up: 0, down: 0, warning: 0 }

  outcomes.forEach((outcome, index) => {
    if (outcome.status === 'fulfilled') {
      tally[outcome.value]++
      return
    }

    // Rejections are counted as down, but must not disappear silently.
    tally.down++
    const reason: unknown = outcome.reason
    console.error(
      `[COLLECTOR] Check failed for ${SERVICES[index]?.name ?? 'unknown service'}:`,
      reason instanceof Error ? reason.message : reason
    )
  })

  return { checked: SERVICES.length, up: tally.up, down: tally.down, warning: tally.warning }
}

/**
 * Mark WP sites as warning if they haven't reported in >24h.
 * The WP plugin (descomplicar-monitor) POSTs data periodically.
 * If no data arrives, something is wrong.
 *
 * Only rows with category 'wordpress' and status 'ok' or 'up' are touched;
 * `stale` and `stale_reason` are merged into the existing details JSON.
 *
 * @returns Number of rows newly marked as stale.
 */
export async function checkStaleness(): Promise<number> {
  const [result] = await db.query(
    `UPDATE tbl_eal_monitoring
     SET status = 'warning',
         details = JSON_SET(COALESCE(details, '{}'), '$.stale', true, '$.stale_reason', 'No data received in 24h')
     WHERE category = 'wordpress'
       AND status IN ('ok', 'up')
       AND last_check < DATE_SUB(NOW(), INTERVAL 24 HOUR)`
  )
  return (result as { affectedRows?: number }).affectedRows ?? 0
}

/**
 * Main collector entry point.
 * Called by scheduler in server.ts every 5 minutes.
 *
 * The two phases are isolated from each other: a failure in the service
 * checks is logged and does not prevent the staleness check from running.
 */
export async function collectMonitoringData(): Promise<void> {
  console.log('[COLLECTOR] Starting monitoring collection...')

  try {
    const services = await checkAllServices()
    console.log(`[COLLECTOR] Services: ${services.up} up, ${services.warning} warning, ${services.down} down`)
  } catch (err: unknown) {
    console.error('[COLLECTOR] Service checks failed:', err instanceof Error ? err.message : err)
  }

  try {
    const stale = await checkStaleness()
    if (stale > 0) {
      console.log(`[COLLECTOR] Marked ${stale} stale WP site(s) as warning`)
    }
  } catch (err: unknown) {
    console.error('[COLLECTOR] Staleness check failed:', err instanceof Error ? err.message : err)
  }

  console.log('[COLLECTOR] Done')
}