/**
 * Monitoring Data Collector
 * HTTP health checks for services + active client-site content checks +
 * EasyPanel server/container metrics (collected over SSH) + staleness detection
 * Runs every 5 minutes via scheduler in server.ts
 * @author Descomplicar® | @link descomplicar.pt | @copyright 2026
 */

import db from '../db.js'

interface ServiceCheck {
  name: string
  url: string
  okStatuses?: number[] // Additional HTTP codes to treat as 'up' (e.g. 403 for gateway)
}

interface CheckResult {
  status: 'up' | 'down' | 'warning'
  http_code: number
  response_time: number
  error?: string
}

interface SiteCheckResult {
  status: 'up' | 'down'
  http_code: number
  reason: string
}

/**
 * Services to monitor via HTTP health check.
 * Each entry maps to a record in tbl_eal_monitoring (category='service').
 */
const SERVICES: ServiceCheck[] = [
  { name: 'Desk CRM', url: 'https://desk.descomplicar.pt' },
  { name: 'NextCloud', url: 'https://cloud.descomplicar.pt' },
  { name: 'Gitea', url: 'https://git.descomplicar.pt' },
  { name: 'Wiki.js', url: 'https://wiki.descomplicar.pt' },
  { name: 'Syncthing', url: 'https://sync.descomplicar.pt' },
  { name: 'Authentik', url: 'https://auth.descomplicar.pt' },
  { name: 'Metabase', url: 'https://bi.descomplicar.pt' },
  { name: 'N8N', url: 'https://automator.descomplicar.pt' },
  { name: 'Outline', url: 'https://hub.descomplicar.pt' },
  { name: 'WhatSMS', url: 'https://app.whatsms.pt' },
  { name: 'MCP Gateway', url: 'http://gateway.descomplicar.pt', okStatuses: [403] },
]

/**
 * Value to hand to console.* for a caught error: the message for Error
 * instances, the raw thrown value otherwise.
 */
function describeError(err: unknown): unknown {
  return err instanceof Error ? err.message : err
}

/**
 * True when a caught error comes from an aborted request, i.e. the
 * AbortController timer fired before the request finished.
 */
function isAbortError(error: unknown): boolean {
  return error instanceof Error && (error.name === 'AbortError' || error.message.includes('abort'))
}

/**
 * Read `affectedRows` from a query result without resorting to `any`.
 * Returns undefined when the result does not carry a numeric `affectedRows`.
 */
function affectedRowsOf(result: unknown): number | undefined {
  if (typeof result !== 'object' || result === null || !('affectedRows' in result)) {
    return undefined
  }
  const { affectedRows } = result as { affectedRows?: unknown }
  return typeof affectedRows === 'number' ? affectedRows : undefined
}

/**
 * Check a single URL and return health status.
 * Uses redirect: 'manual' so 302 (auth redirects) count as 'up'.
 *
 * @param url - Address to request with GET.
 * @param timeoutMs - Abort the request after this many milliseconds.
 * @returns 'up' for 2xx/3xx, 'warning' for 4xx, 'down' for everything else
 *          (5xx, network failure, timeout). Never rejects.
 */
async function checkUrl(url: string, timeoutMs = 10000): Promise<CheckResult> {
  const start = Date.now()
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)

  try {
    const response = await fetch(url, {
      method: 'GET',
      redirect: 'manual',
      signal: controller.signal,
    })
    const response_time = Date.now() - start
    const http_code = response.status

    // Only the status line matters; discard the body so the underlying
    // connection is released instead of waiting for garbage collection.
    void response.body?.cancel().catch(() => undefined)

    // 2xx or 3xx = service is responding
    if (http_code >= 200 && http_code < 400) {
      return { status: 'up', http_code, response_time }
    }
    // 4xx = service responds but with client error
    if (http_code >= 400 && http_code < 500) {
      return { status: 'warning', http_code, response_time }
    }
    // 5xx (or anything unexpected) = server error
    return { status: 'down', http_code, response_time }
  } catch (error: unknown) {
    const response_time = Date.now() - start
    if (isAbortError(error)) {
      return { status: 'down', http_code: 0, response_time, error: 'Timeout' }
    }
    const message = error instanceof Error ? error.message : 'Unknown error'
    return { status: 'down', http_code: 0, response_time, error: message }
  } finally {
    // Runs on success and failure alike, so a failed request never leaves
    // the abort timer pending.
    clearTimeout(timeout)
  }
}

/**
 * Update or insert a monitoring record.
 * Tries UPDATE first; if no row is reported as affected, does INSERT.
 *
 * @param category - Record category (e.g. 'service', 'site_uptime', 'container').
 * @param name - Record name within the category.
 * @param status - Status value to store.
 * @param details - Serialised with JSON.stringify into the details column.
 */
async function upsertMonitoring(category: string, name: string, status: string, details: object): Promise<void> {
  const detailsJson = JSON.stringify(details)

  const updateSql = [
    'UPDATE tbl_eal_monitoring',
    'SET status = ?, details = ?, last_check = NOW()',
    'WHERE category = ? AND name = ?',
  ].join(' ')
  const [result] = await db.query(updateSql, [status, detailsJson, category, name])

  // NOTE(review): this relies on the driver reporting matched rows for UPDATE;
  // confirm the connection flags so an unchanged row is not inserted twice.
  if (affectedRowsOf(result) === 0) {
    const insertSql = [
      'INSERT INTO tbl_eal_monitoring (category, name, status, details, last_check)',
      'VALUES (?, ?, ?, ?, NOW())',
    ].join(' ')
    await db.query(insertSql, [category, name, status, detailsJson])
  }
}

/**
 * Check all services via HTTP and update DB.
 * Runs all checks in parallel for speed.
 *
 * A service whose result could not be written to the DB is logged and
 * counted as 'down'.
 *
 * @returns Totals per status; `checked` is the number of configured services.
 */
export async function checkAllServices(): Promise<{ checked: number; up: number; down: number; warning: number }> {
  let up = 0, down = 0, warning = 0

  const results = await Promise.allSettled(
    SERVICES.map(async (service) => {
      const result = await checkUrl(service.url)

      // Override status if HTTP code is in the service's okStatuses list
      if (result.status === 'warning' && service.okStatuses?.includes(result.http_code)) {
        result.status = 'up'
      }

      try {
        await upsertMonitoring('service', service.name, result.status, {
          url: service.url,
          http_code: result.http_code,
          response_time: `${result.response_time}ms`,
          ...(result.error ? { error: result.error } : {})
        })
      } catch (err: unknown) {
        // Surface the storage failure; it would otherwise only show up as
        // an unexplained 'down' in the totals.
        console.error(`[COLLECTOR] Failed to record service check for ${service.name}:`, describeError(err))
        throw err
      }

      return { name: service.name, ...result }
    })
  )

  for (const r of results) {
    if (r.status !== 'fulfilled') {
      down++
    } else if (r.value.status === 'up') {
      up++
    } else if (r.value.status === 'warning') {
      warning++
    } else {
      down++
    }
  }

  return { checked: SERVICES.length, up, down, warning }
}

/**
 * Mark WP sites as warning if they haven't reported in >26h.
 * The WP plugin (descomplicar-monitor) POSTs data periodically.
 * If no data arrives, something is wrong.
 *
 * @returns Number of rows flagged as stale.
 */
export async function checkStaleness(): Promise<number> {
  // NOTE: the WP plugin writes category='site' (not 'wordpress'). Historical bug
  // fixed on 2026-06-15 — the staleness check never ran against the right category.
  const staleSql = [
    'UPDATE tbl_eal_monitoring',
    "SET status = 'warning',",
    "details = JSON_SET(COALESCE(details, '{}'), '$.stale', true, '$.stale_reason', 'Sem dados do plugin há >26h (wp-cron pode não estar a disparar)')",
    "WHERE category = 'site'",
    "AND status IN ('ok', 'up')",
    'AND last_check < DATE_SUB(NOW(), INTERVAL 26 HOUR)',
  ].join(' ')
  const [result] = await db.query(staleSql)
  return affectedRowsOf(result) || 0
}

/**
 * ACTIVE verification of client sites (independent of the WP plugin / wp-cron).
 * Same sites as watchdog-sites.py. Validates HTTP + content (size, error
 * signatures, positive marker) to catch "HTTP 200 but broken page" (e.g. Redis).
 * Writes to tbl_eal_monitoring with category='site_uptime', name=hostname.
 */
const PING_SITES: { host: string; url: string; marker: string }[] = [
  { host: 'descomplicar.pt', url: 'https://descomplicar.pt', marker: 'Descomplicar' },
  { host: 'emanuelalmeida.pt', url: 'https://emanuelalmeida.pt', marker: 'Emanuel Almeida' },
  { host: 'solarfvengenharia.com', url: 'https://solarfvengenharia.com', marker: 'Solar FV' },
  { host: 'ignitionvortex.pt', url: 'https://ignitionvortex.pt', marker: 'Ignition Vortex' },
  { host: 'watercontrol.pt', url: 'https://watercontrol.pt', marker: 'Water Control' },
  { host: 'familyclinic.pt', url: 'https://familyclinic.pt', marker: 'Family Clinic' },
  { host: 'karateclubedegaia.com', url: 'https://karateclubedegaia.com', marker: 'Karate Clube de Gaia' },
  { host: 'whatsms.pt', url: 'https://whatsms.pt', marker: 'Todos os canais' },
  { host: 'carstuff.pt', url: 'https://carstuff.pt', marker: 'CarStuff' },
  { host: 'care.descomplicar.pt', url: 'https://care.descomplicar.pt', marker: 'Care Descomplicar' },
  { host: 'ccv.descomplicar.pt', url: 'https://ccv.descomplicar.pt', marker: 'Carvoeiro Caves' },
  { host: 'e-commerce.descomplicar.pt', url: 'https://e-commerce.descomplicar.pt', marker: 'e-Commerce Descomplicar' },
  { host: 'ecommerce.descomplicar.pt', url: 'https://ecommerce.descomplicar.pt', marker: 'Descomplicar - Template' },
  { host: 'ecommerce-demo.descomplicar.pt', url: 'https://ecommerce-demo.descomplicar.pt', marker: 'Descomplicar - Demo' },
  { host: 'starter.descomplicar.pt', url: 'https://starter.descomplicar.pt', marker: 'Starter Descomplicar' },
]

/** Pages whose body is shorter than this many characters are treated as broken. */
const SITE_SIZE_FLOOR = 5000

/** Lower-case fragments that mark an error page even when the HTTP status looks fine. */
const SITE_ERROR_SIGNATURES = [
  'error establishing a database connection',
  'error establishing a redis connection',
  'there has been a critical error',
  'cannot connect to redis',
  'redis connection failed',
  'error establishing a connection to redis',
  'service unavailable',
]

/**
 * Fetch a site (following redirects) and validate both status and content.
 *
 * The page is 'down' when the status is 5xx, the body is shorter than
 * SITE_SIZE_FLOOR, a known error signature is present, or the expected
 * marker text is missing (case-insensitive). Never rejects.
 *
 * @param url - Page to fetch.
 * @param marker - Text that must appear in the page; an empty string skips the check.
 * @param timeoutMs - Abort the request (including the body download) after this many milliseconds.
 */
async function checkSiteContent(
  url: string,
  marker: string,
  timeoutMs = 15000,
): Promise<SiteCheckResult> {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)

  try {
    const response = await fetch(url, {
      method: 'GET',
      redirect: 'follow',
      signal: controller.signal,
      headers: { 'User-Agent': 'dashboard-site-uptime/1.0' },
    })
    const body = await response.text()
    const code = response.status

    if (code >= 500) {
      return { status: 'down', http_code: code, reason: `HTTP ${code}` }
    }
    if (body.length < SITE_SIZE_FLOOR) {
      return { status: 'down', http_code: code, reason: `página curta (${body.length}B)` }
    }

    const low = body.toLowerCase()
    const signature = SITE_ERROR_SIGNATURES.find((sig) => low.includes(sig))
    if (signature !== undefined) {
      return { status: 'down', http_code: code, reason: `assinatura de erro: '${signature}'` }
    }
    if (marker && !low.includes(marker.toLowerCase())) {
      return { status: 'down', http_code: code, reason: `marcador ausente: '${marker}'` }
    }
    return { status: 'up', http_code: code, reason: 'ok' }
  } catch (error: unknown) {
    const msg = error instanceof Error ? error.message : 'Unknown error'
    return { status: 'down', http_code: 0, reason: isAbortError(error) ? 'timeout' : msg }
  } finally {
    // Covers the failure paths too, so the abort timer never outlives the check.
    clearTimeout(timer)
  }
}

/**
 * Run the active content check against every entry in PING_SITES, in
 * parallel, and store each outcome under category='site_uptime'.
 *
 * A site whose result could not be written to the DB is logged and counted
 * as 'down'.
 *
 * @returns Totals; `checked` is the number of configured sites.
 */
export async function checkAllSites(): Promise<{ checked: number; up: number; down: number }> {
  let up = 0, down = 0

  const results = await Promise.allSettled(
    PING_SITES.map(async (site) => {
      const r = await checkSiteContent(site.url, site.marker)
      try {
        await upsertMonitoring('site_uptime', site.host, r.status === 'up' ? 'ok' : 'failed', {
          url: site.url,
          http_code: r.http_code,
          reason: r.reason,
          checked_by: 'dashboard-active',
        })
      } catch (err: unknown) {
        console.error(`[COLLECTOR] Failed to record site check for ${site.host}:`, describeError(err))
        throw err
      }
      return r
    }),
  )

  for (const r of results) {
    if (r.status === 'fulfilled' && r.value.status === 'up') up++
    else down++
  }

  return { checked: PING_SITES.length, up, down }
}

/**
 * Collect EasyPanel server metrics + container stats via SSH.
 * The EasyPanel tRPC API does not expose a monitor.* endpoint in this version.
 * Password SSH to the Easy server (5.9.90.70) works from inside the container.
 *
 * @returns true when the SSH collector reports at least one success.
 */
export async function collectEasyPanelMetrics(): Promise<boolean> {
  const { collectSSHMetrics } = await import('./server-metrics.js')
  const result = await collectSSHMetrics()
  return result.success > 0
}

/**
 * Collect Docker Swarm service status via SSH to EasyPanel server.
 * Uses `docker service ls` to obtain actual vs desired replicas.
 *
 * Connection settings come from EASY_HOST / EASY_USER / EASY_PASS; without a
 * password nothing is attempted.
 *
 * @returns true when the status was collected and stored, false otherwise.
 */
export async function collectEasyPanelContainers(): Promise<boolean> {
  const easyHost = process.env.EASY_HOST || '5.9.90.70'
  const easyUser = process.env.EASY_USER || 'root'
  const easyPass = process.env.EASY_PASS || ''

  if (!easyPass) return false

  try {
    const { Client } = await import('ssh2')

    const output = await new Promise<string>((resolve, reject) => {
      const conn = new Client()
      let data = ''
      // Overall guard for connect + command execution.
      const timer = setTimeout(() => { conn.end(); reject(new Error('timeout')) }, 20000)

      conn.on('ready', () => {
        conn.exec("docker service ls --format '{{.Name}} {{.Replicas}}'", (err, stream) => {
          if (err) { clearTimeout(timer); conn.end(); reject(err); return }
          stream.on('data', (chunk: Buffer) => { data += chunk.toString() })
          stream.on('close', () => { clearTimeout(timer); conn.end(); resolve(data) })
          // stderr output is read and discarded.
          stream.stderr.on('data', () => {})
        })
      })
      conn.on('error', (err) => { clearTimeout(timer); reject(err) })
      conn.connect({ host: easyHost, port: 22, username: easyUser, password: easyPass, readyTimeout: 15000 })
    })

    let total = 0, up = 0, down = 0
    const unhealthy: string[] = []

    for (const rawLine of output.trim().split('\n')) {
      const line = rawLine.trim()
      if (!line) continue

      // Each line is "<name> <actual>/<desired>".
      const parts = line.split(/\s+/)
      const name = parts[0] || ''
      const replicas = parts[1] || '0/0'
      const [actualRaw, desiredRaw] = replicas.split('/')
      // Missing or non-numeric parts become NaN, which fails the comparison
      // below and therefore counts the service as down.
      const actual = Number(actualRaw)
      const desired = Number(desiredRaw)

      total++
      if (actual >= desired && desired > 0) {
        up++
      } else {
        down++
        unhealthy.push(name.replace('descomplicar_', ''))
      }
    }

    const status = down > 0 ? 'warning' : 'ok'
    await upsertMonitoring('container', 'EasyPanel Containers', status, {
      total,
      up,
      down,
      restarting: 0,
      ...(unhealthy.length > 0 ? { unhealthy } : {}),
    })

    console.log(`[EASYPANEL] Containers: ${up}/${total} running${down > 0 ? `, ${down} down: ${unhealthy.join(', ')}` : ''}`)
    return true
  } catch (err: unknown) {
    console.error('[EASYPANEL] Container collection failed:', describeError(err))
    return false
  }
}

/**
 * Run one collection step; a failure is logged and does not stop the
 * remaining steps.
 */
async function runStep(label: string, step: () => Promise<void>): Promise<void> {
  try {
    await step()
  } catch (err: unknown) {
    console.error(`[COLLECTOR] ${label} failed:`, describeError(err))
  }
}

/**
 * Main collector entry point.
 * Called by scheduler in server.ts every 5 minutes.
 *
 * Steps run one after another and are isolated from each other: an error in
 * one step is logged and the next step still runs.
 */
export async function collectMonitoringData(): Promise<void> {
  console.log('[COLLECTOR] Starting monitoring collection...')

  await runStep('Service checks', async () => {
    const services = await checkAllServices()
    console.log(`[COLLECTOR] Services: ${services.up} up, ${services.warning} warning, ${services.down} down`)
  })

  // Active verification of client sites (HTTP + content, independent of wp-cron)
  await runStep('Active site checks', async () => {
    const sites = await checkAllSites()
    console.log(`[COLLECTOR] Sites (active): ${sites.up} up, ${sites.down} down`)
  })

  // EasyPanel server metrics and Swarm service status, both collected over SSH
  await runStep('EasyPanel collection', async () => {
    const gotStats = await collectEasyPanelMetrics()
    const gotContainers = await collectEasyPanelContainers()
    if (!gotStats && !gotContainers) {
      console.warn('[COLLECTOR] EasyPanel metrics unavailable (check EASY_HOST/EASY_USER/EASY_PASS)')
    }
  })

  await runStep('Staleness check', async () => {
    const stale = await checkStaleness()
    if (stale > 0) {
      console.log(`[COLLECTOR] Marked ${stale} stale WP site(s) as warning`)
    }
  })

  console.log('[COLLECTOR] Done')
}