08d2a31dc9
Alinha PING_SITES do check activo com o watchdog-sites.py: +carstuff +6 internos (care, ccv, e-commerce, ecommerce, ecommerce-demo, starter). Security Audit (Regra #47): sem deps novas; vulns pré-existentes documentadas no commit anterior. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
364 lines
14 KiB
TypeScript
364 lines
14 KiB
TypeScript
/**
|
|
* Monitoring Data Collector
|
|
* HTTP health checks for services + EasyPanel API metrics + staleness detection
|
|
* Runs every 5 minutes via scheduler in server.ts
|
|
* @author Descomplicar® | @link descomplicar.pt | @copyright 2026
|
|
*/
|
|
import db from '../db.js'
|
|
|
|
/** One HTTP endpoint probed by the service health check. */
interface ServiceCheck {
  /** Display name; also used as the `name` key when the result is stored. */
  name: string
  /** URL requested by the probe. */
  url: string
  /** Additional HTTP codes to treat as 'up' (e.g. 403 for the gateway). */
  okStatuses?: number[]
}
|
|
|
|
/** Outcome of a single HTTP probe. */
interface CheckResult {
  /** 'up' for 2xx/3xx, 'warning' for 4xx, 'down' for anything else or a failed request. */
  status: 'up' | 'down' | 'warning'
  /** HTTP status code of the response; 0 when no response was received. */
  http_code: number
  /** Elapsed time of the probe in milliseconds. */
  response_time: number
  /** Failure description; only present when the request itself failed. */
  error?: string
}
|
|
|
|
/**
 * Internal services probed by the HTTP health check.
 * Every entry is stored as a row in tbl_eal_monitoring with category='service',
 * keyed by its `name`.
 */
const SERVICES: ServiceCheck[] = [
  { name: 'Desk CRM', url: 'https://desk.descomplicar.pt' },
  { name: 'NextCloud', url: 'https://cloud.descomplicar.pt' },
  { name: 'Gitea', url: 'https://git.descomplicar.pt' },
  { name: 'Wiki.js', url: 'https://wiki.descomplicar.pt' },
  { name: 'Syncthing', url: 'https://sync.descomplicar.pt' },
  { name: 'Authentik', url: 'https://auth.descomplicar.pt' },
  { name: 'Metabase', url: 'https://bi.descomplicar.pt' },
  { name: 'N8N', url: 'https://automator.descomplicar.pt' },
  { name: 'Outline', url: 'https://hub.descomplicar.pt' },
  { name: 'WhatSMS', url: 'https://app.whatsms.pt' },
  // NOTE(review): the only plain-http entry; a 403 from it is accepted as healthy. Confirm http is intended.
  { name: 'MCP Gateway', url: 'http://gateway.descomplicar.pt', okStatuses: [403] },
]
|
|
|
|
/**
 * Probe a single URL and classify its health from the HTTP status code.
 *
 * Redirects are not followed (`redirect: 'manual'`), so a 302 to a login page
 * counts as 'up'.
 *
 * @param url - Address requested with GET.
 * @param timeoutMs - The request is aborted after this many milliseconds.
 * @returns 'up' for 2xx/3xx, 'warning' for 4xx, 'down' for any other status.
 *   Request failures do not reject: they are reported as 'down' with
 *   `http_code: 0` and an `error` message ('Timeout' when the abort timer fired).
 */
async function checkUrl(url: string, timeoutMs = 10000): Promise<CheckResult> {
  const start = Date.now()
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)

  try {
    const response = await fetch(url, {
      method: 'GET',
      redirect: 'manual',
      signal: controller.signal,
    })

    const response_time = Date.now() - start
    const http_code = response.status

    // Only the status matters here; discard the body instead of leaving it unread.
    void response.body?.cancel().catch(() => {})

    // 2xx or 3xx = service is responding
    if (http_code >= 200 && http_code < 400) {
      return { status: 'up', http_code, response_time }
    }
    // 4xx = service responds but with a client error
    if (http_code >= 400 && http_code < 500) {
      return { status: 'warning', http_code, response_time }
    }
    // Everything else (5xx) = server error
    return { status: 'down', http_code, response_time }
  } catch (error: unknown) {
    const response_time = Date.now() - start
    // The controller is only ever aborted by the timer above, so an aborted
    // signal identifies a timeout without inspecting the error text.
    if (controller.signal.aborted) {
      return { status: 'down', http_code: 0, response_time, error: 'Timeout' }
    }
    const message = error instanceof Error ? error.message : 'Unknown error'
    return { status: 'down', http_code: 0, response_time, error: message }
  } finally {
    // Runs on success and failure alike, so a failed request leaves no pending timer.
    clearTimeout(timeout)
  }
}
|
|
|
|
/**
 * Persist one monitoring result, keyed by (category, name).
 *
 * An UPDATE is attempted first; when it reports zero affected rows, an INSERT
 * creates the record instead. `last_check` is set to NOW() in both cases.
 *
 * @param category - Value for the `category` column.
 * @param name - Value for the `name` column.
 * @param status - Value for the `status` column.
 * @param details - Object serialized with JSON.stringify into the `details` column.
 */
async function upsertMonitoring(category: string, name: string, status: string, details: object): Promise<void> {
  const payload = JSON.stringify(details)

  const [updateOutcome] = await db.query(
    `UPDATE tbl_eal_monitoring SET status = ?, details = ?, last_check = NOW() WHERE category = ? AND name = ?`,
    [status, payload, category, name]
  )

  // Anything other than "no rows affected" means the record already existed.
  const rowsTouched = (updateOutcome as any).affectedRows
  if (rowsTouched !== 0) return

  await db.query(
    `INSERT INTO tbl_eal_monitoring (category, name, status, details, last_check) VALUES (?, ?, ?, ?, NOW())`,
    [category, name, status, payload]
  )
}
|
|
|
|
/**
 * Probe every entry of SERVICES over HTTP and store each result.
 *
 * All probes run concurrently. A probe whose promise rejects (for example
 * because storing the result failed) is counted as 'down'.
 *
 * @returns How many services were checked and how many ended up in each state.
 */
export async function checkAllServices(): Promise<{ checked: number; up: number; down: number; warning: number }> {
  const probeAndStore = async (service: ServiceCheck) => {
    const outcome = await checkUrl(service.url)

    // A 4xx code listed in the service's okStatuses is the expected healthy answer.
    const isAllowListed = service.okStatuses?.includes(outcome.http_code)
    if (outcome.status === 'warning' && isAllowListed) {
      outcome.status = 'up'
    }

    const details = {
      url: service.url,
      http_code: outcome.http_code,
      response_time: `${outcome.response_time}ms`,
      ...(outcome.error ? { error: outcome.error } : {})
    }
    await upsertMonitoring('service', service.name, outcome.status, details)

    return { name: service.name, ...outcome }
  }

  const settled = await Promise.allSettled(SERVICES.map((service) => probeAndStore(service)))

  const tally = { up: 0, down: 0, warning: 0 }
  for (const entry of settled) {
    const bucket = entry.status === 'fulfilled' ? entry.value.status : 'down'
    tally[bucket]++
  }

  return { checked: SERVICES.length, up: tally.up, down: tally.down, warning: tally.warning }
}
|
|
|
|
/**
 * Flag WP sites whose last report is older than 26 hours.
 *
 * The WP plugin (descomplicar-monitor) POSTs data periodically; a row that is
 * still 'ok'/'up' but has not been refreshed for 26h is switched to 'warning'
 * and gets `stale` / `stale_reason` added to its JSON details.
 *
 * @returns Number of rows marked as stale (0 when none).
 */
export async function checkStaleness(): Promise<number> {
  // NOTE: the WP plugin stores category='site' (not 'wordpress'). Historical bug
  // fixed on 2026-06-15 — the staleness check never ran against the right category.
  const markStaleSql = `UPDATE tbl_eal_monitoring
     SET status = 'warning',
         details = JSON_SET(COALESCE(details, '{}'), '$.stale', true, '$.stale_reason', 'Sem dados do plugin há >26h (wp-cron pode não estar a disparar)')
     WHERE category = 'site'
       AND status IN ('ok', 'up')
       AND last_check < DATE_SUB(NOW(), INTERVAL 26 HOUR)`

  const [outcome] = await db.query(markStaleSql)
  const marked = (outcome as any).affectedRows
  return marked || 0
}
|
|
|
|
/** One site probed by the active uptime check. */
type PingSite = { host: string; url: string; marker: string }

/**
 * ACTIVE check of client sites (independent of the WP plugin / wp-cron).
 * Same site list as watchdog-sites.py. Each site is validated on HTTP status
 * plus content (size, error signatures, positive marker) to catch
 * "HTTP 200 but broken page" cases (e.g. Redis failures).
 * Results are stored in tbl_eal_monitoring with category='site_uptime', name=hostname.
 */
const PING_SITES: PingSite[] = [
  // Client and company sites
  { host: 'descomplicar.pt', url: 'https://descomplicar.pt', marker: 'Descomplicar' },
  { host: 'emanuelalmeida.pt', url: 'https://emanuelalmeida.pt', marker: 'Emanuel Almeida' },
  { host: 'solarfvengenharia.com', url: 'https://solarfvengenharia.com', marker: 'Solar FV' },
  { host: 'ignitionvortex.pt', url: 'https://ignitionvortex.pt', marker: 'Ignition Vortex' },
  { host: 'watercontrol.pt', url: 'https://watercontrol.pt', marker: 'Water Control' },
  { host: 'familyclinic.pt', url: 'https://familyclinic.pt', marker: 'Family Clinic' },
  { host: 'karateclubedegaia.com', url: 'https://karateclubedegaia.com', marker: 'Karate Clube de Gaia' },
  { host: 'whatsms.pt', url: 'https://whatsms.pt', marker: 'Todos os canais' },
  { host: 'carstuff.pt', url: 'https://carstuff.pt', marker: 'CarStuff' },
  // descomplicar.pt subdomains
  { host: 'care.descomplicar.pt', url: 'https://care.descomplicar.pt', marker: 'Care Descomplicar' },
  { host: 'ccv.descomplicar.pt', url: 'https://ccv.descomplicar.pt', marker: 'Carvoeiro Caves' },
  { host: 'e-commerce.descomplicar.pt', url: 'https://e-commerce.descomplicar.pt', marker: 'e-Commerce Descomplicar' },
  { host: 'ecommerce.descomplicar.pt', url: 'https://ecommerce.descomplicar.pt', marker: 'Descomplicar - Template' },
  { host: 'ecommerce-demo.descomplicar.pt', url: 'https://ecommerce-demo.descomplicar.pt', marker: 'Descomplicar - Demo' },
  { host: 'starter.descomplicar.pt', url: 'https://starter.descomplicar.pt', marker: 'Starter Descomplicar' },
]
|
|
/** Pages whose body is shorter than this (in string length) are treated as broken. */
const SITE_SIZE_FLOOR = 5000

/**
 * Lower-case fragments that mark a page as broken when found anywhere in the
 * lower-cased response body.
 */
const SITE_ERROR_SIGNATURES: string[] = [
  'error establishing a database connection',
  'error establishing a redis connection',
  'there has been a critical error',
  'cannot connect to redis',
  'redis connection failed',
  'error establishing a connection to redis',
  'service unavailable',
]
|
|
|
|
/**
 * Fetch a page (following redirects) and decide whether it is really serving content.
 *
 * The page is reported 'down' when, in this order: the status is 5xx, the body
 * is shorter than SITE_SIZE_FLOOR, the body contains one of
 * SITE_ERROR_SIGNATURES, or the (non-empty) marker is missing from the body.
 * All text comparisons are case-insensitive.
 *
 * @param url - Page to request with GET.
 * @param marker - Text expected somewhere in a healthy page; '' disables the check.
 * @param timeoutMs - Request and body download are aborted after this many milliseconds.
 * @returns Verdict plus the HTTP code (0 when no response was obtained) and a
 *   short reason. Request failures do not reject; they yield 'down' with
 *   reason 'timeout' or the error message.
 */
async function checkSiteContent(
  url: string,
  marker: string,
  timeoutMs = 15000,
): Promise<{ status: 'up' | 'down'; http_code: number; reason: string }> {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)
  const failed = (http_code: number, reason: string) => ({ status: 'down' as const, http_code, reason })

  try {
    const response = await fetch(url, {
      method: 'GET',
      redirect: 'follow',
      signal: controller.signal,
      headers: { 'User-Agent': 'dashboard-site-uptime/1.0' },
    })
    // The timer stays armed while the body downloads, so a stalled body also times out.
    const body = await response.text()
    const code = response.status

    // NOTE(review): 4xx responses are not rejected on status alone; they pass or
    // fail on the content checks below — confirm this matches watchdog-sites.py.
    if (code >= 500) return failed(code, `HTTP ${code}`)
    if (body.length < SITE_SIZE_FLOOR) return failed(code, `página curta (${body.length}B)`)

    const low = body.toLowerCase()
    for (const sig of SITE_ERROR_SIGNATURES) {
      if (low.includes(sig)) return failed(code, `assinatura de erro: '${sig}'`)
    }
    if (marker && !low.includes(marker.toLowerCase())) {
      return failed(code, `marcador ausente: '${marker}'`)
    }
    return { status: 'up', http_code: code, reason: 'ok' }
  } catch (error: unknown) {
    // The controller is only aborted by the timer above, so an aborted signal means timeout.
    if (controller.signal.aborted) return failed(0, 'timeout')
    return failed(0, error instanceof Error ? error.message : 'Unknown error')
  } finally {
    // Runs on every path, so a rejected fetch/text() leaves no pending timer.
    clearTimeout(timer)
  }
}
|
|
|
|
/**
 * Run the active content check against every entry of PING_SITES and store
 * each verdict under category='site_uptime' ('ok' when up, 'failed' otherwise).
 *
 * All sites are checked concurrently. A site whose promise rejects (for
 * example because storing the result failed) is counted as down.
 *
 * @returns How many sites were checked, and how many were up / down.
 */
export async function checkAllSites(): Promise<{ checked: number; up: number; down: number }> {
  const verifyAndStore = async (site: (typeof PING_SITES)[number]) => {
    const verdict = await checkSiteContent(site.url, site.marker)
    const storedStatus = verdict.status === 'up' ? 'ok' : 'failed'

    await upsertMonitoring('site_uptime', site.host, storedStatus, {
      url: site.url,
      http_code: verdict.http_code,
      reason: verdict.reason,
      checked_by: 'dashboard-active',
    })
    return verdict
  }

  const settled = await Promise.allSettled(PING_SITES.map((site) => verifyAndStore(site)))

  // Everything that is not a fulfilled 'up' verdict counts as down.
  const up = settled.filter((entry) => entry.status === 'fulfilled' && entry.value.status === 'up').length
  const down = settled.length - up

  return { checked: PING_SITES.length, up, down }
}
|
|
|
|
/**
 * Collect EasyPanel server metrics + container stats via SSH.
 * The EasyPanel tRPC API does not expose a monitor.* endpoint in this version.
 * Password SSH to the Easy server (5.9.90.70) works from inside the container.
 *
 * The work is delegated to `collectSSHMetrics` from ./server-metrics.js,
 * which is loaded lazily on each call.
 *
 * @returns true when the collector reports a `success` count above zero.
 */
export async function collectEasyPanelMetrics(): Promise<boolean> {
  const metricsModule = await import('./server-metrics.js')
  const { success } = await metricsModule.collectSSHMetrics()
  return success > 0
}
|
|
|
|
/** Counts derived from `docker service ls` output. */
interface SwarmServiceTally {
  total: number
  up: number
  down: number
  unhealthy: string[]
}

/**
 * Parse lines of the form `<name> <actual>/<desired>` into up/down counts.
 * A service is up when actual >= desired and desired > 0; anything else,
 * including an unparsable replica field, is down. Blank lines are ignored.
 */
function parseSwarmServices(output: string): SwarmServiceTally {
  const tally: SwarmServiceTally = { total: 0, up: 0, down: 0, unhealthy: [] }

  for (const rawLine of output.split('\n')) {
    const line = rawLine.trim()
    if (!line) continue

    const parts = line.split(/\s+/)
    const name = parts[0] || ''
    const replicas = parts[1] || '0/0'
    // NaN defaults make every comparison below false, i.e. the service counts as down.
    const [actual = NaN, desired = NaN] = replicas.split('/').map(Number)

    tally.total++
    if (actual >= desired && desired > 0) {
      tally.up++
    } else {
      tally.down++
      tally.unhealthy.push(name.replace('descomplicar_', ''))
    }
  }
  return tally
}

/**
 * Run one command over password-authenticated SSH and resolve with its stdout.
 * Rejects on connection errors, on the overall timeout, and when the remote
 * command reports a non-zero exit code (stderr is included in the error).
 */
async function runSshCommand(
  target: { host: string; username: string; password: string },
  command: string,
  timeoutMs: number,
): Promise<string> {
  const { Client } = await import('ssh2')

  return new Promise<string>((resolve, reject) => {
    const conn = new Client()
    let stdout = ''
    let stderr = ''
    let exitCode: number | null = null
    let settled = false

    // Settle exactly once, whichever of timeout / error / close comes first.
    const finish = (failure?: Error) => {
      if (settled) return
      settled = true
      clearTimeout(timer)
      conn.end()
      if (failure) reject(failure)
      else resolve(stdout)
    }
    const timer = setTimeout(() => finish(new Error('timeout')), timeoutMs)

    conn.on('ready', () => {
      conn.exec(command, (err, stream) => {
        if (err) { finish(err); return }
        stream.on('data', (chunk: Buffer) => { stdout += chunk.toString() })
        stream.stderr.on('data', (chunk: Buffer) => { stderr += chunk.toString() })
        // The exit status is optional in SSH; it stays null when the server sends none.
        stream.on('exit', (code: number | null) => { exitCode = code })
        stream.on('close', () => {
          if (exitCode !== null && exitCode !== 0) {
            finish(new Error(`remote command exited with code ${exitCode}: ${stderr.trim() || 'no stderr output'}`))
          } else {
            finish()
          }
        })
      })
    })
    conn.on('error', (err) => finish(err))
    conn.connect({ host: target.host, port: 22, username: target.username, password: target.password, readyTimeout: 15000 })
  })
}

/**
 * Collect Docker Swarm service status via SSH to the EasyPanel server.
 * Uses `docker service ls` to compare actual vs desired replicas and stores
 * the totals under category='container', name='EasyPanel Containers'
 * ('warning' when any service is down, otherwise 'ok').
 *
 * Connection settings come from EASY_HOST / EASY_USER / EASY_PASS.
 *
 * @returns true when the status was collected and stored; false when
 *   EASY_PASS is not set or when the SSH session, the remote command, or the
 *   database write fails (the failure is logged, not thrown).
 */
export async function collectEasyPanelContainers(): Promise<boolean> {
  const easyHost = process.env.EASY_HOST || '5.9.90.70'
  const easyUser = process.env.EASY_USER || 'root'
  const easyPass = process.env.EASY_PASS || ''

  if (!easyPass) return false

  try {
    // A failed command rejects here instead of yielding empty output, which
    // would otherwise be stored as "0 services, status ok".
    const output = await runSshCommand(
      { host: easyHost, username: easyUser, password: easyPass },
      "docker service ls --format '{{.Name}} {{.Replicas}}'",
      20000,
    )

    const { total, up, down, unhealthy } = parseSwarmServices(output)

    const status = down > 0 ? 'warning' : 'ok'
    await upsertMonitoring('container', 'EasyPanel Containers', status, {
      total, up, down, restarting: 0,
      ...(unhealthy.length > 0 ? { unhealthy } : {}),
    })

    console.log(`[EASYPANEL] Containers: ${up}/${total} running${down > 0 ? `, ${down} down: ${unhealthy.join(', ')}` : ''}`)
    return true
  } catch (err: unknown) {
    console.error('[EASYPANEL] Container collection failed:', err instanceof Error ? err.message : err)
    return false
  }
}
|
|
|
|
/**
 * Main collector entry point.
 * Called by the scheduler in server.ts every 5 minutes.
 *
 * Runs, in order: service HTTP checks, active site checks, EasyPanel server
 * metrics, EasyPanel container status, and the WP staleness sweep. Every step
 * is isolated: a failure is logged and the remaining steps still run, so this
 * function does not reject because of a failing step.
 */
export async function collectMonitoringData(): Promise<void> {
  console.log('[COLLECTOR] Starting monitoring collection...')

  // Run one step; log its failure and carry on so later steps are not skipped.
  const attempt = async <T>(label: string, step: () => Promise<T>): Promise<T | undefined> => {
    try {
      return await step()
    } catch (err: unknown) {
      console.error(`[COLLECTOR] ${label} failed:`, err instanceof Error ? err.message : err)
      return undefined
    }
  }

  const services = await attempt('Service checks', checkAllServices)
  if (services) {
    console.log(`[COLLECTOR] Services: ${services.up} up, ${services.warning} warning, ${services.down} down`)
  }

  // Active check of client sites (HTTP + content, independent of wp-cron)
  const sites = await attempt('Active site checks', checkAllSites)
  if (sites) {
    console.log(`[COLLECTOR] Sites (active): ${sites.up} up, ${sites.down} down`)
  }

  // EasyPanel server metrics and Swarm service status (both collected over SSH).
  // Each collector gets its own guard so a throw from the first one no longer
  // prevents the second from running.
  const gotStats = (await attempt('EasyPanel collection', collectEasyPanelMetrics)) ?? false
  const gotContainers = (await attempt('EasyPanel collection', collectEasyPanelContainers)) ?? false
  if (!gotStats && !gotContainers) {
    console.warn('[COLLECTOR] EasyPanel metrics unavailable (check EASY_HOST/EASY_USER/EASY_PASS)')
  }

  const stale = await attempt('Staleness check', checkStaleness)
  if (stale !== undefined && stale > 0) {
    console.log(`[COLLECTOR] Marked ${stale} stale WP site(s) as warning`)
  }

  console.log('[COLLECTOR] Done')
}
|