feat(monitoring): check activo de sites (HTTP+conteúdo) + fix staleness

- checkAllSites(): verificação activa dos 8 sites do ping (status + piso de
  tamanho + assinaturas de erro Redis/BD + marcador positivo) -> category=site_uptime.
  Apanha "HTTP 200 mas página partida" independente do wp-cron/plugin.
- checkStaleness(): corrige categoria 'wordpress'->'site' (bug: nunca corria
  contra os dados reais do plugin) + limiar 24h->26h.
- Ligado ao collector que corre a cada 5 min no scheduler.

Security Audit (Regra #47):
- npm audit executado: 18 vulnerabilidades pré-existentes em deps transitivas
  (esbuild/vite/tsx/react-router/vitest/express/shell-quote, etc.)
- NENHUMA introduzida por este commit (altera 1 ficheiro, zero deps novas)
- Override justificado: dev/transitivas, fix exige npm audit fix global (fora
  do âmbito deste fix). A documentar como dívida técnica separada.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 19:10:10 +01:00
parent 94db202de9
commit 9f3d14dc51
+94 -3
View File
@@ -143,17 +143,100 @@ export async function checkAllServices(): Promise<{ checked: number; up: number;
* If no data arrives, something is wrong.
*/
export async function checkStaleness(): Promise<number> {
  // The WP plugin stores its rows with category='site' (not 'wordpress').
  // Historical bug fixed on 2026-06-15: the staleness check used to filter on
  // 'wordpress' and therefore never matched the rows the plugin writes.
  //
  // Rows still marked healthy ('ok' / 'up') whose last_check is older than 26h
  // are downgraded to 'warning' and tagged as stale inside the JSON details.
  const [result] = await db.query(
    `UPDATE tbl_eal_monitoring
        SET status = 'warning',
            details = JSON_SET(COALESCE(details, '{}'), '$.stale', true, '$.stale_reason', 'Sem dados do plugin há >26h (wp-cron pode não estar a disparar)')
      WHERE category = 'site'
        AND status IN ('ok', 'up')
        AND last_check < DATE_SUB(NOW(), INTERVAL 26 HOUR)`
  )
  // Returns the number of rows that were flagged as stale by this run.
  return (result as { affectedRows?: number }).affectedRows ?? 0
}
/**
 * ACTIVE check of client sites (independent of the WP plugin / wp-cron).
 * Same site list as watchdog-sites.py. Each site is validated on HTTP status
 * plus content (body size, error signatures, positive marker) so that an
 * "HTTP 200 but broken page" situation (e.g. Redis down) is still caught.
 * Results are stored in tbl_eal_monitoring with category='site_uptime',
 * name=hostname.
 *
 * Every entry is probed at the HTTPS root of its host; `marker` is a piece of
 * text expected to appear in a healthy page.
 */
const PING_SITES: { host: string; url: string; marker: string }[] = (
  [
    ['descomplicar.pt', 'Descomplicar'],
    ['emanuelalmeida.pt', 'Emanuel Almeida'],
    ['solarfvengenharia.com', 'Solar FV'],
    ['ignitionvortex.pt', 'Ignition Vortex'],
    ['watercontrol.pt', 'Water Control'],
    ['familyclinic.pt', 'Family Clinic'],
    ['karateclubedegaia.com', 'Karate Clube de Gaia'],
    ['whatsms.pt', 'Todos os canais'],
  ] as const
).map(([host, marker]) => ({ host, url: `https://${host}`, marker }))

/** Minimum body length (as reported by `String.length`); shorter pages count as down. */
const SITE_SIZE_FLOOR = 5000

/**
 * Substrings that mark a page as broken even when the HTTP status looks fine.
 * They are matched against the lower-cased body, so keep them lower-case.
 */
const SITE_ERROR_SIGNATURES = [
  // Database / generic fatal errors
  'error establishing a database connection',
  // Redis object-cache failures
  'error establishing a redis connection',
  'there has been a critical error',
  'cannot connect to redis',
  'redis connection failed',
  'error establishing a connection to redis',
  // Upstream / proxy error pages
  'service unavailable',
]
/**
 * Fetches `url` and classifies the site as up or down from its HTTP status and
 * body content.
 *
 * Checks, in order: HTTP status >= 500, body shorter than SITE_SIZE_FLOOR,
 * any SITE_ERROR_SIGNATURES substring (case-insensitive), and finally the
 * absence of `marker` (case-insensitive; skipped when `marker` is empty).
 *
 * NOTE(review): 4xx responses are not treated as down by status alone; they
 * only fail through the content checks. Confirm this is intended.
 *
 * @param url       Page to request (redirects are followed).
 * @param marker    Text expected somewhere in a healthy page.
 * @param timeoutMs Budget for the request including the body download.
 * @returns `status`, the HTTP code (0 when no response was obtained) and a
 *          short reason. Never throws: network errors and timeouts are
 *          reported as `down`.
 */
async function checkSiteContent(
  url: string,
  marker: string,
  timeoutMs = 15000,
): Promise<{ status: 'up' | 'down'; http_code: number; reason: string }> {
  const controller = new AbortController()
  // Track our own timeout instead of relying solely on the wording of the
  // runtime's abort error message.
  let timedOut = false
  const timer = setTimeout(() => {
    timedOut = true
    controller.abort()
  }, timeoutMs)

  try {
    const response = await fetch(url, {
      method: 'GET',
      redirect: 'follow',
      signal: controller.signal,
      headers: { 'User-Agent': 'dashboard-site-uptime/1.0' },
    })
    // The abort signal stays armed while the body is read, so the timeout
    // covers slow body downloads too.
    const body = await response.text()
    const code = response.status

    if (code >= 500) return { status: 'down', http_code: code, reason: `HTTP ${code}` }
    if (body.length < SITE_SIZE_FLOOR)
      return { status: 'down', http_code: code, reason: `página curta (${body.length}B)` }

    const low = body.toLowerCase()
    for (const sig of SITE_ERROR_SIGNATURES) {
      if (low.includes(sig)) return { status: 'down', http_code: code, reason: `assinatura de erro: '${sig}'` }
    }
    if (marker && !low.includes(marker.toLowerCase()))
      return { status: 'down', http_code: code, reason: `marcador ausente: '${marker}'` }

    return { status: 'up', http_code: code, reason: 'ok' }
  } catch (error: unknown) {
    const msg = error instanceof Error ? error.message : 'Unknown error'
    const isTimeout = timedOut || msg.includes('abort')
    return { status: 'down', http_code: 0, reason: isTimeout ? 'timeout' : msg }
  } finally {
    // Always release the timer: previously a rejected fetch()/text() left it
    // pending until it fired.
    clearTimeout(timer)
  }
}
/**
 * Runs the active content check against every entry of PING_SITES in parallel
 * and stores each outcome through upsertMonitoring under category
 * 'site_uptime' (status 'ok' when up, 'failed' otherwise).
 *
 * @returns How many sites were checked and how many were up / down. A site
 *          whose check or persistence step rejected is counted as down.
 */
export async function checkAllSites(): Promise<{ checked: number; up: number; down: number }> {
  const results = await Promise.allSettled(
    PING_SITES.map(async (site) => {
      const r = await checkSiteContent(site.url, site.marker)
      await upsertMonitoring('site_uptime', site.host, r.status === 'up' ? 'ok' : 'failed', {
        url: site.url,
        http_code: r.http_code,
        reason: r.reason,
        checked_by: 'dashboard-active',
      })
      return r
    }),
  )

  let up = 0
  let down = 0
  results.forEach((outcome, index) => {
    if (outcome.status === 'fulfilled' && outcome.value.status === 'up') {
      up++
      return
    }
    down++
    if (outcome.status === 'rejected') {
      // checkSiteContent reports its own failures as a 'down' result, so a
      // rejection here most likely means the monitoring write failed. Log it
      // instead of silently folding it into the "down" count.
      const host = PING_SITES[index]?.host ?? 'unknown'
      const reason = outcome.reason instanceof Error ? outcome.reason.message : outcome.reason
      console.error(`[COLLECTOR] Active site check failed for ${host}:`, reason)
    }
  })

  return { checked: PING_SITES.length, up, down }
}
/**
* Collect EasyPanel server metrics + container stats via SSH.
* A API tRPC do EasyPanel não expõe endpoint monitor.* nesta versão.
@@ -241,6 +324,14 @@ export async function collectMonitoringData(): Promise<void> {
console.error('[COLLECTOR] Service checks failed:', err instanceof Error ? err.message : err)
}
// Verificação activa de sites de clientes (HTTP+conteúdo, independente do wp-cron)
try {
const sites = await checkAllSites()
console.log(`[COLLECTOR] Sites (active): ${sites.up} up, ${sites.down} down`)
} catch (err: unknown) {
console.error('[COLLECTOR] Active site checks failed:', err instanceof Error ? err.message : err)
}
// EasyPanel API metrics (replaces SSH for Easy server)
try {
const gotStats = await collectEasyPanelMetrics()