/**
 * Automatic pattern detector over the `sessions` DB (Observability Phase 6A).
 *
 * Nine heuristic detectors running on better-sqlite3: six session detectors
 * plus three cross-detectors that also read `worklog_comments`. Each detector
 * returns zero or more `Pattern`s for the analysed week. Pipeline, as
 * described by the original module header:
 *   1. Run detectors over the interval [weekStart, weekEnd]
 *   2. Persist via `upsertPattern` (idempotent per (week_iso, pattern_key))
 *   3. Compute `consecutive_weeks` by looking at previous weeks
 * Only step 1 is implemented by the code in this file.
 */
import type Database from 'better-sqlite3'
import type { SessionsDb, PatternRecord } from './db.js'

export type Severity = 'info' | 'warning' | 'action'

/** One detected pattern for a given week, before it is turned into a `PatternRecord`. */
export interface Pattern {
  pattern_key: string
  title: string
  description: string
  severity: Severity
  metric_value: number | null
  sample_session_ids: string[]
  affected_count: number
}

/** Database handle plus the ISO timestamps bounding the analysed week. */
export interface DetectCtx {
  db: Database.Database
  weekStartIso: string
  weekEndIso: string
}

/** Columns read from `sessions` by `baseRows`. */
interface SessionRow {
  session_id: string
  project_slug: string
  started_at: string
  event_count: number
  tool_calls: number
  tools_used: string
  skills_invoked: string
  outcome: string
  duration_sec: number | null
}

const MS_PER_DAY = 86400000

/** Maximum number of sample ids kept per pattern. */
const MAX_SAMPLE_IDS = 5

/** Converts a Date to an ISO UTC string. */
function iso(d: Date): string {
  return d.toISOString()
}

/**
 * Parses a JSON column expected to hold an array of names.
 *
 * Returns the distinct string items in first-seen order. Anything that is not
 * valid JSON, not an array, or not a string item is ignored instead of
 * throwing, so one malformed row cannot abort a whole detector run.
 * De-duplicating keeps the per-session counters honest when a name is listed
 * more than once for the same session.
 */
function parseStringList(raw: string | null | undefined): string[] {
  if (!raw) return []
  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch {
    return []
  }
  if (!Array.isArray(parsed)) return []
  const unique = new Set<string>()
  for (const item of parsed as unknown[]) {
    if (typeof item === 'string') unique.add(item)
  }
  return [...unique]
}

/**
 * Parses the `actions_json` column into normalised action items.
 *
 * Non-object entries are skipped; a missing or non-string `descricao` or
 * `prioridade` becomes an empty string. `prioridade` is upper-cased.
 */
function parseActionItems(raw: string | null | undefined): Array<{ descricao: string; prioridade: string }> {
  if (!raw) return []
  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch {
    return []
  }
  if (!Array.isArray(parsed)) return []
  const items: Array<{ descricao: string; prioridade: string }> = []
  for (const entry of parsed as unknown[]) {
    if (typeof entry !== 'object' || entry === null) continue
    const rec = entry as Record<string, unknown>
    items.push({
      descricao: typeof rec.descricao === 'string' ? rec.descricao : '',
      prioridade: typeof rec.prioridade === 'string' ? rec.prioridade.toUpperCase() : '',
    })
  }
  return items
}

/**
 * Computes the interval [Monday 00:00:00 UTC, Sunday 23:59:59.999 UTC] of the
 * week containing `ref` (Rule 17 — the week starts on Monday).
 *
 * @param ref Any instant inside the wanted week; only its UTC calendar day is used.
 * @returns The week bounds and the ISO week label (`YYYY-Www`) of its Monday.
 */
export function weekRange(ref: Date): { start: Date; end: Date; iso: string } {
  const d = new Date(Date.UTC(ref.getUTCFullYear(), ref.getUTCMonth(), ref.getUTCDate()))
  const dow = d.getUTCDay() // 0=Sun, 1=Mon
  const diffToMonday = dow === 0 ? -6 : 1 - dow
  const start = new Date(d)
  start.setUTCDate(d.getUTCDate() + diffToMonday)
  const end = new Date(start)
  end.setUTCDate(start.getUTCDate() + 6)
  end.setUTCHours(23, 59, 59, 999)
  return { start, end, iso: weekIso(start) }
}

/**
 * ISO 8601 week label (`YYYY-Www`) for the week containing `monday`.
 *
 * The Thursday of the week decides both the ISO year and the week number:
 * week N is the one whose Thursday is the N-th Thursday of that year, i.e.
 * `floor(daysSinceJan1(thursday) / 7) + 1`.
 *
 * @param monday Reference date. Callers pass the Monday of the week, but any
 *   day of the week yields the same label.
 */
export function weekIso(monday: Date): string {
  const thursday = new Date(Date.UTC(monday.getUTCFullYear(), monday.getUTCMonth(), monday.getUTCDate()))
  const isoDow = (thursday.getUTCDay() + 6) % 7 // 0=Mon … 6=Sun
  thursday.setUTCDate(thursday.getUTCDate() + 3 - isoDow)
  const year = thursday.getUTCFullYear()
  const jan1 = Date.UTC(year, 0, 1)
  const dayOfYear = Math.round((thursday.getTime() - jan1) / MS_PER_DAY) // 0-based
  const week = Math.floor(dayOfYear / 7) + 1
  return `${year}-W${String(week).padStart(2, '0')}`
}

/** Helper: every session started inside [weekStartIso, weekEndIso]. */
function baseRows(ctx: DetectCtx): SessionRow[] {
  return ctx.db
    .prepare(`
      SELECT session_id, project_slug, started_at, event_count, tool_calls,
             tools_used, skills_invoked, outcome, duration_sec
      FROM sessions
      WHERE started_at >= ? AND started_at <= ?
    `)
    .all(ctx.weekStartIso, ctx.weekEndIso) as SessionRow[]
}

/**
 * 1. Skills with a high error/interruption rate.
 *
 * A skill is reported when it was invoked in at least 3 sessions and more
 * than 20% of them failed; above 40% the severity is `action`.
 */
export function detectSkillsHighErrorRate(ctx: DetectCtx): Pattern[] {
  const bySkill = new Map<string, { total: number; fail: number; ids: string[] }>()
  for (const r of baseRows(ctx)) {
    // Interruptions in long sessions (>=10 events) are natural user redirects,
    // not skill failures. Only count real errors or very early interruptions.
    const isRealFailure =
      r.outcome === 'error' || (r.outcome === 'interrupted' && (r.event_count ?? 0) < 10)
    for (const sk of parseStringList(r.skills_invoked)) {
      let entry = bySkill.get(sk)
      if (!entry) {
        entry = { total: 0, fail: 0, ids: [] }
        bySkill.set(sk, entry)
      }
      entry.total++
      if (isRealFailure) {
        entry.fail++
        if (entry.ids.length < MAX_SAMPLE_IDS) entry.ids.push(r.session_id)
      }
    }
  }

  const out: Pattern[] = []
  for (const [skill, v] of bySkill) {
    if (v.total < 3) continue
    const ratio = v.fail / v.total
    if (ratio <= 0.2) continue
    const severity: Severity = ratio > 0.4 ? 'action' : 'warning'
    out.push({
      pattern_key: `skill_error_rate:${skill}`,
      title: `Skill ${skill}: ${(ratio * 100).toFixed(0)}% das sessões falham`,
      description: `De ${v.total} sessões que invocaram ${skill}, ${v.fail} terminaram em erro/interrupção.`,
      severity,
      metric_value: Math.round(ratio * 1000) / 1000,
      sample_session_ids: v.ids,
      affected_count: v.fail,
    })
  }
  return out
}

/**
 * 2. Tools with low efficiency (high tool_calls/event_count).
 *
 * The per-session ratio is attributed to every tool used in that session. A
 * tool is reported when it appears in at least 5 sessions and the average
 * ratio exceeds 0.5.
 */
export function detectToolsLowEfficiency(ctx: DetectCtx): Pattern[] {
  const byTool = new Map<string, { sum: number; count: number; ids: string[] }>()
  for (const r of baseRows(ctx)) {
    if (!r.event_count) continue // skips 0 and NULL: the ratio would be undefined
    const ratio = r.tool_calls / r.event_count
    for (const t of parseStringList(r.tools_used)) {
      let e = byTool.get(t)
      if (!e) {
        e = { sum: 0, count: 0, ids: [] }
        byTool.set(t, e)
      }
      e.sum += ratio
      e.count++
      if (e.ids.length < MAX_SAMPLE_IDS) e.ids.push(r.session_id)
    }
  }

  const out: Pattern[] = []
  for (const [tool, v] of byTool) {
    if (v.count < 5) continue
    const avg = v.sum / v.count
    if (avg <= 0.5) continue
    out.push({
      pattern_key: `tool_low_efficiency:${tool}`,
      title: `Tool ${tool}: rácio tool_calls/event_count médio ${avg.toFixed(2)}`,
      description: `Em ${v.count} sessões, ${tool} domina o event_count. Indício de uso ineficiente ou looping.`,
      severity: 'info',
      metric_value: Math.round(avg * 1000) / 1000,
      sample_session_ids: v.ids,
      affected_count: v.count,
    })
  }
  return out
}

/**
 * 3. Most frequent (skill, tool) pairs.
 *
 * Returns the top 5 pairs that co-occurred in at least 5 sessions.
 */
export function detectSkillToolPairs(ctx: DetectCtx): Pattern[] {
  const byPair = new Map<string, { skill: string; tool: string; count: number; ids: string[] }>()
  for (const r of baseRows(ctx)) {
    const skills = parseStringList(r.skills_invoked)
    const tools = parseStringList(r.tools_used)
    for (const skill of skills) {
      for (const tool of tools) {
        const key = `${skill}::${tool}`
        let e = byPair.get(key)
        if (!e) {
          e = { skill, tool, count: 0, ids: [] }
          byPair.set(key, e)
        }
        e.count++
        if (e.ids.length < MAX_SAMPLE_IDS) e.ids.push(r.session_id)
      }
    }
  }

  return [...byPair.entries()]
    .filter(([, v]) => v.count >= 5)
    .sort((a, b) => b[1].count - a[1].count)
    .slice(0, 5)
    .map(([key, v]): Pattern => ({
      pattern_key: `skill_tool_pair:${key}`,
      title: `Par frequente: ${v.skill} + ${v.tool}`,
      description: `Skill e tool co-ocorreram em ${v.count} sessões esta semana.`,
      severity: 'info',
      metric_value: v.count,
      sample_session_ids: v.ids,
      affected_count: v.count,
    }))
}

/**
 * 4. Duration outliers: sessions above the per-project p95 with outcome != completed.
 *
 * NOTE(review): outliers must be strictly above the p95 element and at least
 * 3 are required, so a project needs at least 41 timed sessions in the week
 * before this detector can fire — confirm that threshold is intended.
 */
export function detectDurationOutliers(ctx: DetectCtx): Pattern[] {
  const byProject = new Map<string, SessionRow[]>()
  for (const r of baseRows(ctx)) {
    if (r.duration_sec == null || r.duration_sec <= 0) continue
    const arr = byProject.get(r.project_slug)
    if (arr) arr.push(r)
    else byProject.set(r.project_slug, [r])
  }

  const out: Pattern[] = []
  for (const [proj, arr] of byProject) {
    if (arr.length < 4) continue
    const durations = arr.map((r) => r.duration_sec ?? 0).sort((a, b) => a - b)
    const p95Idx = Math.max(0, Math.floor(durations.length * 0.95) - 1)
    const p95 = durations[p95Idx] ?? 0
    const outliers = arr.filter((r) => (r.duration_sec ?? 0) > p95 && r.outcome !== 'completed')
    if (outliers.length < 3) continue
    out.push({
      pattern_key: `duration_outliers:${proj}`,
      title: `Projecto ${proj}: ${outliers.length} sessões longas não concluídas`,
      description: `Sessões com duração acima do p95 (${p95}s) e outcome != completed. Sinal de sessões penduradas.`,
      severity: 'warning',
      metric_value: p95,
      sample_session_ids: outliers.slice(0, MAX_SAMPLE_IDS).map((r) => r.session_id),
      affected_count: outliers.length,
    })
  }
  return out
}

/** 5. Abandoned sessions (event_count < 3 AND outcome = unknown); reported from 5 sessions up. */
export function detectAbandonedSessions(ctx: DetectCtx): Pattern[] {
  const rows = ctx.db
    .prepare(`
      SELECT session_id
      FROM sessions
      WHERE started_at >= ? AND started_at <= ?
        AND event_count < 3 AND outcome = 'unknown'
    `)
    .all(ctx.weekStartIso, ctx.weekEndIso) as Array<{ session_id: string }>
  if (rows.length < 5) return []
  return [{
    pattern_key: 'abandoned_sessions',
    title: `${rows.length} sessões abandonadas esta semana`,
    description: `Sessões com menos de 3 eventos e outcome=unknown — tipicamente abertas e descartadas.`,
    severity: 'info',
    metric_value: rows.length,
    sample_session_ids: rows.slice(0, MAX_SAMPLE_IDS).map((r) => r.session_id),
    affected_count: rows.length,
  }]
}

/** Sums `tool_calls` per skill over the given rows, keeping up to `MAX_SAMPLE_IDS` session ids when available. */
function aggregateToolCallsBySkill(
  rows: ReadonlyArray<{ session_id?: string; skills_invoked: string; tool_calls: number }>,
): Map<string, { sum: number; count: number; ids: string[] }> {
  const bySkill = new Map<string, { sum: number; count: number; ids: string[] }>()
  for (const r of rows) {
    for (const skill of parseStringList(r.skills_invoked)) {
      let e = bySkill.get(skill)
      if (!e) {
        e = { sum: 0, count: 0, ids: [] }
        bySkill.set(skill, e)
      }
      e.sum += r.tool_calls
      e.count++
      if (r.session_id !== undefined && e.ids.length < MAX_SAMPLE_IDS) e.ids.push(r.session_id)
    }
  }
  return bySkill
}

/**
 * 6. Growing complexity: avg(tool_calls) this week vs. the previous week.
 *
 * A skill is reported when it has at least 5 sessions this week, at least 3
 * in the previous week, and its average tool_calls grew by more than 30%.
 *
 * @param prevWeekStartIso Inclusive lower bound of the comparison week.
 * @param prevWeekEndIso Inclusive upper bound of the comparison week.
 */
export function detectGrowingComplexity(ctx: DetectCtx, prevWeekStartIso: string, prevWeekEndIso: string): Pattern[] {
  const curRows = baseRows(ctx)
  const prevRows = ctx.db
    .prepare(`
      SELECT skills_invoked, tool_calls
      FROM sessions
      WHERE started_at >= ? AND started_at <= ?
    `)
    .all(prevWeekStartIso, prevWeekEndIso) as Array<{ skills_invoked: string; tool_calls: number }>

  const curBySkill = aggregateToolCallsBySkill(curRows)
  const prevBySkill = aggregateToolCallsBySkill(prevRows)

  const out: Pattern[] = []
  for (const [skill, cur] of curBySkill) {
    if (cur.count < 5) continue
    const prev = prevBySkill.get(skill)
    if (!prev || prev.count < 3) continue
    const curAvg = cur.sum / cur.count
    const prevAvg = prev.sum / prev.count
    // prevAvg === 0 would make the percentage meaningless (division by zero).
    if (prevAvg === 0 || curAvg <= prevAvg * 1.3) continue
    out.push({
      pattern_key: `growing_complexity:${skill}`,
      title: `Skill ${skill}: tool_calls médio +${Math.round((curAvg / prevAvg - 1) * 100)}% vs semana anterior`,
      description: `Média de tool_calls/sessão subiu de ${prevAvg.toFixed(1)} para ${curAvg.toFixed(1)}.`,
      severity: 'warning',
      metric_value: Math.round(curAvg * 10) / 10,
      sample_session_ids: cur.ids,
      affected_count: cur.count,
    })
  }
  return out
}

/**
 * 7. Actions never executed — entries in `worklog_comments` for discussion 33
 * (improvement actions) carrying P1/P2 actions and created at least 14 days
 * before the end of the week.
 *
 * NOTE(review): the original description also mentions checking git history
 * for a commit referencing the same `task_ref`; no such check is performed
 * here, so every old P1/P2 action is treated as pending.
 *
 * Collection stops at 10 actions, hence the "+" in the title; fewer than 3
 * yields no pattern.
 */
export function detectActionsNeverExecuted(ctx: DetectCtx): Pattern[] {
  // Entries created up to 14 days before the end of the week (or earlier).
  const cutoff = new Date(ctx.weekEndIso)
  cutoff.setUTCDate(cutoff.getUTCDate() - 14)
  const cutoffIso = cutoff.toISOString()

  const rows = ctx.db
    .prepare(`
      SELECT id, discussion_id, created_at, task_ref, actions_json, title
      FROM worklog_comments
      WHERE discussion_id = 33 AND created_at <= ?
      ORDER BY created_at DESC
      LIMIT 500
    `)
    .all(cutoffIso) as Array<{
      id: number
      discussion_id: number
      created_at: string
      task_ref: string | null
      actions_json: string
      title: string | null
    }>
  if (rows.length === 0) return []

  const maxPending = 10
  const pendentes: Array<{ id: number; descricao: string; prioridade: string }> = []
  collect: for (const r of rows) {
    for (const a of parseActionItems(r.actions_json)) {
      if (a.prioridade !== 'P1' && a.prioridade !== 'P2') continue
      pendentes.push({ id: r.id, descricao: a.descricao.slice(0, 120), prioridade: a.prioridade })
      if (pendentes.length >= maxPending) break collect
    }
  }
  if (pendentes.length < 3) return []

  return [{
    pattern_key: 'actions_never_executed',
    title: `${pendentes.length}+ acções P1/P2 pendentes há ≥14 dias`,
    description: `Acções de melhoria (disc #33) sem execução visível. Amostra: ${pendentes.slice(0, 3).map((p) => `[${p.prioridade}] ${p.descricao}`).join(' | ')}`,
    severity: 'warning',
    metric_value: pendentes.length,
    sample_session_ids: pendentes.slice(0, MAX_SAMPLE_IDS).map((p) => `worklog:${p.id}`),
    affected_count: pendentes.length,
  }]
}

/**
 * 8. Skill reported as problematic in worklogs but showing up with
 * outcome=completed in real sessions — a mismatch between narrative and data.
 *
 * Skill names are extracted heuristically as slash-prefixed tokens from
 * `patterns_text` and `problems_json` of worklogs (discussions 31/32) created
 * in the 4 weeks before the end of the window. A skill mentioned at least
 * twice and found in at least 3 completed sessions this week is reported.
 */
export function detectSkillReportedBrokenButCompleted(ctx: DetectCtx): Pattern[] {
  const windowStart = new Date(ctx.weekEndIso)
  windowStart.setUTCDate(windowStart.getUTCDate() - 28)
  const windowIso = windowStart.toISOString()

  const worklogs = ctx.db
    .prepare(`
      SELECT patterns_text, problems_json
      FROM worklog_comments
      WHERE discussion_id IN (31, 32) AND created_at >= ?
      LIMIT 500
    `)
    .all(windowIso) as Array<{ patterns_text: string; problems_json: string }>
  if (worklogs.length === 0) return []

  // Extract tokens that look like a skill name (slash-prefixed).
  const skillMentions = new Map<string, number>()
  const skillRegex = /\/([a-z][a-z0-9_-]{2,40})\b/gi
  for (const w of worklogs) {
    const blob = `${w.patterns_text} ${w.problems_json}`.toLowerCase()
    for (const m of blob.matchAll(skillRegex)) {
      const name = m[1]
      if (!name) continue
      skillMentions.set(name, (skillMentions.get(name) ?? 0) + 1)
    }
  }
  if (skillMentions.size === 0) return []

  // Prepared once and reused for every candidate skill. The LIKE is only a
  // coarse pre-filter ("_" in a name acts as a wildcard); the exact match is
  // confirmed below by parsing the JSON array.
  const completedStmt = ctx.db.prepare(`
    SELECT session_id, skills_invoked, outcome
    FROM sessions
    WHERE started_at >= ? AND started_at <= ?
      AND skills_invoked LIKE ?
      AND outcome = 'completed'
  `)

  const out: Pattern[] = []
  for (const [skill, mentions] of skillMentions) {
    if (mentions < 2) continue
    const rows = completedStmt.all(ctx.weekStartIso, ctx.weekEndIso, `%"${skill}"%`) as Array<{
      session_id: string
      skills_invoked: string
      outcome: string
    }>
    const matches = rows.filter((r) => parseStringList(r.skills_invoked).includes(skill))
    if (matches.length < 3) continue
    out.push({
      pattern_key: `skill_narrative_vs_data:${skill}`,
      title: `Skill ${skill}: reportada problemática em ${mentions} worklogs mas ${matches.length} sessões completed`,
      description: `Discrepância entre narrativa (worklogs #31/#32) e dados (sessions.outcome). Investigar se o problema é silencioso.`,
      severity: 'info',
      metric_value: matches.length,
      sample_session_ids: matches.slice(0, MAX_SAMPLE_IDS).map((r) => r.session_id),
      affected_count: matches.length,
    })
  }
  return out
}

/**
 * 9. Words recurring in `patterns_text` of this week's worklogs.
 *
 * `patterns_text` is parsed as a JSON array of strings. Tokens are
 * lower-cased, stripped of diacritics, and kept when they have at least
 * 5 characters and are not stop words. A token is counted once per worklog;
 * the top 5 tokens present in 3 or more worklogs are reported.
 */
export function detectWorklogPatternFrequency(ctx: DetectCtx): Pattern[] {
  const rows = ctx.db
    .prepare(`
      SELECT id, patterns_text
      FROM worklog_comments
      WHERE created_at >= ? AND created_at <= ?
    `)
    .all(ctx.weekStartIso, ctx.weekEndIso) as Array<{ id: number; patterns_text: string }>
  if (rows.length === 0) return []

  const stop = new Set<string>([
    'para', 'como', 'mais', 'sobre', 'quando', 'apenas', 'entre', 'depois', 'antes',
    'pelo', 'pela', 'pelos', 'pelas', 'esta', 'este', 'isso', 'isto', 'cada',
    'muito', 'muita', 'outro', 'outra', 'nosso', 'nossa', 'todas', 'todos',
    'seja', 'ser', 'ter', 'com', 'sem', 'dos', 'das', 'que', 'nao', 'sim',
  ])
  const combiningMarks = /[\u0300-\u036f]/g
  const tokenCount = new Map<string, { count: number; ids: number[] }>()

  for (const r of rows) {
    const seen = new Set<string>() // each token counts once per worklog
    for (const text of parseStringList(r.patterns_text)) {
      const words = text
        .toLowerCase()
        .normalize('NFD')
        .replace(combiningMarks, '')
        .split(/[^a-z0-9]+/)
      for (const w of words) {
        if (w.length < 5 || stop.has(w) || seen.has(w)) continue
        seen.add(w)
        let e = tokenCount.get(w)
        if (!e) {
          e = { count: 0, ids: [] }
          tokenCount.set(w, e)
        }
        e.count++
        e.ids.push(r.id)
      }
    }
  }

  const frequent = [...tokenCount.entries()]
    .filter(([, v]) => v.count >= 3)
    .sort((a, b) => b[1].count - a[1].count)
    .slice(0, 5)
  const top = frequent[0]
  if (!top) return []

  return [{
    pattern_key: 'worklog_pattern_frequency',
    title: `Termos recorrentes em ${rows.length} worklogs desta semana`,
    description: `Top tokens em patterns_text: ${frequent.map(([w, v]) => `${w}(${v.count})`).join(', ')}`,
    severity: 'info',
    metric_value: top[1].count,
    sample_session_ids: frequent
      .flatMap(([, v]) => v.ids.slice(0, 2))
      .slice(0, MAX_SAMPLE_IDS)
      .map((id) => `worklog:${id}`),
    affected_count: rows.length,
  }]
}

/**
 * Orchestrates every detector for the given week.
 *
 * @param dbWrapper Wrapper exposing the raw better-sqlite3 handle via `rawDb()`.
 * @param weekStart Inclusive start of the analysed week.
 * @param weekEnd Inclusive end of the analysed week.
 * @returns All detected patterns, session detectors first, then cross-detectors.
 */
export function detectPatterns(
  dbWrapper: SessionsDb,
  weekStart: Date,
  weekEnd: Date,
): Pattern[] {
  const db = dbWrapper.rawDb()
  const ctx: DetectCtx = {
    db,
    weekStartIso: iso(weekStart),
    weekEndIso: iso(weekEnd),
  }

  // The comparison week is the same interval shifted back by 7 days.
  const prevStart = new Date(weekStart)
  prevStart.setUTCDate(prevStart.getUTCDate() - 7)
  const prevEnd = new Date(weekEnd)
  prevEnd.setUTCDate(prevEnd.getUTCDate() - 7)

  const base: Pattern[] = [
    ...detectSkillsHighErrorRate(ctx),
    ...detectToolsLowEfficiency(ctx),
    ...detectSkillToolPairs(ctx),
    ...detectDurationOutliers(ctx),
    ...detectAbandonedSessions(ctx),
    ...detectGrowingComplexity(ctx, iso(prevStart), iso(prevEnd)),
  ]

  // Cross-detectors: only run when `worklog_comments` has any row at all
  // (the count is not restricted to the analysed week).
  const worklogCount = (db.prepare(`SELECT COUNT(*) as c FROM worklog_comments`).get() as { c: number }).c
  if (worklogCount > 0) {
    base.push(
      ...detectActionsNeverExecuted(ctx),
      ...detectSkillReportedBrokenButCompleted(ctx),
      ...detectWorklogPatternFrequency(ctx),
    )
  }
  return base
}

/**
 * Converts a Pattern plus context into a PatternRecord ready to persist.
 *
 * @param p Detected pattern.
 * @param weekIso ISO week label (`YYYY-Www`) the pattern belongs to.
 * @param consecutiveWeeks Value stored in `consecutive_weeks`.
 * @returns The record, with `detected_at` set to the current time.
 */
export function toPatternRecord(p: Pattern, weekIso: string, consecutiveWeeks: number): PatternRecord {
  return {
    detected_at: new Date().toISOString(),
    week_iso: weekIso,
    pattern_key: p.pattern_key,
    title: p.title,
    description: p.description,
    severity: p.severity,
    metric_value: p.metric_value,
    sample_session_ids: p.sample_session_ids,
    affected_count: p.affected_count,
    consecutive_weeks: consecutiveWeeks,
  }
}