feat(observabilidade): tabela patterns + 6 detectores SQL

Adiciona tabela 'patterns' à BD sessions (UNIQUE por week_iso+pattern_key)
e helpers upsertPattern/getPatternsByWeek/getConsecutiveWeeks no SessionsDb.

Módulo patterns.ts implementa 6 detectores heurísticos para detecção semanal:
  1. skills_with_high_error_rate (ratio > 0.2, severity warning|action)
  2. tools_low_efficiency (tool_calls/event_count médio > 0.5)
  3. skill_tool_pairs (top 5 co-ocorrências)
  4. duration_outliers (sessões > p95 com outcome != completed)
  5. abandoned_sessions (event_count<3 AND outcome=unknown, >=5)
  6. growing_complexity (avg tool_calls actual > anterior*1.3)

5 testes cobrem detector de erro, abandonadas, consecutive_weeks,
idempotência do upsert e toPatternRecord.

Refs Fase 6A · Desk #2059 · Project #65

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-23 02:17:21 +01:00
parent 2c8525bc8a
commit 2a523a505e
3 changed files with 568 additions and 0 deletions
+119
View File
@@ -13,6 +13,20 @@ export interface ListFilters {
offset?: number
}
/**
 * One detected pattern as written to and read from the `patterns` table.
 * Rows are unique per (week_iso, pattern_key).
 */
export interface PatternRecord {
/** Row id; absent on records that have not been read back from the table. */
id?: number
/** Timestamp string recording when the pattern was detected. */
detected_at: string
/** ISO week label in `YYYY-Www` form (the format `prevWeekIso` parses). */
week_iso: string
/** Identifier of the pattern; forms the uniqueness key together with `week_iso`. */
pattern_key: string
/** Short human-readable headline. */
title: string
/** Longer human-readable explanation. */
description: string
/** Severity level of the pattern. */
severity: 'info' | 'warning' | 'action'
/** Numeric metric attached to the pattern, or null when there is none. */
metric_value: number | null
/** Example session ids; serialised as a JSON string in the table. */
sample_session_ids: string[]
/** Count of affected items reported for the pattern. */
affected_count: number
/** Consecutive-week count stored with the row; the table column defaults to 1. */
consecutive_weeks: number
}
export interface SessionsDb {
upsertSession(meta: SessionMeta): void
upsertMany(metas: SessionMeta[]): void
@@ -20,6 +34,10 @@ export interface SessionsDb {
countSessions(filters: ListFilters): number
getSession(id: string): SessionMeta | null
deleteByJsonlPath(path: string): void
upsertPattern(p: PatternRecord): void
getPatternsByWeek(week: string): PatternRecord[]
getConsecutiveWeeks(pattern_key: string, uptoWeek: string): number
rawDb(): Database.Database
close(): void
}
@@ -46,6 +64,23 @@ CREATE TABLE IF NOT EXISTS sessions (
);
CREATE INDEX IF NOT EXISTS idx_started ON sessions(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_project ON sessions(project_slug, started_at DESC);
CREATE TABLE IF NOT EXISTS patterns (
id INTEGER PRIMARY KEY AUTOINCREMENT,
detected_at TEXT NOT NULL,
week_iso TEXT NOT NULL,
pattern_key TEXT NOT NULL,
title TEXT NOT NULL,
description TEXT NOT NULL,
severity TEXT NOT NULL,
metric_value REAL,
sample_session_ids TEXT NOT NULL,
affected_count INTEGER NOT NULL,
consecutive_weeks INTEGER NOT NULL DEFAULT 1,
UNIQUE(week_iso, pattern_key)
);
CREATE INDEX IF NOT EXISTS idx_patterns_week ON patterns(week_iso);
CREATE INDEX IF NOT EXISTS idx_patterns_key ON patterns(pattern_key);
`
function rowToMeta(row: Record<string, unknown>): SessionMeta {
@@ -177,8 +212,92 @@ export function openSessionsDb(dbPath: string): SessionsDb {
/** Deletes every session row whose `jsonl_path` equals `path`. */
deleteByJsonlPath(path) {
  const removeByPath = db.prepare('DELETE FROM sessions WHERE jsonl_path = ?')
  removeByPath.run(path)
},
/**
 * Inserts a pattern row, or overwrites the existing row that has the same
 * (week_iso, pattern_key) pair.
 *
 * @param p Record to persist; `sample_session_ids` is stored as JSON text and
 *   `id` is not written.
 */
upsertPattern(p: PatternRecord) {
  const upsertSql = `
INSERT INTO patterns (detected_at, week_iso, pattern_key, title, description,
severity, metric_value, sample_session_ids, affected_count, consecutive_weeks)
VALUES (@detected_at, @week_iso, @pattern_key, @title, @description,
@severity, @metric_value, @sample_session_ids, @affected_count, @consecutive_weeks)
ON CONFLICT(week_iso, pattern_key) DO UPDATE SET
detected_at = excluded.detected_at,
title = excluded.title,
description = excluded.description,
severity = excluded.severity,
metric_value = excluded.metric_value,
sample_session_ids = excluded.sample_session_ids,
affected_count = excluded.affected_count,
consecutive_weeks = excluded.consecutive_weeks
`
  const {
    detected_at,
    week_iso,
    pattern_key,
    title,
    description,
    severity,
    metric_value,
    sample_session_ids,
    affected_count,
    consecutive_weeks,
  } = p
  const statement = db.prepare(upsertSql)
  statement.run({
    detected_at,
    week_iso,
    pattern_key,
    title,
    description,
    severity,
    metric_value,
    // The column is TEXT, so the id list is serialised before binding.
    sample_session_ids: JSON.stringify(sample_session_ids),
    affected_count,
    consecutive_weeks,
  })
},
/**
 * Returns the patterns stored for one ISO week, most severe first.
 *
 * Severity is ranked explicitly (action > warning > info). Sorting the text
 * column itself would order alphabetically, which puts 'action' last under
 * DESC. Ties are broken by `affected_count`, largest first.
 *
 * @param week ISO week label matched against the `week_iso` column.
 * @returns One record per stored row, with `sample_session_ids` parsed back
 *   from its JSON text.
 */
getPatternsByWeek(week: string): PatternRecord[] {
  const rows = db.prepare(`
    SELECT * FROM patterns
    WHERE week_iso = ?
    ORDER BY
      CASE severity WHEN 'action' THEN 2 WHEN 'warning' THEN 1 ELSE 0 END DESC,
      affected_count DESC
  `).all(week) as Record<string, unknown>[]
  return rows.map((r) => ({
    id: r.id as number,
    detected_at: r.detected_at as string,
    week_iso: r.week_iso as string,
    pattern_key: r.pattern_key as string,
    title: r.title as string,
    description: r.description as string,
    severity: r.severity as PatternRecord['severity'],
    metric_value: (r.metric_value as number | null) ?? null,
    sample_session_ids: JSON.parse(r.sample_session_ids as string),
    affected_count: r.affected_count as number,
    consecutive_weeks: r.consecutive_weeks as number,
  }))
},
/**
 * Counts how many consecutive weeks, ending at `uptoWeek` (inclusive), have a
 * stored row for `pattern_key`.
 *
 * @param pattern_key Pattern identifier to look up.
 * @param uptoWeek Most recent week of the streak, as an ISO week label.
 * @returns Length of the unbroken streak; 0 when `uptoWeek` itself has no row.
 */
getConsecutiveWeeks(pattern_key: string, uptoWeek: string): number {
  const weeks = db
    .prepare('SELECT DISTINCT week_iso FROM patterns WHERE pattern_key = ? AND week_iso <= ? ORDER BY week_iso DESC')
    .all(pattern_key, uptoWeek) as { week_iso: string }[]
  let streak = 0
  let expected = uptoWeek
  // Walk from the newest week backwards; the first gap ends the streak.
  for (const { week_iso } of weeks) {
    if (week_iso !== expected) break
    streak += 1
    expected = prevWeekIso(expected)
  }
  return streak
},
/** Exposes the underlying database handle held by this wrapper. */
rawDb(): Database.Database {
return db
},
/** Closes the underlying database handle. */
close() {
db.close()
},
}
}
/**
 * Returns the ISO week label immediately before `week`.
 *
 * @param week Label in `YYYY-Www` form.
 * @returns The preceding label; week 1 rolls back to the last week (52 or 53)
 *   of the previous year. Input that does not match the format is returned
 *   unchanged.
 */
export function prevWeekIso(week: string): string {
  const parts = /^(\d{4})-W(\d{2})$/.exec(week)
  if (parts === null) return week
  const isoYear = parseInt(parts[1], 10)
  const isoWeek = parseInt(parts[2], 10)
  const label = (y: number, w: number): string => `${y}-W${String(w).padStart(2, '0')}`
  if (isoWeek <= 1) {
    // Crossing the year boundary: the previous year decides whether it ends on week 52 or 53.
    const earlierYear = isoYear - 1
    return label(earlierYear, weeksInYear(earlierYear))
  }
  return label(isoYear, isoWeek - 1)
}
/**
 * Number of ISO weeks (52 or 53) in the given year.
 *
 * A year has 53 weeks when 1 January is a Thursday, or when it is a leap year
 * and 1 January is a Wednesday.
 */
function weeksInYear(year: number): number {
  const jan1Weekday = new Date(Date.UTC(year, 0, 1)).getUTCDay() // 0 = Sunday
  const leap = year % 400 === 0 || (year % 4 === 0 && year % 100 !== 0)
  const isLongYear = jan1Weekday === 4 || (jan1Weekday === 3 && leap)
  return isLongYear ? 53 : 52
}
+326
View File
@@ -0,0 +1,326 @@
/**
* Detector automático de padrões sobre a BD `sessions` (Observabilidade Fase 6A).
*
* Seis detectores heurísticos em SQL puro (via better-sqlite3). Cada detector
* devolve zero ou mais `Pattern` para a semana analisada. Pipeline:
* 1. Correr detectores sobre intervalo [weekStart, weekEnd]
* 2. Persistir via `upsertPattern` (idempotente por (week_iso, pattern_key))
* 3. Calcular `consecutive_weeks` olhando para semanas anteriores
*/
import type Database from 'better-sqlite3'
import type { SessionsDb, PatternRecord } from './db.js'
/** Severity levels a detector can assign to a pattern. */
export type Severity = 'info' | 'warning' | 'action'
/**
 * Result produced by a detector for the analysed week, before the week label
 * and consecutive-week count are attached (see `toPatternRecord`).
 */
export interface Pattern {
/** Identifier of the pattern; detectors in this module build it as `<detector>:<subject>` or a fixed name. */
pattern_key: string
/** Short human-readable headline. */
title: string
/** Longer human-readable explanation. */
description: string
/** Severity level chosen by the detector. */
severity: Severity
/** Numeric metric behind the detection, or null when there is none. */
metric_value: number | null
/** Example session ids; the detectors in this module keep at most 5. */
sample_session_ids: string[]
/** Count reported by the detector (what is counted depends on the detector). */
affected_count: number
}
/** Inputs shared by the detectors: the database handle and the week bounds. */
export interface DetectCtx {
/** better-sqlite3 handle the detectors query directly. */
db: Database.Database
/** Inclusive lower bound compared against `sessions.started_at`. */
weekStartIso: string
/** Inclusive upper bound compared against `sessions.started_at`. */
weekEndIso: string
}
/** Converts a Date to its ISO 8601 UTC string (via `toISOString`). */
function iso(d: Date): string {
return d.toISOString()
}
/**
 * Returns the Monday-to-Sunday UTC week that contains `ref`.
 *
 * @param ref Any instant; only its UTC calendar date is considered.
 * @returns `start` (Monday 00:00:00.000 UTC), `end` (Sunday 23:59:59.999 UTC)
 *   and `iso`, the label `weekIso` produces for `start`.
 */
export function weekRange(ref: Date): { start: Date; end: Date; iso: string } {
  const DAY_MS = 86400000
  const midnight = Date.UTC(ref.getUTCFullYear(), ref.getUTCMonth(), ref.getUTCDate())
  // getUTCDay() is 0 for Sunday; shift so Monday counts as 0 and Sunday as 6.
  const daysSinceMonday = (new Date(midnight).getUTCDay() + 6) % 7
  const start = new Date(midnight - daysSinceMonday * DAY_MS)
  // Last millisecond of the Sunday that closes the week.
  const end = new Date(start.getTime() + 7 * DAY_MS - 1)
  return { start, end, iso: weekIso(start) }
}
/**
 * ISO 8601 week label (`YYYY-Www`) of the week containing the given date.
 *
 * The Thursday of the week decides both the ISO year and the week number:
 * week N is the week holding the N-th Thursday of that year, so the number is
 * `floor(daysFromJan1ToThursday / 7) + 1`. No weekday offset for 1 January is
 * added; doing so over-counts by one week in years where 1 January falls on a
 * Friday, Saturday or Sunday (e.g. 2021, 2022, 2023, 2027).
 *
 * @param monday Reference date. Callers pass the Monday of the week, but any
 *   day of the week yields the same label.
 * @returns Label such as `2026-W17`, with the week zero-padded to two digits.
 */
export function weekIso(monday: Date): string {
  const DAY_MS = 86400000
  // Monday-based weekday index: Monday = 0 ... Sunday = 6.
  const weekdayIndex = (monday.getUTCDay() + 6) % 7
  const thursday = new Date(monday.getTime() + (3 - weekdayIndex) * DAY_MS)
  const year = thursday.getUTCFullYear()
  const jan1 = Date.UTC(year, 0, 1)
  const week = Math.floor((thursday.getTime() - jan1) / DAY_MS / 7) + 1
  return `${year}-W${String(week).padStart(2, '0')}`
}
/**
 * Loads the session columns the detectors need for every session whose
 * `started_at` lies within [ctx.weekStartIso, ctx.weekEndIso] (both inclusive).
 */
function baseRows(ctx: DetectCtx) {
  const query = ctx.db.prepare(`
SELECT session_id, project_slug, started_at, event_count, tool_calls, tools_used, skills_invoked, outcome, duration_sec
FROM sessions
WHERE started_at >= ? AND started_at <= ?
`)
  const fetched = query.all(ctx.weekStartIso, ctx.weekEndIso)
  return fetched as {
    session_id: string
    project_slug: string
    started_at: string
    event_count: number
    tool_calls: number
    tools_used: string
    skills_invoked: string
    outcome: string
    duration_sec: number | null
  }[]
}
/**
 * Detector 1: skills whose sessions frequently end in error or interruption.
 *
 * A skill is reported when at least 3 sessions in the week invoked it and more
 * than 20% of them ended with outcome `error` or `interrupted`. Severity is
 * `action` above 40%, otherwise `warning`. Up to 5 failing session ids are
 * kept as samples; `affected_count` is the number of failing sessions.
 */
export function detectSkillsHighErrorRate(ctx: DetectCtx): Pattern[] {
  const stats = new Map<string, { total: number; fail: number; ids: string[] }>()
  for (const session of baseRows(ctx)) {
    let invoked: string[] = []
    // Unparseable skill lists are treated as "no skills".
    try { invoked = JSON.parse(session.skills_invoked) } catch {}
    const failed = session.outcome === 'error' || session.outcome === 'interrupted'
    for (const name of invoked) {
      let entry = stats.get(name)
      if (entry === undefined) {
        entry = { total: 0, fail: 0, ids: [] }
        stats.set(name, entry)
      }
      entry.total += 1
      if (!failed) continue
      entry.fail += 1
      if (entry.ids.length < 5) entry.ids.push(session.session_id)
    }
  }

  const found: Pattern[] = []
  for (const [skill, { total, fail, ids }] of stats) {
    if (total < 3) continue
    const ratio = fail / total
    if (ratio <= 0.2) continue
    found.push({
      pattern_key: `skill_error_rate:${skill}`,
      title: `Skill ${skill}: ${(ratio * 100).toFixed(0)}% das sessões falham`,
      description: `De ${total} sessões que invocaram ${skill}, ${fail} terminaram em erro/interrupção.`,
      severity: ratio > 0.4 ? 'action' : 'warning',
      metric_value: Math.round(ratio * 1000) / 1000,
      sample_session_ids: ids,
      affected_count: fail,
    })
  }
  return found
}
/**
 * Detector 2: tools used in sessions with a high tool_calls/event_count ratio.
 *
 * For each session with a non-zero event count the session-level ratio
 * `tool_calls / event_count` is attributed to every tool listed in
 * `tools_used`. A tool is reported (severity `info`) when it appears in at
 * least 5 such sessions and the mean ratio exceeds 0.5.
 */
export function detectToolsLowEfficiency(ctx: DetectCtx): Pattern[] {
  const perTool = new Map<string, { sum: number; count: number; ids: string[] }>()
  for (const session of baseRows(ctx)) {
    // Sessions without events cannot yield a ratio.
    if (!session.event_count) continue
    const callShare = session.tool_calls / session.event_count
    let used: string[] = []
    try { used = JSON.parse(session.tools_used) } catch {}
    for (const name of used) {
      let acc = perTool.get(name)
      if (acc === undefined) {
        acc = { sum: 0, count: 0, ids: [] }
        perTool.set(name, acc)
      }
      acc.sum += callShare
      acc.count += 1
      if (acc.ids.length < 5) acc.ids.push(session.session_id)
    }
  }

  const found: Pattern[] = []
  for (const [tool, { sum, count, ids }] of perTool) {
    if (count < 5) continue
    const avg = sum / count
    if (avg <= 0.5) continue
    found.push({
      pattern_key: `tool_low_efficiency:${tool}`,
      title: `Tool ${tool}: rácio tool_calls/event_count médio ${avg.toFixed(2)}`,
      description: `Em ${count} sessões, ${tool} domina o event_count. Indício de uso ineficiente ou looping.`,
      severity: 'info',
      metric_value: Math.round(avg * 1000) / 1000,
      sample_session_ids: ids,
      affected_count: count,
    })
  }
  return found
}
/**
 * Detector 3: most frequent (skill, tool) co-occurrences.
 *
 * Every skill/tool combination found in the same session counts once per
 * session listing. Pairs seen at least 5 times are ranked by count and the
 * top 5 are reported with severity `info`.
 */
export function detectSkillToolPairs(ctx: DetectCtx): Pattern[] {
  const pairs = new Map<string, { count: number; ids: string[] }>()
  for (const session of baseRows(ctx)) {
    let skillNames: string[] = []
    let toolNames: string[] = []
    try { skillNames = JSON.parse(session.skills_invoked) } catch {}
    try { toolNames = JSON.parse(session.tools_used) } catch {}
    for (const skill of skillNames) {
      for (const tool of toolNames) {
        const pairKey = `${skill}::${tool}`
        let stat = pairs.get(pairKey)
        if (stat === undefined) {
          stat = { count: 0, ids: [] }
          pairs.set(pairKey, stat)
        }
        stat.count += 1
        if (stat.ids.length < 5) stat.ids.push(session.session_id)
      }
    }
  }

  const frequent = Array.from(pairs).filter(([, stat]) => stat.count >= 5)
  frequent.sort((x, y) => y[1].count - x[1].count)

  const found: Pattern[] = []
  for (const [pairKey, stat] of frequent.slice(0, 5)) {
    found.push({
      pattern_key: `skill_tool_pair:${pairKey}`,
      title: `Par frequente: ${pairKey.replace('::', ' + ')}`,
      description: `Skill e tool co-ocorreram em ${stat.count} sessões esta semana.`,
      severity: 'info',
      metric_value: stat.count,
      sample_session_ids: stat.ids,
      affected_count: stat.count,
    })
  }
  return found
}
/**
 * Detector 4: per-project long sessions that did not complete.
 *
 * Only sessions with a positive `duration_sec` are considered. For each
 * project with at least 4 such sessions a p95 duration is picked from the
 * sorted durations, and sessions strictly above it whose outcome is not
 * `completed` are collected. The project is reported (severity `warning`)
 * when at least 3 sessions qualify.
 *
 * NOTE(review): because only sessions strictly above the p95 value qualify and
 * 3 are required, this can only fire for projects with dozens of timed
 * sessions in the week.
 */
export function detectDurationOutliers(ctx: DetectCtx): Pattern[] {
  const timed = baseRows(ctx).filter((r) => r.duration_sec != null && r.duration_sec > 0)
  type TimedRow = (typeof timed)[number]

  const grouped = new Map<string, TimedRow[]>()
  for (const row of timed) {
    const bucket = grouped.get(row.project_slug)
    if (bucket === undefined) grouped.set(row.project_slug, [row])
    else bucket.push(row)
  }

  const found: Pattern[] = []
  for (const [proj, sessions] of grouped) {
    if (sessions.length < 4) continue
    const ascending = sessions.map((r) => r.duration_sec as number)
    ascending.sort((a, b) => a - b)
    const p95 = ascending[Math.max(0, Math.floor(ascending.length * 0.95) - 1)]
    const outliers = sessions.filter(
      (r) => r.outcome !== 'completed' && (r.duration_sec as number) > p95,
    )
    if (outliers.length < 3) continue
    found.push({
      pattern_key: `duration_outliers:${proj}`,
      title: `Projecto ${proj}: ${outliers.length} sessões longas não concluídas`,
      description: `Sessões com duração acima do p95 (${p95}s) e outcome != completed. Sinal de sessões penduradas.`,
      severity: 'warning',
      metric_value: p95,
      sample_session_ids: outliers.slice(0, 5).map((r) => r.session_id),
      affected_count: outliers.length,
    })
  }
  return found
}
/**
 * Detector 5: abandoned sessions.
 *
 * Counts sessions in the week with fewer than 3 events and outcome `unknown`.
 * Returns a single `info` pattern when there are at least 5, otherwise nothing.
 */
export function detectAbandonedSessions(ctx: DetectCtx): Pattern[] {
  const query = ctx.db.prepare(`
SELECT session_id FROM sessions
WHERE started_at >= ? AND started_at <= ?
AND event_count < 3 AND outcome = 'unknown'
`)
  const abandoned = query.all(ctx.weekStartIso, ctx.weekEndIso) as { session_id: string }[]
  const total = abandoned.length
  if (total < 5) return []

  const found: Pattern = {
    pattern_key: 'abandoned_sessions',
    title: `${total} sessões abandonadas esta semana`,
    description: `Sessões com menos de 3 eventos e outcome=unknown — tipicamente abertas e descartadas.`,
    severity: 'info',
    metric_value: total,
    sample_session_ids: abandoned.slice(0, 5).map((row) => row.session_id),
    affected_count: total,
  }
  return [found]
}
/**
 * Detector 6: skills whose average tool_calls per session grew versus the
 * previous week.
 *
 * A skill is reported (severity `warning`) when it has at least 5 sessions in
 * the current week and at least 3 in the previous one, the previous average is
 * non-zero, and the current average is more than 1.3 times the previous.
 *
 * @param ctx Current-week context.
 * @param prevWeekStartIso Inclusive lower bound of the previous week.
 * @param prevWeekEndIso Inclusive upper bound of the previous week.
 */
export function detectGrowingComplexity(ctx: DetectCtx, prevWeekStartIso: string, prevWeekEndIso: string): Pattern[] {
  // Unparseable skill lists are treated as "no skills".
  const parseSkills = (raw: string): string[] => {
    let names: string[] = []
    try { names = JSON.parse(raw) } catch {}
    return names
  }

  const currentSessions = baseRows(ctx)
  const previousSessions = ctx.db.prepare(`
SELECT skills_invoked, tool_calls FROM sessions
WHERE started_at >= ? AND started_at <= ?
`).all(prevWeekStartIso, prevWeekEndIso) as { skills_invoked: string; tool_calls: number }[]

  const current = new Map<string, { sum: number; count: number; ids: string[] }>()
  for (const session of currentSessions) {
    for (const name of parseSkills(session.skills_invoked)) {
      let acc = current.get(name)
      if (acc === undefined) {
        acc = { sum: 0, count: 0, ids: [] }
        current.set(name, acc)
      }
      acc.sum += session.tool_calls
      acc.count += 1
      if (acc.ids.length < 5) acc.ids.push(session.session_id)
    }
  }

  const previous = new Map<string, { sum: number; count: number }>()
  for (const session of previousSessions) {
    for (const name of parseSkills(session.skills_invoked)) {
      let acc = previous.get(name)
      if (acc === undefined) {
        acc = { sum: 0, count: 0 }
        previous.set(name, acc)
      }
      acc.sum += session.tool_calls
      acc.count += 1
    }
  }

  const found: Pattern[] = []
  for (const [skill, now] of current) {
    if (now.count < 5) continue
    const before = previous.get(skill)
    if (!before || before.count < 3) continue
    const curAvg = now.sum / now.count
    const prevAvg = before.sum / before.count
    // A zero baseline would make the percentage meaningless.
    if (prevAvg === 0) continue
    if (curAvg <= prevAvg * 1.3) continue
    found.push({
      pattern_key: `growing_complexity:${skill}`,
      title: `Skill ${skill}: tool_calls médio +${Math.round((curAvg / prevAvg - 1) * 100)}% vs semana anterior`,
      description: `Média de tool_calls/sessão subiu de ${prevAvg.toFixed(1)} para ${curAvg.toFixed(1)}.`,
      severity: 'warning',
      metric_value: Math.round(curAvg * 10) / 10,
      sample_session_ids: now.ids,
      affected_count: now.count,
    })
  }
  return found
}
/**
 * Runs all six detectors for the given week and concatenates their results.
 *
 * @param dbWrapper Sessions database wrapper; its raw handle is queried directly.
 * @param weekStart Start of the analysed week.
 * @param weekEnd End of the analysed week.
 * @returns Patterns in detector order (1 to 6). The previous week, used by the
 *   growing-complexity detector, is the same interval shifted back 7 days.
 */
export function detectPatterns(
  dbWrapper: SessionsDb,
  weekStart: Date,
  weekEnd: Date,
): Pattern[] {
  const WEEK_MS = 7 * 86400000
  const ctx: DetectCtx = {
    db: dbWrapper.rawDb(),
    weekStartIso: iso(weekStart),
    weekEndIso: iso(weekEnd),
  }
  const previousStart = new Date(weekStart.getTime() - WEEK_MS)
  const previousEnd = new Date(weekEnd.getTime() - WEEK_MS)

  const found: Pattern[] = []
  found.push(...detectSkillsHighErrorRate(ctx))
  found.push(...detectToolsLowEfficiency(ctx))
  found.push(...detectSkillToolPairs(ctx))
  found.push(...detectDurationOutliers(ctx))
  found.push(...detectAbandonedSessions(ctx))
  found.push(...detectGrowingComplexity(ctx, iso(previousStart), iso(previousEnd)))
  return found
}
/**
 * Builds a persistable PatternRecord from a detector result.
 *
 * @param p Detector output.
 * @param weekIso ISO week label stored as `week_iso`.
 * @param consecutiveWeeks Value stored as `consecutive_weeks`.
 * @returns A record whose `detected_at` is the current time as an ISO string.
 */
export function toPatternRecord(p: Pattern, weekIso: string, consecutiveWeeks: number): PatternRecord {
  const { pattern_key, title, description, severity, metric_value, sample_session_ids, affected_count } = p
  const detected_at = new Date().toISOString()
  const record: PatternRecord = {
    detected_at,
    week_iso: weekIso,
    pattern_key,
    title,
    description,
    severity,
    metric_value,
    sample_session_ids,
    affected_count,
    consecutive_weeks: consecutiveWeeks,
  }
  return record
}
+123
View File
@@ -0,0 +1,123 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest'
import { mkdtempSync, rmSync } from 'fs'
import { tmpdir } from 'os'
import { join } from 'path'
import { openSessionsDb, type SessionsDb, type PatternRecord } from '../services/sessions/db.js'
import { detectPatterns, weekRange, toPatternRecord } from '../services/sessions/patterns.js'
import type { SessionMeta } from '../types/session.js'
/**
 * Test fixture: builds a SessionMeta with defaults for a completed 30-minute
 * session on Monday 2026-04-20 (week 2026-W17), then applies `overrides`.
 * `session_id` and `jsonl_path` get random suffixes unless overridden.
 */
function meta(overrides: Partial<SessionMeta>): SessionMeta {
  const randomId = 's-' + Math.random().toString(36).slice(2, 10)
  const randomPath = '/tmp/' + Math.random().toString(36).slice(2) + '.jsonl'
  const defaults: SessionMeta = {
    session_id: randomId,
    project_path: '/tmp/project',
    project_slug: 'project',
    jsonl_path: randomPath,
    started_at: '2026-04-20T10:00:00Z', // Monday of 2026-W17
    ended_at: '2026-04-20T10:30:00Z',
    duration_sec: 1800,
    event_count: 50,
    user_messages: 5,
    assistant_msgs: 10,
    tool_calls: 20,
    first_prompt: 'olá',
    tools_used: ['Bash'],
    skills_invoked: [],
    outcome: 'completed',
    permission_mode: 'default',
    file_size: 10000,
    indexed_at: '2026-04-20T10:31:00Z',
  }
  return { ...defaults, ...overrides }
}
/**
 * Specs for the pattern detectors and the `patterns` table helpers.
 * Each test runs against a fresh SQLite file in its own temp directory; the
 * handle is closed and the directory removed afterwards so runs do not leak
 * open databases or files.
 */
describe('patterns detector', () => {
  let db: SessionsDb
  let dir: string

  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), 'obs-pat-'))
    db = openSessionsDb(join(dir, 'sessions.db'))
  })

  afterEach(() => {
    // Close before deleting: an open handle can block removal on some platforms.
    db.close()
    rmSync(dir, { recursive: true, force: true })
  })

  it('detecta skill com taxa elevada de erro (action)', () => {
    // 3 sessions with skillX: 2 failed, 1 completed -> ratio 0.67 -> severity=action
    db.upsertSession(meta({ session_id: 'a', skills_invoked: ['skillX'], outcome: 'error' }))
    db.upsertSession(meta({ session_id: 'b', skills_invoked: ['skillX'], outcome: 'interrupted' }))
    db.upsertSession(meta({ session_id: 'c', skills_invoked: ['skillX'], outcome: 'completed' }))
    const { start, end } = weekRange(new Date('2026-04-22T00:00:00Z'))
    const patterns = detectPatterns(db, start, end)
    const errorRate = patterns.find((p) => p.pattern_key === 'skill_error_rate:skillX')
    expect(errorRate).toBeDefined()
    expect(errorRate!.severity).toBe('action')
    expect(errorRate!.affected_count).toBe(2)
  })

  it('detecta sessões abandonadas', () => {
    // 6 sessions with a single event and unknown outcome exceed the threshold of 5.
    for (let i = 0; i < 6; i++) {
      db.upsertSession(meta({ session_id: `ab-${i}`, event_count: 1, outcome: 'unknown' }))
    }
    const { start, end } = weekRange(new Date('2026-04-22T00:00:00Z'))
    const patterns = detectPatterns(db, start, end)
    expect(patterns.some((p) => p.pattern_key === 'abandoned_sessions')).toBe(true)
  })

  it('getConsecutiveWeeks devolve 3 após upserts em semanas sucessivas', () => {
    const key = 'skill_error_rate:Y'
    const weeks = ['2026-W15', '2026-W16', '2026-W17']
    for (const w of weeks) {
      db.upsertPattern({
        detected_at: new Date().toISOString(),
        week_iso: w,
        pattern_key: key,
        title: 't',
        description: 'd',
        severity: 'warning',
        metric_value: 0.5,
        sample_session_ids: ['x'],
        affected_count: 1,
        consecutive_weeks: 1,
      })
    }
    expect(db.getConsecutiveWeeks(key, '2026-W17')).toBe(3)
    expect(db.getConsecutiveWeeks(key, '2026-W16')).toBe(2)
  })

  it('upsertPattern é idempotente por (week_iso, pattern_key)', () => {
    const base: PatternRecord = {
      detected_at: '2026-04-20T00:00:00Z',
      week_iso: '2026-W17',
      pattern_key: 'test',
      title: 'v1',
      description: 'd',
      severity: 'info',
      metric_value: 1,
      sample_session_ids: ['a'],
      affected_count: 1,
      consecutive_weeks: 1,
    }
    db.upsertPattern(base)
    // Same (week_iso, pattern_key): the second call must overwrite, not add a row.
    db.upsertPattern({ ...base, title: 'v2', affected_count: 5, consecutive_weeks: 2 })
    const rows = db.getPatternsByWeek('2026-W17')
    expect(rows).toHaveLength(1)
    expect(rows[0].title).toBe('v2')
    expect(rows[0].affected_count).toBe(5)
    expect(rows[0].consecutive_weeks).toBe(2)
  })

  it('toPatternRecord propaga week_iso e consecutive_weeks', () => {
    const rec = toPatternRecord(
      {
        pattern_key: 'k',
        title: 't',
        description: 'd',
        severity: 'warning',
        metric_value: 0.42,
        sample_session_ids: ['a', 'b'],
        affected_count: 2,
      },
      '2026-W17',
      3,
    )
    expect(rec.week_iso).toBe('2026-W17')
    expect(rec.consecutive_weeks).toBe(3)
    expect(rec.severity).toBe('warning')
  })
})