diff --git a/api/services/sessions/db.ts b/api/services/sessions/db.ts
index ae41e57..9d441f5 100644
--- a/api/services/sessions/db.ts
+++ b/api/services/sessions/db.ts
@@ -13,6 +13,20 @@ export interface ListFilters {
   offset?: number
 }
 
+export interface PatternRecord {
+  id?: number
+  detected_at: string
+  week_iso: string
+  pattern_key: string
+  title: string
+  description: string
+  severity: 'info' | 'warning' | 'action'
+  metric_value: number | null
+  sample_session_ids: string[]
+  affected_count: number
+  consecutive_weeks: number
+}
+
 export interface SessionsDb {
   upsertSession(meta: SessionMeta): void
   upsertMany(metas: SessionMeta[]): void
@@ -20,6 +34,10 @@ export interface SessionsDb {
   countSessions(filters: ListFilters): number
   getSession(id: string): SessionMeta | null
   deleteByJsonlPath(path: string): void
+  upsertPattern(p: PatternRecord): void
+  getPatternsByWeek(week: string): PatternRecord[]
+  getConsecutiveWeeks(pattern_key: string, uptoWeek: string): number
+  rawDb(): Database.Database
   close(): void
 }
@@ -46,6 +64,23 @@ CREATE TABLE IF NOT EXISTS sessions (
 );
 CREATE INDEX IF NOT EXISTS idx_started ON sessions(started_at DESC);
 CREATE INDEX IF NOT EXISTS idx_project ON sessions(project_slug, started_at DESC);
+
+CREATE TABLE IF NOT EXISTS patterns (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  detected_at TEXT NOT NULL,
+  week_iso TEXT NOT NULL,
+  pattern_key TEXT NOT NULL,
+  title TEXT NOT NULL,
+  description TEXT NOT NULL,
+  severity TEXT NOT NULL,
+  metric_value REAL,
+  sample_session_ids TEXT NOT NULL,
+  affected_count INTEGER NOT NULL,
+  consecutive_weeks INTEGER NOT NULL DEFAULT 1,
+  UNIQUE(week_iso, pattern_key)
+);
+CREATE INDEX IF NOT EXISTS idx_patterns_week ON patterns(week_iso);
+CREATE INDEX IF NOT EXISTS idx_patterns_key ON patterns(pattern_key);
 `
 
 function rowToMeta(row: Record<string, unknown>): SessionMeta {
@@ -177,4 +212,88 @@ export function openSessionsDb(dbPath: string): SessionsDb {
     deleteByJsonlPath(path) { db.prepare('DELETE FROM sessions WHERE jsonl_path = ?').run(path) },
+    upsertPattern(p: PatternRecord) {
+      db.prepare(`
+        INSERT INTO patterns (detected_at, week_iso, pattern_key, title, description,
+          severity, metric_value, sample_session_ids, affected_count, consecutive_weeks)
+        VALUES (@detected_at, @week_iso, @pattern_key, @title, @description,
+          @severity, @metric_value, @sample_session_ids, @affected_count, @consecutive_weeks)
+        ON CONFLICT(week_iso, pattern_key) DO UPDATE SET
+          detected_at = excluded.detected_at,
+          title = excluded.title,
+          description = excluded.description,
+          severity = excluded.severity,
+          metric_value = excluded.metric_value,
+          sample_session_ids = excluded.sample_session_ids,
+          affected_count = excluded.affected_count,
+          consecutive_weeks = excluded.consecutive_weeks
+      `).run({
+        detected_at: p.detected_at,
+        week_iso: p.week_iso,
+        pattern_key: p.pattern_key,
+        title: p.title,
+        description: p.description,
+        severity: p.severity,
+        metric_value: p.metric_value,
+        sample_session_ids: JSON.stringify(p.sample_session_ids),
+        affected_count: p.affected_count,
+        consecutive_weeks: p.consecutive_weeks,
+      })
+    },
+    getPatternsByWeek(week: string): PatternRecord[] {
+      // Severity is TEXT, so a plain `ORDER BY severity DESC` sorts alphabetically
+      // (warning > info > action) and puts the most severe patterns LAST. Rank
+      // explicitly instead: action first, then warning, then info.
+      const rows = db.prepare(`SELECT * FROM patterns WHERE week_iso = ? ORDER BY CASE severity WHEN 'action' THEN 0 WHEN 'warning' THEN 1 ELSE 2 END, affected_count DESC`).all(week) as Record<string, unknown>[]
+      return rows.map((r) => ({
+        id: r.id as number,
+        detected_at: r.detected_at as string,
+        week_iso: r.week_iso as string,
+        pattern_key: r.pattern_key as string,
+        title: r.title as string,
+        description: r.description as string,
+        severity: r.severity as PatternRecord['severity'],
+        metric_value: (r.metric_value as number | null) ?? null,
+        sample_session_ids: JSON.parse(r.sample_session_ids as string),
+        affected_count: r.affected_count as number,
+        consecutive_weeks: r.consecutive_weeks as number,
+      }))
+    },
+    getConsecutiveWeeks(pattern_key: string, uptoWeek: string): number {
+      // Counts consecutive weeks up to uptoWeek (inclusive) in which pattern_key appeared.
+      const rows = db.prepare('SELECT DISTINCT week_iso FROM patterns WHERE pattern_key = ? AND week_iso <= ? ORDER BY week_iso DESC').all(pattern_key, uptoWeek) as { week_iso: string }[]
+      if (rows.length === 0) return 0
+      let count = 0
+      let cursor = uptoWeek
+      for (const row of rows) {
+        if (row.week_iso === cursor) {
+          count++
+          cursor = prevWeekIso(cursor)
+        } else {
+          break
+        }
+      }
+      return count
+    },
+    rawDb(): Database.Database {
+      return db
+    },
     close() { db.close() },
   }
 }
+
+/** Computes the previous ISO week (YYYY-Www). */
+export function prevWeekIso(week: string): string {
+  const m = week.match(/^(\d{4})-W(\d{2})$/)
+  if (!m) return week
+  const year = parseInt(m[1], 10)
+  const w = parseInt(m[2], 10)
+  if (w > 1) return `${year}-W${String(w - 1).padStart(2, '0')}`
+  // Week 1 -> last week of the previous year (52 or 53)
+  const prevYear = year - 1
+  const last = weeksInYear(prevYear)
+  return `${prevYear}-W${String(last).padStart(2, '0')}`
+}
+
+function weeksInYear(year: number): number {
+  // ISO: a year has 53 weeks iff Jan 1 is a Thursday, or (leap year and Jan 1 is a Wednesday)
+  const jan1 = new Date(Date.UTC(year, 0, 1)).getUTCDay()
+  const isLeap = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0
+  if (jan1 === 4 || (isLeap && jan1 === 3)) return 53
+  return 52
+}
diff --git a/api/services/sessions/patterns.ts b/api/services/sessions/patterns.ts
new file mode 100644
index 0000000..22b3928
--- /dev/null
+++ b/api/services/sessions/patterns.ts
@@ -0,0 +1,326 @@
+/**
+ * Automatic pattern detector over the `sessions` DB (Observability Phase 6A).
+ *
+ * Six heuristic detectors in plain SQL (via better-sqlite3).
Cada detector + * devolve zero ou mais `Pattern` para a semana analisada. Pipeline: + * 1. Correr detectores sobre intervalo [weekStart, weekEnd] + * 2. Persistir via `upsertPattern` (idempotente por (week_iso, pattern_key)) + * 3. Calcular `consecutive_weeks` olhando para semanas anteriores + */ +import type Database from 'better-sqlite3' +import type { SessionsDb, PatternRecord } from './db.js' + +export type Severity = 'info' | 'warning' | 'action' + +export interface Pattern { + pattern_key: string + title: string + description: string + severity: Severity + metric_value: number | null + sample_session_ids: string[] + affected_count: number +} + +export interface DetectCtx { + db: Database.Database + weekStartIso: string + weekEndIso: string +} + +/** Converte Date para string ISO UTC. */ +function iso(d: Date): string { + return d.toISOString() +} + +/** + * Calcula intervalo [segunda 00:00:00 UTC, domingo 23:59:59.999 UTC] da semana + * que contém `ref` (Regra 17 — semana começa à segunda). + */ +export function weekRange(ref: Date): { start: Date; end: Date; iso: string } { + const d = new Date(Date.UTC(ref.getUTCFullYear(), ref.getUTCMonth(), ref.getUTCDate())) + const dow = d.getUTCDay() // 0=Dom, 1=Seg + const diffToMonday = dow === 0 ? -6 : 1 - dow + const start = new Date(d) + start.setUTCDate(d.getUTCDate() + diffToMonday) + const end = new Date(start) + end.setUTCDate(start.getUTCDate() + 6) + end.setUTCHours(23, 59, 59, 999) + return { start, end, iso: weekIso(start) } +} + +/** Semana ISO 8601 (YYYY-Www) para segunda de referência. */ +export function weekIso(monday: Date): string { + // Usa algoritmo ISO: quinta da mesma semana determina o ano + const thursday = new Date(monday) + thursday.setUTCDate(monday.getUTCDate() + 3) + const year = thursday.getUTCFullYear() + const jan1 = new Date(Date.UTC(year, 0, 1)) + const week = Math.floor( + ((thursday.getTime() - jan1.getTime()) / 86400000 + (jan1.getUTCDay() === 0 ? 
6 : jan1.getUTCDay() - 1)) / 7 + ) + 1 + return `${year}-W${String(week).padStart(2, '0')}` +} + +/** Helper: todos os session_ids no intervalo. */ +function baseRows(ctx: DetectCtx) { + return ctx.db.prepare(` + SELECT session_id, project_slug, started_at, event_count, tool_calls, tools_used, skills_invoked, outcome, duration_sec + FROM sessions + WHERE started_at >= ? AND started_at <= ? + `).all(ctx.weekStartIso, ctx.weekEndIso) as Array<{ + session_id: string + project_slug: string + started_at: string + event_count: number + tool_calls: number + tools_used: string + skills_invoked: string + outcome: string + duration_sec: number | null + }> +} + +/** 1. Skills com taxa elevada de erro/interrupção. */ +export function detectSkillsHighErrorRate(ctx: DetectCtx): Pattern[] { + const rows = baseRows(ctx) + // Agregar por skill + const bySkill = new Map() + for (const r of rows) { + let skills: string[] = [] + try { skills = JSON.parse(r.skills_invoked) } catch {} + for (const sk of skills) { + const entry = bySkill.get(sk) ?? { total: 0, fail: 0, ids: [] } + entry.total++ + if (r.outcome === 'error' || r.outcome === 'interrupted') { + entry.fail++ + if (entry.ids.length < 5) entry.ids.push(r.session_id) + } + bySkill.set(sk, entry) + } + } + const out: Pattern[] = [] + for (const [skill, v] of bySkill) { + if (v.total < 3) continue + const ratio = v.fail / v.total + if (ratio <= 0.2) continue + const severity: Severity = ratio > 0.4 ? 'action' : 'warning' + out.push({ + pattern_key: `skill_error_rate:${skill}`, + title: `Skill ${skill}: ${(ratio * 100).toFixed(0)}% das sessões falham`, + description: `De ${v.total} sessões que invocaram ${skill}, ${v.fail} terminaram em erro/interrupção.`, + severity, + metric_value: Math.round(ratio * 1000) / 1000, + sample_session_ids: v.ids, + affected_count: v.fail, + }) + } + return out +} + +/** 2. Tools com baixa eficiência (tool_calls/event_count elevado). 
*/ +export function detectToolsLowEfficiency(ctx: DetectCtx): Pattern[] { + const rows = baseRows(ctx) + const byTool = new Map() + for (const r of rows) { + if (!r.event_count || r.event_count === 0) continue + const ratio = r.tool_calls / r.event_count + let tools: string[] = [] + try { tools = JSON.parse(r.tools_used) } catch {} + for (const t of tools) { + const e = byTool.get(t) ?? { sum: 0, count: 0, ids: [] } + e.sum += ratio + e.count++ + if (e.ids.length < 5) e.ids.push(r.session_id) + byTool.set(t, e) + } + } + const out: Pattern[] = [] + for (const [tool, v] of byTool) { + if (v.count < 5) continue + const avg = v.sum / v.count + if (avg <= 0.5) continue + out.push({ + pattern_key: `tool_low_efficiency:${tool}`, + title: `Tool ${tool}: rácio tool_calls/event_count médio ${avg.toFixed(2)}`, + description: `Em ${v.count} sessões, ${tool} domina o event_count. Indício de uso ineficiente ou looping.`, + severity: 'info', + metric_value: Math.round(avg * 1000) / 1000, + sample_session_ids: v.ids, + affected_count: v.count, + }) + } + return out +} + +/** 3. Pares (skill, tool) mais frequentes. */ +export function detectSkillToolPairs(ctx: DetectCtx): Pattern[] { + const rows = baseRows(ctx) + const byPair = new Map() + for (const r of rows) { + let skills: string[] = [] + let tools: string[] = [] + try { skills = JSON.parse(r.skills_invoked) } catch {} + try { tools = JSON.parse(r.tools_used) } catch {} + for (const s of skills) { + for (const t of tools) { + const key = `${s}::${t}` + const e = byPair.get(key) ?? 
{ count: 0, ids: [] } + e.count++ + if (e.ids.length < 5) e.ids.push(r.session_id) + byPair.set(key, e) + } + } + } + const sorted = [...byPair.entries()].filter(([, v]) => v.count >= 5).sort((a, b) => b[1].count - a[1].count).slice(0, 5) + return sorted.map(([key, v]) => ({ + pattern_key: `skill_tool_pair:${key}`, + title: `Par frequente: ${key.replace('::', ' + ')}`, + description: `Skill e tool co-ocorreram em ${v.count} sessões esta semana.`, + severity: 'info' as Severity, + metric_value: v.count, + sample_session_ids: v.ids, + affected_count: v.count, + })) +} + +/** 4. Duration outliers: sessões > p95 por projecto com outcome != completed. */ +export function detectDurationOutliers(ctx: DetectCtx): Pattern[] { + const rows = baseRows(ctx).filter((r) => r.duration_sec != null && r.duration_sec > 0) + const byProject = new Map>() + for (const r of rows) { + const arr = byProject.get(r.project_slug) ?? [] + arr.push(r) + byProject.set(r.project_slug, arr) + } + const out: Pattern[] = [] + for (const [proj, arr] of byProject) { + if (arr.length < 4) continue + const durations = arr.map((r) => r.duration_sec as number).sort((a, b) => a - b) + const p95Idx = Math.max(0, Math.floor(durations.length * 0.95) - 1) + const p95 = durations[p95Idx] + const outliers = arr.filter((r) => (r.duration_sec as number) > p95 && r.outcome !== 'completed') + if (outliers.length < 3) continue + out.push({ + pattern_key: `duration_outliers:${proj}`, + title: `Projecto ${proj}: ${outliers.length} sessões longas não concluídas`, + description: `Sessões com duração acima do p95 (${p95}s) e outcome != completed. Sinal de sessões penduradas.`, + severity: 'warning', + metric_value: p95, + sample_session_ids: outliers.slice(0, 5).map((r) => r.session_id), + affected_count: outliers.length, + }) + } + return out +} + +/** 5. Sessões abandonadas (event_count < 3 AND outcome=unknown). 
*/ +export function detectAbandonedSessions(ctx: DetectCtx): Pattern[] { + const rows = ctx.db.prepare(` + SELECT session_id FROM sessions + WHERE started_at >= ? AND started_at <= ? + AND event_count < 3 AND outcome = 'unknown' + `).all(ctx.weekStartIso, ctx.weekEndIso) as Array<{ session_id: string }> + if (rows.length < 5) return [] + return [{ + pattern_key: 'abandoned_sessions', + title: `${rows.length} sessões abandonadas esta semana`, + description: `Sessões com menos de 3 eventos e outcome=unknown — tipicamente abertas e descartadas.`, + severity: 'info', + metric_value: rows.length, + sample_session_ids: rows.slice(0, 5).map((r) => r.session_id), + affected_count: rows.length, + }] +} + +/** 6. Crescimento de complexidade: avg(tool_calls) actual vs semana anterior. */ +export function detectGrowingComplexity(ctx: DetectCtx, prevWeekStartIso: string, prevWeekEndIso: string): Pattern[] { + const curRows = baseRows(ctx) + const prevRows = ctx.db.prepare(` + SELECT skills_invoked, tool_calls FROM sessions + WHERE started_at >= ? AND started_at <= ? + `).all(prevWeekStartIso, prevWeekEndIso) as Array<{ skills_invoked: string; tool_calls: number }> + + const curBySkill = new Map() + for (const r of curRows) { + let sk: string[] = [] + try { sk = JSON.parse(r.skills_invoked) } catch {} + for (const s of sk) { + const e = curBySkill.get(s) ?? { sum: 0, count: 0, ids: [] } + e.sum += r.tool_calls + e.count++ + if (e.ids.length < 5) e.ids.push(r.session_id) + curBySkill.set(s, e) + } + } + const prevBySkill = new Map() + for (const r of prevRows) { + let sk: string[] = [] + try { sk = JSON.parse(r.skills_invoked) } catch {} + for (const s of sk) { + const e = prevBySkill.get(s) ?? 
{ sum: 0, count: 0 } + e.sum += r.tool_calls + e.count++ + prevBySkill.set(s, e) + } + } + const out: Pattern[] = [] + for (const [skill, cur] of curBySkill) { + if (cur.count < 5) continue + const curAvg = cur.sum / cur.count + const prev = prevBySkill.get(skill) + if (!prev || prev.count < 3) continue + const prevAvg = prev.sum / prev.count + if (prevAvg === 0 || curAvg <= prevAvg * 1.3) continue + out.push({ + pattern_key: `growing_complexity:${skill}`, + title: `Skill ${skill}: tool_calls médio +${Math.round((curAvg / prevAvg - 1) * 100)}% vs semana anterior`, + description: `Média de tool_calls/sessão subiu de ${prevAvg.toFixed(1)} para ${curAvg.toFixed(1)}.`, + severity: 'warning', + metric_value: Math.round(curAvg * 10) / 10, + sample_session_ids: cur.ids, + affected_count: cur.count, + }) + } + return out +} + +/** Orquestra todos os detectores para a semana indicada. */ +export function detectPatterns( + dbWrapper: SessionsDb, + weekStart: Date, + weekEnd: Date, +): Pattern[] { + const db = dbWrapper.rawDb() + const ctx: DetectCtx = { + db, + weekStartIso: iso(weekStart), + weekEndIso: iso(weekEnd), + } + const prevStart = new Date(weekStart); prevStart.setUTCDate(prevStart.getUTCDate() - 7) + const prevEnd = new Date(weekEnd); prevEnd.setUTCDate(prevEnd.getUTCDate() - 7) + return [ + ...detectSkillsHighErrorRate(ctx), + ...detectToolsLowEfficiency(ctx), + ...detectSkillToolPairs(ctx), + ...detectDurationOutliers(ctx), + ...detectAbandonedSessions(ctx), + ...detectGrowingComplexity(ctx, iso(prevStart), iso(prevEnd)), + ] +} + +/** Converte Pattern + contexto em PatternRecord pronto a persistir. 
*/ +export function toPatternRecord(p: Pattern, weekIso: string, consecutiveWeeks: number): PatternRecord { + return { + detected_at: new Date().toISOString(), + week_iso: weekIso, + pattern_key: p.pattern_key, + title: p.title, + description: p.description, + severity: p.severity, + metric_value: p.metric_value, + sample_session_ids: p.sample_session_ids, + affected_count: p.affected_count, + consecutive_weeks: consecutiveWeeks, + } +} diff --git a/api/tests/sessions-patterns.test.ts b/api/tests/sessions-patterns.test.ts new file mode 100644 index 0000000..c9066b3 --- /dev/null +++ b/api/tests/sessions-patterns.test.ts @@ -0,0 +1,123 @@ +import { describe, it, expect, beforeEach } from 'vitest' +import { mkdtempSync } from 'fs' +import { tmpdir } from 'os' +import { join } from 'path' +import { openSessionsDb, type SessionsDb, type PatternRecord } from '../services/sessions/db.js' +import { detectPatterns, weekRange, toPatternRecord } from '../services/sessions/patterns.js' +import type { SessionMeta } from '../types/session.js' + +function meta(overrides: Partial): SessionMeta { + return { + session_id: 's-' + Math.random().toString(36).slice(2, 10), + project_path: '/tmp/project', + project_slug: 'project', + jsonl_path: '/tmp/' + Math.random().toString(36).slice(2) + '.jsonl', + started_at: '2026-04-20T10:00:00Z', // segunda de 2026-W17 + ended_at: '2026-04-20T10:30:00Z', + duration_sec: 1800, + event_count: 50, + user_messages: 5, + assistant_msgs: 10, + tool_calls: 20, + first_prompt: 'olá', + tools_used: ['Bash'], + skills_invoked: [], + outcome: 'completed', + permission_mode: 'default', + file_size: 10000, + indexed_at: '2026-04-20T10:31:00Z', + ...overrides, + } +} + +describe('patterns detector', () => { + let db: SessionsDb + beforeEach(() => { + const dir = mkdtempSync(join(tmpdir(), 'obs-pat-')) + db = openSessionsDb(join(dir, 'sessions.db')) + }) + + it('detecta skill com taxa elevada de erro (action)', () => { + // 3 sessões skill X: 2 error, 1 
completed → ratio 0.67 → severity=action + db.upsertSession(meta({ session_id: 'a', skills_invoked: ['skillX'], outcome: 'error' })) + db.upsertSession(meta({ session_id: 'b', skills_invoked: ['skillX'], outcome: 'interrupted' })) + db.upsertSession(meta({ session_id: 'c', skills_invoked: ['skillX'], outcome: 'completed' })) + const { start, end } = weekRange(new Date('2026-04-22T00:00:00Z')) + const patterns = detectPatterns(db, start, end) + const errorRate = patterns.find((p) => p.pattern_key === 'skill_error_rate:skillX') + expect(errorRate).toBeDefined() + expect(errorRate!.severity).toBe('action') + expect(errorRate!.affected_count).toBe(2) + }) + + it('detecta sessões abandonadas', () => { + for (let i = 0; i < 6; i++) { + db.upsertSession(meta({ session_id: `ab-${i}`, event_count: 1, outcome: 'unknown' })) + } + const { start, end } = weekRange(new Date('2026-04-22T00:00:00Z')) + const patterns = detectPatterns(db, start, end) + expect(patterns.some((p) => p.pattern_key === 'abandoned_sessions')).toBe(true) + }) + + it('getConsecutiveWeeks devolve 3 após upserts em semanas sucessivas', () => { + const key = 'skill_error_rate:Y' + const weeks = ['2026-W15', '2026-W16', '2026-W17'] + for (const w of weeks) { + db.upsertPattern({ + detected_at: new Date().toISOString(), + week_iso: w, + pattern_key: key, + title: 't', + description: 'd', + severity: 'warning', + metric_value: 0.5, + sample_session_ids: ['x'], + affected_count: 1, + consecutive_weeks: 1, + }) + } + expect(db.getConsecutiveWeeks(key, '2026-W17')).toBe(3) + expect(db.getConsecutiveWeeks(key, '2026-W16')).toBe(2) + }) + + it('upsertPattern é idempotente por (week_iso, pattern_key)', () => { + const base: PatternRecord = { + detected_at: '2026-04-20T00:00:00Z', + week_iso: '2026-W17', + pattern_key: 'test', + title: 'v1', + description: 'd', + severity: 'info', + metric_value: 1, + sample_session_ids: ['a'], + affected_count: 1, + consecutive_weeks: 1, + } + db.upsertPattern(base) + 
db.upsertPattern({ ...base, title: 'v2', affected_count: 5, consecutive_weeks: 2 }) + const rows = db.getPatternsByWeek('2026-W17') + expect(rows).toHaveLength(1) + expect(rows[0].title).toBe('v2') + expect(rows[0].affected_count).toBe(5) + expect(rows[0].consecutive_weeks).toBe(2) + }) + + it('toPatternRecord propaga week_iso e consecutive_weeks', () => { + const rec = toPatternRecord( + { + pattern_key: 'k', + title: 't', + description: 'd', + severity: 'warning', + metric_value: 0.42, + sample_session_ids: ['a', 'b'], + affected_count: 2, + }, + '2026-W17', + 3, + ) + expect(rec.week_iso).toBe('2026-W17') + expect(rec.consecutive_weeks).toBe(3) + expect(rec.severity).toBe('warning') + }) +})