feat(observabilidade): tabela worklog_comments + parser HTML + importer MCP

- Schema worklog_comments (id, discussion, parent, datas, staff, campos parseados em JSON) - Parser HTML tolerante (h2/h3/h4) extrai title, task_ref, duration, work_items, files_modified, problems, patterns_text, actions - Módulo worklog-import com paginação MCP get_discussion_comments - Helper mcp-client.ts partilhado (gateway MCP JSON-RPC + SSE) - Dep runtime: node-html-parser Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 03:07:09 +01:00
parent 86770b1570
commit 11f9833aac
5 changed files with 685 additions and 0 deletions
@@ -27,6 +27,31 @@ export interface PatternRecord {
  consecutive_weeks: number
 }

+/**
+ * One comment as written to and read back from the `worklog_comments` table.
+ *
+ * The list/object fields are stored as JSON text by `upsertWorklogComment`
+ * and parsed back by `listWorklogComments`, so callers only handle
+ * structured values.
+ */
+export interface WorklogCommentRecord {
+  /** Primary key of the row; an upsert with the same id overwrites the row. */
+  id: number
+  discussion_id: number
+  /** Stored as text; filters and ordering compare it as a string. */
+  created_at: string
+  staff_id: number | null
+  title: string | null
+  task_ref: string | null
+  duration_sec: number | null
+  work_items: string[]
+  files_modified: string[]
+  /** Problem/solution pairs (persisted in the `problems_json` column). */
+  problems: { problema: string; solucao: string }[]
+  patterns_text: string[]
+  /** Action items (persisted in the `actions_json` column). */
+  actions: { tipo: string; descricao: string; prioridade: string | null }[]
+  raw_html: string
+  imported_at: string
+}
+
+/**
+ * Optional filters for listing/counting worklog comments.
+ *
+ * A filter is applied only when its value is truthy; all active filters are
+ * combined with AND.
+ */
+export interface WorklogFilters {
+  discussion_id?: number
+  task_ref?: string
+  /** Lower bound (inclusive) compared against `created_at` as a string. */
+  sinceIso?: string
+  /** Page size used by `listWorklogComments` (defaults to 1000 there); not used by the count. */
+  limit?: number
+  /** Row offset used by `listWorklogComments` (defaults to 0 there); not used by the count. */
+  offset?: number
+}
+
 export interface SessionsDb {
  upsertSession(meta: SessionMeta): void
  upsertMany(metas: SessionMeta[]): void
@@ -37,6 +62,10 @@ export interface SessionsDb {
  upsertPattern(p: PatternRecord): void
  getPatternsByWeek(week: string): PatternRecord[]
  getConsecutiveWeeks(pattern_key: string, uptoWeek: string): number
+  upsertWorklogComment(c: WorklogCommentRecord): { inserted: boolean }
+  hasWorklogComment(id: number): boolean
+  listWorklogComments(filters: WorklogFilters): WorklogCommentRecord[]
+  countWorklogComments(filters?: WorklogFilters): number
  rawDb(): Database.Database
  close(): void
 }
@@ -81,6 +110,25 @@ CREATE TABLE IF NOT EXISTS patterns (
 );
 CREATE INDEX IF NOT EXISTS idx_patterns_week ON patterns(week_iso);
 CREATE INDEX IF NOT EXISTS idx_patterns_key ON patterns(pattern_key);
+
+CREATE TABLE IF NOT EXISTS worklog_comments (
+  id              INTEGER PRIMARY KEY,
+  discussion_id   INTEGER NOT NULL,
+  created_at      TEXT NOT NULL,
+  staff_id        INTEGER,
+  title           TEXT,
+  task_ref        TEXT,
+  duration_sec    INTEGER,
+  work_items      TEXT NOT NULL,
+  files_modified  TEXT NOT NULL,
+  problems_json   TEXT NOT NULL,
+  patterns_text   TEXT NOT NULL,
+  actions_json    TEXT NOT NULL,
+  raw_html        TEXT NOT NULL,
+  imported_at     TEXT NOT NULL
+);
+CREATE INDEX IF NOT EXISTS idx_wc_discussion ON worklog_comments(discussion_id, created_at DESC);
+CREATE INDEX IF NOT EXISTS idx_wc_task ON worklog_comments(task_ref);
 `

 function rowToMeta(row: Record<string, unknown>): SessionMeta {
@@ -272,6 +320,89 @@ export function openSessionsDb(dbPath: string): SessionsDb {
      }
      return count
    },
+    /**
+     * Inserts the comment, or overwrites every column of the row that already
+     * has the same `id`. List/object fields are serialized to JSON text.
+     *
+     * @returns `inserted: true` when no row with that id existed before the write.
+     */
+    upsertWorklogComment(c: WorklogCommentRecord): { inserted: boolean } {
+      // Looked up before the write so the caller can tell insert from update.
+      const priorRow = db.prepare('SELECT 1 FROM worklog_comments WHERE id = ?').get(c.id)
+      const bindings = {
+        id: c.id,
+        discussion_id: c.discussion_id,
+        created_at: c.created_at,
+        staff_id: c.staff_id,
+        title: c.title,
+        task_ref: c.task_ref,
+        duration_sec: c.duration_sec,
+        work_items: JSON.stringify(c.work_items),
+        files_modified: JSON.stringify(c.files_modified),
+        problems_json: JSON.stringify(c.problems),
+        patterns_text: JSON.stringify(c.patterns_text),
+        actions_json: JSON.stringify(c.actions),
+        raw_html: c.raw_html,
+        imported_at: c.imported_at,
+      }
+      const statement = db.prepare(`
+        INSERT INTO worklog_comments (id, discussion_id, created_at, staff_id, title, task_ref,
+          duration_sec, work_items, files_modified, problems_json, patterns_text, actions_json,
+          raw_html, imported_at)
+        VALUES (@id, @discussion_id, @created_at, @staff_id, @title, @task_ref,
+          @duration_sec, @work_items, @files_modified, @problems_json, @patterns_text, @actions_json,
+          @raw_html, @imported_at)
+        ON CONFLICT(id) DO UPDATE SET
+          discussion_id = excluded.discussion_id,
+          created_at = excluded.created_at,
+          staff_id = excluded.staff_id,
+          title = excluded.title,
+          task_ref = excluded.task_ref,
+          duration_sec = excluded.duration_sec,
+          work_items = excluded.work_items,
+          files_modified = excluded.files_modified,
+          problems_json = excluded.problems_json,
+          patterns_text = excluded.patterns_text,
+          actions_json = excluded.actions_json,
+          raw_html = excluded.raw_html,
+          imported_at = excluded.imported_at
+      `)
+      statement.run(bindings)
+      return { inserted: !priorRow }
+    },
+    /** Reports whether a row with the given `id` exists in `worklog_comments`. */
+    hasWorklogComment(id: number): boolean {
+      const hit = db.prepare('SELECT 1 FROM worklog_comments WHERE id = ?').get(id)
+      return Boolean(hit)
+    },
+    /**
+     * Lists stored comments, newest `created_at` first.
+     *
+     * Only truthy filters are applied (joined with AND). `limit` defaults to
+     * 1000 and `offset` to 0. JSON columns are parsed back into arrays/objects.
+     */
+    listWorklogComments(filters: WorklogFilters): WorklogCommentRecord[] {
+      const clauses: string[] = []
+      const bind: Record<string, unknown> = {}
+      if (filters.discussion_id) {
+        clauses.push('discussion_id = @discussion_id')
+        bind.discussion_id = filters.discussion_id
+      }
+      if (filters.task_ref) {
+        clauses.push('task_ref = @task_ref')
+        bind.task_ref = filters.task_ref
+      }
+      if (filters.sinceIso) {
+        clauses.push('created_at >= @since')
+        bind.since = filters.sinceIso
+      }
+      const where = clauses.length ? 'WHERE ' + clauses.join(' AND ') : ''
+      bind.limit = filters.limit ?? 1000
+      bind.offset = filters.offset ?? 0
+
+      const statement = db.prepare(`SELECT * FROM worklog_comments ${where} ORDER BY created_at DESC LIMIT @limit OFFSET @offset`)
+      const rows = statement.all(bind) as Record<string, unknown>[]
+
+      const records: WorklogCommentRecord[] = []
+      for (const row of rows) {
+        records.push({
+          id: row.id as number,
+          discussion_id: row.discussion_id as number,
+          created_at: row.created_at as string,
+          staff_id: (row.staff_id as number | null) ?? null,
+          title: (row.title as string | null) ?? null,
+          task_ref: (row.task_ref as string | null) ?? null,
+          duration_sec: (row.duration_sec as number | null) ?? null,
+          work_items: JSON.parse(row.work_items as string),
+          files_modified: JSON.parse(row.files_modified as string),
+          problems: JSON.parse(row.problems_json as string),
+          patterns_text: JSON.parse(row.patterns_text as string),
+          actions: JSON.parse(row.actions_json as string),
+          raw_html: row.raw_html as string,
+          imported_at: row.imported_at as string,
+        })
+      }
+      return records
+    },
+    /**
+     * Counts stored comments matching the (optional) filters.
+     *
+     * Only truthy `discussion_id`, `task_ref` and `sinceIso` are applied;
+     * `limit`/`offset` play no part in the count.
+     */
+    countWorklogComments(filters?: WorklogFilters): number {
+      const clauses: string[] = []
+      const bind: Record<string, unknown> = {}
+      if (filters?.discussion_id) {
+        clauses.push('discussion_id = @discussion_id')
+        bind.discussion_id = filters.discussion_id
+      }
+      if (filters?.task_ref) {
+        clauses.push('task_ref = @task_ref')
+        bind.task_ref = filters.task_ref
+      }
+      if (filters?.sinceIso) {
+        clauses.push('created_at >= @since')
+        bind.since = filters.sinceIso
+      }
+      const where = clauses.length ? 'WHERE ' + clauses.join(' AND ') : ''
+      const counted = db.prepare(`SELECT COUNT(*) as c FROM worklog_comments ${where}`).get(bind) as { c: number }
+      return counted.c
+    },
    /** Returns the underlying `db` handle used by the methods of this object. */
    rawDb(): Database.Database {
      return db
    },
@@ -0,0 +1,69 @@
+/**
+ * Cliente HTTP mínimo para o gateway MCP (JSON-RPC 2.0 sobre HTTP).
+ *
+ * Suporta resposta em JSON puro ou SSE (text/event-stream). Partilhado entre
+ * os scripts de Observabilidade (patterns + worklog import).
+ */
+
+/** `result` member of a JSON-RPC `tools/call` response, as consumed by this module. */
+export interface MCPToolCallResult {
+  /** Content items; `extractMcpJsonPayload` reads the first one whose `type` is `'text'`. */
+  content?: Array<{ type: string; text: string }>
+  /** When truthy, `callMcpTool` throws instead of returning the result. */
+  isError?: boolean
+}
+
+/** JSON-RPC response envelope fields that `callMcpTool` inspects. */
+type JsonRpcEnvelope = { error?: unknown; result?: MCPToolCallResult }
+
+/**
+ * Locates the JSON-RPC envelope inside a gateway response body.
+ *
+ * Handles, in order: a body that is one JSON object (single-line or
+ * pretty-printed), and an SSE stream. In an SSE stream, `data:` lines of the
+ * same event are joined with `\n` and events are separated by blank lines.
+ * The first candidate carrying `result` or `error` wins, so notifications
+ * emitted before the actual response are skipped; otherwise the first
+ * candidate that parses to an object is returned.
+ *
+ * @returns the envelope, or `null` when no JSON object could be parsed.
+ */
+function readJsonRpcEnvelope(raw: string): JsonRpcEnvelope | null {
+  const tryParse = (text: string): JsonRpcEnvelope | null => {
+    try {
+      const value: unknown = JSON.parse(text)
+      const isPlainObject = typeof value === 'object' && value !== null && !Array.isArray(value)
+      return isPlainObject ? (value as JsonRpcEnvelope) : null
+    } catch {
+      return null
+    }
+  }
+
+  const body = raw.trim()
+  if (body.startsWith('{')) {
+    // Plain JSON response; may span several lines when pretty-printed.
+    const whole = tryParse(body)
+    if (whole) return whole
+  }
+
+  const candidates: string[] = []
+  let dataLines: string[] = []
+  const flushEvent = (): void => {
+    if (dataLines.length > 0) candidates.push(dataLines.join('\n'))
+    dataLines = []
+  }
+  for (const line of body.split(/\r?\n/)) {
+    const trimmed = line.trim()
+    if (!trimmed) {
+      flushEvent()
+      continue
+    }
+    if (trimmed.startsWith('data:')) {
+      // The space after the colon is optional in SSE.
+      dataLines.push(trimmed.slice(5).trimStart())
+    } else if (trimmed.startsWith('{')) {
+      flushEvent()
+      candidates.push(trimmed)
+    }
+  }
+  flushEvent()
+
+  let firstParsed: JsonRpcEnvelope | null = null
+  for (const text of candidates) {
+    const envelope = tryParse(text)
+    if (!envelope) continue
+    if ('result' in envelope || 'error' in envelope) return envelope
+    if (!firstParsed) firstParsed = envelope
+  }
+  return firstParsed
+}
+
+/**
+ * Calls one tool on the MCP gateway via a JSON-RPC 2.0 `tools/call` POST.
+ *
+ * The gateway URL comes from `MCP_GATEWAY_URL` (with a built-in default) and
+ * the bearer token from `MCP_GATEWAY_TOKEN`. The response may be plain JSON
+ * or an SSE stream.
+ *
+ * @param tool Tool name sent as `params.name`.
+ * @param args Tool arguments sent as `params.arguments`.
+ * @returns The `result` member of the response, or `{}` when it is absent.
+ * @throws Error when the token is missing, the HTTP status is not ok, no JSON
+ *   envelope can be found, the envelope carries `error`, or the result has
+ *   `isError` set.
+ */
+export async function callMcpTool(
+  tool: string,
+  args: Record<string, unknown>,
+): Promise<MCPToolCallResult> {
+  const url = process.env.MCP_GATEWAY_URL ?? 'https://gateway.descomplicar.pt/v1/desk-crm/mcp'
+  const token = process.env.MCP_GATEWAY_TOKEN
+  if (!token) throw new Error('MCP_GATEWAY_TOKEN não definido')
+  const body = {
+    jsonrpc: '2.0',
+    id: Date.now(),
+    method: 'tools/call',
+    params: { name: tool, arguments: args },
+  }
+  const res = await fetch(url, {
+    method: 'POST',
+    headers: {
+      Authorization: `Bearer ${token}`,
+      'Content-Type': 'application/json',
+      Accept: 'application/json, text/event-stream',
+    },
+    body: JSON.stringify(body),
+  })
+  if (!res.ok) {
+    // Best effort: the body is only used to enrich the error message.
+    const txt = await res.text().catch(() => '')
+    throw new Error(`MCP gateway ${res.status}: ${txt.slice(0, 300)}`)
+  }
+  const raw = await res.text()
+  const envelope = readJsonRpcEnvelope(raw)
+  if (!envelope) throw new Error(`MCP resposta sem payload JSON: ${raw.slice(0, 200)}`)
+  if (envelope.error) throw new Error(`MCP error: ${JSON.stringify(envelope.error)}`)
+  const result = envelope.result
+  if (result?.isError) {
+    const txt = result.content?.map((c) => c.text).join('\n') ?? ''
+    throw new Error(`MCP tool ${tool} devolveu isError: ${txt.slice(0, 300)}`)
+  }
+  return result ?? {}
+}
+
+/**
+ * Parses the text of the first `'text'` content item of an MCP result as JSON.
+ *
+ * @throws Error when there is no such item or its text is empty; whatever
+ *   `JSON.parse` throws when the text is not valid JSON.
+ */
+export function extractMcpJsonPayload<T = unknown>(r: MCPToolCallResult): T {
+  const firstTextItem = r.content?.find((item) => item.type === 'text')
+  const text = firstTextItem?.text
+  if (!text) {
+    throw new Error('MCP result sem content text')
+  }
+  const decoded = JSON.parse(text) as T
+  return decoded
+}
@@ -0,0 +1,351 @@
+/**
+ * Importer dos comentários das discussões Desk #31 (Logs), #32 (Reflexões)
+ * e #33 (Acções de Melhoria) para a tabela `worklog_comments`.
+ *
+ * Parser HTML tolerante — aceita ambos formatos produzidos pelo skill
+ * `gestao:worklog` (versão antiga usava `<h2>/<h3>` inline-styled, versão
+ * nova usa `<h4>` limpos). Secções identificadas por título normalizado
+ * (ex.: "trabalho realizado", "ficheiros modificados", "problemas",
+ * "padrões detectados", "acções sugeridas").
+ */
+import { parse, type HTMLElement } from 'node-html-parser'
+import type { SessionsDb, WorklogCommentRecord } from './db.js'
+import { callMcpTool, extractMcpJsonPayload } from './mcp-client.js'
+
+/**
+ * Structured view of one comment as produced by `parseWorklogHtml`.
+ *
+ * `id`, `discussion_id` and `staff_id` are copied from the caller-supplied
+ * metadata; the remaining fields are derived from the HTML.
+ */
+export interface ParsedWorklogComment {
+  id: number
+  discussion_id: number
+  /** Caller-supplied date, or a fallback derived from the content when that is empty or starts with "1970". */
+  created_at: string
+  staff_id: number | null
+  /** Text of the first non-empty heading, if any. */
+  title: string | null
+  /** Task reference in `#<digits>` form, if one was found in the text. */
+  task_ref: string | null
+  /** Duration in seconds, if one was found in the text. */
+  duration_sec: number | null
+  work_items: string[]
+  files_modified: string[]
+  problems: { problema: string; solucao: string }[]
+  patterns_text: string[]
+  actions: { tipo: string; descricao: string; prioridade: string | null }[]
+  /** The HTML exactly as passed to the parser. */
+  raw_html: string
+}
+
+/** Comment shape read from the `comments` array of the `get_discussion_comments` payload. */
+interface RawComment {
+  id: number
+  discussion_id: number
+  /** Comment body; passed to the HTML parser. */
+  content: string
+  /** Creation date in whatever form the gateway sends; see `normalizeMcpDate` for the accepted shapes. */
+  created: unknown
+  staff_id: number | null
+  /** Nested replies; flattened together with their parent before import. */
+  children?: RawComment[]
+}
+
+/** Collapses every run of whitespace into a single space and trims both ends. */
+function norm(s: string): string {
+  const collapsed = s.replace(/\s+/g, ' ')
+  return collapsed.trim()
+}
+
+/**
+ * Turns free text into a section lookup key: lowercase, accents removed,
+ * everything except `a-z`, `0-9` and spaces replaced by a space, whitespace
+ * collapsed and trimmed.
+ *
+ * The result can be the empty string (e.g. for a heading made only of
+ * punctuation or emoji).
+ */
+function sectionKey(s: string): string {
+  return s
+    .toLowerCase()
+    .normalize('NFD')
+    // Combining diacritical marks (U+0300–U+036F), written as escapes so the
+    // range cannot be silently altered by editors or Unicode normalization.
+    .replace(/[\u0300-\u036f]/g, '')
+    .replace(/[^a-z0-9 ]/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim()
+}
+
+// Accepted heading keys per section. Entries are written in the normalized
+// form produced by `sectionKey` (lowercase, no accents, no punctuation),
+// because that is what they are compared against.
+const SECTION_WORK = new Set(['trabalho realizado', 'o que foi feito', 'feito', 'realizado', 'trabalho'])
+const SECTION_FILES = new Set(['ficheiros modificados', 'ficheiros alterados', 'files modified', 'ficheiros'])
+const SECTION_PROBLEMS = new Set(['problemas solucoes', 'problemas', 'solucoes', 'problemas e solucoes', 'problemas solucao'])
+const SECTION_PATTERNS = new Set(['padroes detectados', 'padroes', 'patterns', 'insights'])
+const SECTION_ACTIONS = new Set(['accoes sugeridas', 'accoes', 'acoes sugeridas', 'acoes', 'actions', 'accoes de melhoria'])
+
+/**
+ * Finds the first `YYYY-MM-DD` (optionally followed by ` HH:MM` or `THH:MM`)
+ * in the text and returns it as `YYYY-MM-DDTHH:MM:00Z`; the time defaults to
+ * midnight. Returns null when no date is present.
+ *
+ * The `Z` suffix is appended unconditionally and the digits are not
+ * range-checked.
+ */
+function parseDateFromTitle(title: string): string | null {
+  const match = /(\d{4})-(\d{2})-(\d{2})(?:[ T](\d{2}):(\d{2}))?/.exec(title)
+  if (match === null) return null
+  const datePart = `${match[1]}-${match[2]}-${match[3]}`
+  const hour = match[4]
+  const minute = match[5]
+  const timePart = hour && minute ? `${hour}:${minute}:00` : '00:00:00'
+  return `${datePart}T${timePart}Z`
+}
+
+/**
+ * Extracts a task reference and returns it as `#<digits>`.
+ *
+ * A number introduced by "Tarefa", "Task" or "Ticket" (case-insensitive) is
+ * preferred; otherwise the first bare `#` followed by 3–6 digits is used.
+ * Returns null when neither form is found.
+ */
+function parseTaskRef(text: string): string | null {
+  const labelled = /(?:Tarefa|Task|Ticket)[:\s]*(#?\d+)/i.exec(text)
+  if (labelled) {
+    const ref = labelled[1]
+    return ref.startsWith('#') ? ref : `#${ref}`
+  }
+  const hashed = /#(\d{3,6})/.exec(text)
+  if (hashed === null) return null
+  return `#${hashed[1]}`
+}
+
+// Hours, optionally followed by minutes: "2h", "2 horas", "2h 30m", "1h30".
+// The unit must not run on into a longer word (so "3 handlers" is not 3 h),
+// and a trailing number only counts as minutes when it carries a minute unit
+// or is not followed by a word (so "2h 5 ficheiros" is 2 h, not 2 h 5 m).
+const DURATION_HOURS_RE =
+  /(\d+)\s*h(?:oras?|rs?)?(?![a-z\u00c0-\u024f])(?:\s*(\d+)(?:\s*m(?:in(?:utos?|s)?)?(?![a-z\u00c0-\u024f])|(?!\s*[a-z\u00c0-\u024f\d])))?/i
+// Minutes only: "45 min", "5 minutos", "30m" — but not "5 modules".
+const DURATION_MINUTES_RE = /(\d+)\s*m(?:in(?:utos?|s)?)?(?![a-z\u00c0-\u024f])/i
+
+/**
+ * Extracts the first duration found in the text and converts it to seconds:
+ * "~2h 30m" → 9000, "~45 min" → 2700, "5 minutos" → 300.
+ *
+ * An hours expression anywhere in the text takes precedence over a
+ * minutes-only expression. Returns null when no duration is recognised.
+ */
+function parseDuration(text: string): number | null {
+  const hoursMatch = DURATION_HOURS_RE.exec(text)
+  if (hoursMatch) {
+    const hours = parseInt(hoursMatch[1], 10)
+    const minutes = hoursMatch[2] ? parseInt(hoursMatch[2], 10) : 0
+    return hours * 3600 + minutes * 60
+  }
+  const minutesMatch = DURATION_MINUTES_RE.exec(text)
+  if (minutesMatch) return parseInt(minutesMatch[1], 10) * 60
+  return null
+}
+
+/** Returns the element's `text` (empty string when absent) with whitespace normalized by `norm`. */
+function textOf(el: HTMLElement): string {
+  const plain = el.text ?? ''
+  return norm(plain)
+}
+
+/**
+ * Collects the text of every `<li>` found in the `<ul>`, `<ol>` and `<p>`
+ * siblings that follow a heading, stopping at the next heading (`h1`–`h6`).
+ * Other sibling tags are skipped; empty items are dropped.
+ *
+ * `<p>` is included because some comments split the list across several
+ * paragraphs.
+ *
+ * NOTE(review): `querySelectorAll('li')` presumably returns nested items too,
+ * so an `<li>` inside a nested list would appear both within its parent's
+ * text and as its own entry — confirm whether that is intended.
+ */
+function collectFollowingListItems(heading: HTMLElement): string[] {
+  const collected: string[] = []
+  for (
+    let node: HTMLElement | null = heading.nextElementSibling;
+    node;
+    node = node.nextElementSibling
+  ) {
+    const tag = node.rawTagName?.toLowerCase()
+    if (tag && /^h[1-6]$/.test(tag)) break
+    if (tag !== 'ul' && tag !== 'ol' && tag !== 'p') continue
+    for (const li of node.querySelectorAll('li')) {
+      const itemText = textOf(li)
+      if (itemText) collected.push(itemText)
+    }
+  }
+  return collected
+}
+
+/**
+ * Parses one action item.
+ *
+ * A leading checkbox (`[ ]`, `[x]`, `[X]`, `[✓]`) is stripped first. If the
+ * remainder starts with `[Tipo]`, that becomes `tipo` and the rest the
+ * description; otherwise `tipo` is `'Geral'` and the whole remainder is the
+ * description. `prioridade` is the first standalone `P0`–`P4` token in the
+ * description (upper-cased), or null.
+ */
+function parseActionItem(raw: string): { tipo: string; descricao: string; prioridade: string | null } {
+  const withoutCheckbox = raw.trim().replace(/^\[[\s xX✓]\]\s*/, '')
+  const tagged = /^\[([^\]]+)\]\s*(.+)$/.exec(withoutCheckbox)
+  const tipo = tagged ? tagged[1].trim() : 'Geral'
+  const descricao = tagged ? tagged[2].trim() : withoutCheckbox
+  const prioMatch = /\b(P[0-4])\b/i.exec(descricao)
+  const prioridade = prioMatch === null ? null : prioMatch[1].toUpperCase()
+  return { tipo, descricao, prioridade }
+}
+
+/**
+ * Splits one item into problem and solution.
+ *
+ * The text is split on `->`, `→`, `| Solução:` or `Solução:` (accents
+ * optional, case-insensitive). The first part, minus a leading `Problema:`,
+ * is the problem; the remaining parts joined by a space are the solution.
+ * Without a separator the whole text is the problem and the solution is ''.
+ */
+function parseProblemItem(raw: string): { problema: string; solucao: string } {
+  const text = raw.trim()
+  const [head, ...tail] = text.split(/\s*(?:->|→|\|\s*Solu[çc][ãa]o:|\s*Solu[çc][ãa]o:)\s*/i)
+  if (tail.length === 0) {
+    return { problema: text, solucao: '' }
+  }
+  const problema = head.replace(/^Problema:\s*/i, '').trim()
+  const solucao = tail.join(' ').trim()
+  return { problema, solucao }
+}
+
+/** Fallback: returns the normalized, non-empty text of every `<li>` under `root`. */
+function extractAllLiItems(root: HTMLElement): string[] {
+  const texts: string[] = []
+  for (const li of root.querySelectorAll('li')) {
+    const itemText = textOf(li)
+    if (itemText) texts.push(itemText)
+  }
+  return texts
+}
+
+/**
+ * Parses the HTML of one worklog comment into structured fields.
+ *
+ * - `title` is the first heading with non-empty text.
+ * - `created_at` is `meta.created_at` unless that is empty or starts with
+ *   "1970"; then a date is taken from the title, else from the first 500
+ *   characters of the text, else the current time is used.
+ * - `task_ref` and `duration_sec` are searched for in the whole text.
+ * - Sections are located by normalized heading text (exact match first, then
+ *   prefix match) and their following list items are collected.
+ * - For discussion 33, when no action section yields items, every `<li>` in
+ *   the comment is treated as an action.
+ *
+ * @param html Comment body; empty/falsy input is parsed as an empty document.
+ * @param meta Identifiers and date supplied by the caller.
+ */
+export function parseWorklogHtml(
+  html: string,
+  meta: { id: number; discussion_id: number; created_at: string; staff_id?: number | null },
+): ParsedWorklogComment {
+  const root = parse(html || '')
+  const headings = root.querySelectorAll('h1, h2, h3, h4, h5, h6')
+  const fullText = textOf(root)
+
+  // Title: first heading with non-empty text.
+  let title: string | null = null
+  for (const h of headings) {
+    const t = textOf(h)
+    if (t) { title = t; break }
+  }
+
+  // Date: keep the caller's value unless it is missing or an epoch placeholder.
+  let createdAt = meta.created_at
+  if (!createdAt || createdAt.startsWith('1970')) {
+    createdAt =
+      (title ? parseDateFromTitle(title) : null) ??
+      parseDateFromTitle(fullText.slice(0, 500)) ??
+      new Date().toISOString()
+  }
+
+  const taskRef = parseTaskRef(fullText)
+  const durationSec = parseDuration(fullText)
+
+  // Index headings by normalized key; the first heading wins on duplicates.
+  // Headings whose key is empty (blank, or only punctuation/emoji) are left
+  // out: an empty key is a prefix of every target and would otherwise be
+  // returned by the prefix fallback for any section without an exact match.
+  const sections = new Map<string, HTMLElement>()
+  for (const h of headings) {
+    const key = sectionKey(textOf(h))
+    if (key && !sections.has(key)) sections.set(key, h)
+  }
+
+  function findSection(target: Set<string>): HTMLElement | null {
+    for (const [k, el] of sections) {
+      if (target.has(k)) return el
+    }
+    // Prefix match in either direction (e.g. a heading that starts with
+    // "trabalho realizado" followed by extra words).
+    for (const [k, el] of sections) {
+      for (const t of target) {
+        if (k.startsWith(t) || t.startsWith(k)) return el
+      }
+    }
+    return null
+  }
+
+  const workHeading = findSection(SECTION_WORK)
+  const filesHeading = findSection(SECTION_FILES)
+  const problemsHeading = findSection(SECTION_PROBLEMS)
+  const patternsHeading = findSection(SECTION_PATTERNS)
+  const actionsHeading = findSection(SECTION_ACTIONS)
+
+  const workItems = workHeading ? collectFollowingListItems(workHeading) : []
+  const filesModified = filesHeading ? collectFollowingListItems(filesHeading) : []
+  const problemsRaw = problemsHeading ? collectFollowingListItems(problemsHeading) : []
+  const patternsText = patternsHeading ? collectFollowingListItems(patternsHeading) : []
+  const actionsRaw = actionsHeading ? collectFollowingListItems(actionsHeading) : []
+
+  // Discussion 33 uses a different layout: when no action section produced
+  // items, treat every <li> in the comment as an action.
+  let actions = actionsRaw.map(parseActionItem)
+  if (meta.discussion_id === 33 && actions.length === 0) {
+    actions = extractAllLiItems(root).map(parseActionItem)
+  }
+
+  const problems = problemsRaw.map(parseProblemItem)
+
+  return {
+    id: meta.id,
+    discussion_id: meta.discussion_id,
+    created_at: createdAt,
+    staff_id: meta.staff_id ?? null,
+    title,
+    task_ref: taskRef,
+    duration_sec: durationSec,
+    work_items: workItems,
+    files_modified: filesModified,
+    problems,
+    patterns_text: patternsText,
+    actions,
+    raw_html: html,
+  }
+}
+
+/**
+ * Reduces the `created` value of a comment to a string.
+ *
+ * Strings are returned as-is; for objects the string `date` property is used,
+ * else the string `datetime` property. Anything else (including an empty
+ * object) yields ''.
+ */
+function normalizeMcpDate(v: unknown): string {
+  if (typeof v === 'string') return v
+  if (v === null || typeof v !== 'object') return ''
+  const { date, datetime } = v as Record<string, unknown>
+  if (typeof date === 'string') return date
+  return typeof datetime === 'string' ? datetime : ''
+}
+
+/**
+ * Flattens a comment tree into a list in pre-order: each comment is followed
+ * by all of its descendants before the next sibling.
+ */
+function flattenComments(comments: RawComment[]): RawComment[] {
+  const flat: RawComment[] = []
+  const visit = (node: RawComment): void => {
+    flat.push(node)
+    for (const child of node.children ?? []) visit(child)
+  }
+  for (const comment of comments) visit(comment)
+  return flat
+}
+
+/** Counters returned by `importWorklogDiscussion` for one discussion. */
+export interface ImportResult {
+  discussion_id: number
+  /** Comments received from the gateway, nested replies included. */
+  fetched: number
+  /** Comments whose id was not yet in the database. */
+  imported: number
+  /** Comments whose existing row was overwritten. */
+  updated: number
+  /** Comments dropped because their date is before `sinceIso`. */
+  skipped: number
+  /** Comments for which parsing or the database write threw. */
+  errors: number
+}
+
+/**
+ * Imports every comment of one Desk discussion into `worklog_comments`.
+ *
+ * Pages are requested with `limit`/`offset` until a page brings no comment
+ * that has not already been processed in this run, or `maxPages` is reached.
+ * The import is idempotent per comment `id`: existing rows are overwritten
+ * (their `raw_html` may have changed).
+ *
+ * @param db Target database.
+ * @param discussionId Discussion to import; also used as `discussion_id` for
+ *   comments that do not carry one.
+ * @param opts `sinceIso`: comments whose `created_at` string sorts before it
+ *   are skipped. `pageSize`: requested page size (default 200). `maxPages`:
+ *   upper bound on requests (default 20).
+ * @returns Counters for the run. Per-comment failures are logged and counted
+ *   in `errors`; gateway failures reject the promise.
+ */
+export async function importWorklogDiscussion(
+  db: SessionsDb,
+  discussionId: number,
+  opts: { sinceIso?: string; pageSize?: number; maxPages?: number } = {},
+): Promise<ImportResult> {
+  // The desk-crm MCP appears to clamp results at 200 per page regardless of
+  // `limit`, so a short page cannot be taken as "last page".
+  const pageSize = opts.pageSize ?? 200
+  const maxPages = opts.maxPages ?? 20
+  const result: ImportResult = {
+    discussion_id: discussionId,
+    fetched: 0,
+    imported: 0,
+    updated: 0,
+    skipped: 0,
+    errors: 0,
+  }
+
+  // Ids already handled in this run. Guards against a gateway that ignores
+  // `offset` (same page over and over) and against overlapping pages, so no
+  // comment is counted or written twice.
+  const seenIds = new Set<number>()
+  let offset = 0
+  for (let page = 0; page < maxPages; page++) {
+    const raw = await callMcpTool('get_discussion_comments', {
+      discussion_id: discussionId,
+      limit: pageSize,
+      offset,
+    })
+    const payload = extractMcpJsonPayload<{
+      success?: boolean
+      comments?: RawComment[]
+    }>(raw)
+    const topLevel = payload.comments ?? []
+
+    const fresh: RawComment[] = []
+    for (const c of flattenComments(topLevel)) {
+      if (seenIds.has(c.id)) continue
+      seenIds.add(c.id)
+      fresh.push(c)
+    }
+    // Empty page, or a page with nothing new: the discussion is exhausted.
+    if (fresh.length === 0) break
+    result.fetched += fresh.length
+
+    const importedAt = new Date().toISOString()
+    for (const c of fresh) {
+      try {
+        const parsed = parseWorklogHtml(c.content ?? '', {
+          id: c.id,
+          discussion_id: c.discussion_id ?? discussionId,
+          created_at: normalizeMcpDate(c.created),
+          staff_id: c.staff_id,
+        })
+        if (opts.sinceIso && parsed.created_at < opts.sinceIso) {
+          result.skipped++
+          continue
+        }
+        const record: WorklogCommentRecord = {
+          ...parsed,
+          imported_at: importedAt,
+        }
+        const { inserted } = db.upsertWorklogComment(record)
+        if (inserted) result.imported++
+        else result.updated++
+      } catch (e: unknown) {
+        const message = e instanceof Error ? e.message : String(e)
+        console.error(`[worklog-import] erro a parsear comentário #${c.id}:`, message)
+        result.errors++
+      }
+    }
+
+    // Advance by the number of top-level comments, not the flattened count.
+    // NOTE(review): whether the gateway's `offset` counts top-level comments
+    // or all rows is not visible here. If it counts top-level comments, the
+    // flattened count would skip entries; if it counts all rows, this merely
+    // re-fetches some rows, which `seenIds` filters out. Confirm with the API.
+    offset += topLevel.length
+  }
+  return result
+}