// DashDescomplicar/api/services/sessions/worklog-import.ts

/**
 * Importer dos comentários das discussões Desk #31 (Logs), #32 (Reflexões)
 * e #33 (Acções de Melhoria) para a tabela `worklog_comments`.
 *
 * Parser HTML tolerante — aceita ambos formatos produzidos pelo skill
 * `gestao:worklog` (versão antiga usava `<h2>/<h3>` inline-styled, versão
 * nova usa `<h4>` limpos). Secções identificadas por título normalizado
 * (ex.: "trabalho realizado", "ficheiros modificados", "problemas",
 * "padrões detectados", "acções sugeridas").
 */
import { parse, type HTMLElement } from 'node-html-parser'
import type { SessionsDb, WorklogCommentRecord } from './db.js'
import { callMcpTool, extractMcpJsonPayload } from './mcp-client.js'

/**
 * Structured form of a single worklog comment, as returned by
 * `parseWorklogHtml`.
 */
export interface ParsedWorklogComment {
  /** Comment id, copied from the metadata supplied by the caller. */
  id: number
  /** Id of the discussion the comment belongs to, copied from the metadata. */
  discussion_id: number
  /**
   * Creation timestamp: the caller-supplied value when usable, otherwise a
   * date found in the title or text, otherwise the time of parsing.
   */
  created_at: string
  /** Author id from the metadata, or null when the caller did not provide one. */
  staff_id: number | null
  /** Text of the first non-empty heading, or null when there is none. */
  title: string | null
  /** Task reference in the form `#<digits>` found in the text, or null. */
  task_ref: string | null
  /** Duration in seconds parsed from the text, or null when none is found. */
  duration_sec: number | null
  /** List-item texts found under the "work done" section heading. */
  work_items: string[]
  /** List-item texts found under the "files modified" section heading. */
  files_modified: string[]
  /** Problem/solution pairs parsed from the "problems" section. */
  problems: { problema: string; solucao: string }[]
  /** List-item texts found under the "patterns" section heading. */
  patterns_text: string[]
  /** Action items parsed from the "actions" section (type, description, optional priority). */
  actions: { tipo: string; descricao: string; prioridade: string | null }[]
  /** The HTML string exactly as it was passed to the parser. */
  raw_html: string
}

/**
 * Shape of one comment as read from the `get_discussion_comments` MCP payload.
 */
interface RawComment {
  /** Comment id. */
  id: number
  /** Discussion id; the importer falls back to the requested discussion when it is nullish. */
  discussion_id: number
  /** Comment body; handed to the HTML parser. */
  content: string
  /** Creation date in whatever form the MCP returns; normalised by `normalizeMcpDate`. */
  created: unknown
  /** Author id, or null. */
  staff_id: number | null
  /** Nested comments, flattened by `flattenComments` before import. */
  children?: RawComment[]
}

/** Collapses every run of whitespace into a single space and strips both ends. */
function norm(s: string): string {
  const collapsed = s.replace(/\s+/g, ' ')
  return collapsed.trim()
}

/**
 * Turns free text into a section lookup key: lowercase, diacritics removed,
 * anything that is not `a-z`, `0-9` or a space replaced by a space, and
 * whitespace collapsed/trimmed.
 *
 * @param s Heading text (or any free-form string).
 * @returns The normalised key; may be the empty string.
 */
function sectionKey(s: string): string {
  return s
    .toLowerCase()
    // Decompose accented letters into base letter + combining mark ...
    .normalize('NFD')
    // ... then drop the combining marks (U+0300–U+036F). Written with escapes
    // so the range survives editors/tools that normalise or re-encode the file.
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9 ]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim()
}

// Accepted heading keys (already normalised with `sectionKey`) per section.
// Element order is kept because prefix matching iterates these sets in order.

/** Headings that introduce the list of work done. */
const SECTION_WORK = new Set<string>([
  'trabalho realizado',
  'o que foi feito',
  'feito',
  'realizado',
  'trabalho',
])

/** Headings that introduce the list of modified files. */
const SECTION_FILES = new Set<string>([
  'ficheiros modificados',
  'ficheiros alterados',
  'files modified',
  'ficheiros',
])

/** Headings that introduce problems and their solutions. */
const SECTION_PROBLEMS = new Set<string>([
  'problemas solucoes',
  'problemas',
  'solucoes',
  'problemas e solucoes',
  'problemas solucao',
])

/** Headings that introduce detected patterns. */
const SECTION_PATTERNS = new Set<string>([
  'padroes detectados',
  'padroes',
  'patterns',
  'insights',
])

/** Headings that introduce suggested actions. */
const SECTION_ACTIONS = new Set<string>([
  'accoes sugeridas',
  'accoes',
  'acoes sugeridas',
  'acoes',
  'actions',
  'accoes de melhoria',
])

/**
 * Finds the first valid `YYYY-MM-DD` (optionally followed by ` HH:MM` or
 * `THH:MM`) in the given text and returns it as an ISO-like UTC string.
 *
 * Candidates that are not real calendar dates (e.g. `2024-13-40`,
 * `2023-02-30`) are skipped so they never reach storage. A time part that is
 * out of range (hour > 23 or minute > 59) is ignored and midnight is used.
 *
 * @param title Text to scan (a heading or a slice of the body).
 * @returns `YYYY-MM-DDTHH:MM:00Z`, or null when no valid date is present.
 */
function parseDateFromTitle(title: string): string | null {
  const candidates = title.matchAll(/(\d{4})-(\d{2})-(\d{2})(?:[ T](\d{2}):(\d{2}))?/g)
  for (const m of candidates) {
    const [, y, mo, d, hh, mm] = m
    const year = Number(y)
    const month = Number(mo)
    const day = Number(d)

    // Round-trip through Date.UTC: an impossible date rolls over to another
    // day/month, so any mismatch means the candidate is not a real date.
    const probe = new Date(Date.UTC(year, month - 1, day))
    const isRealDate =
      probe.getUTCFullYear() === year &&
      probe.getUTCMonth() === month - 1 &&
      probe.getUTCDate() === day
    if (!isRealDate) continue

    const hasValidTime =
      hh !== undefined && mm !== undefined && Number(hh) <= 23 && Number(mm) <= 59
    return hasValidTime ? `${y}-${mo}-${d}T${hh}:${mm}:00Z` : `${y}-${mo}-${d}T00:00:00Z`
  }
  return null
}

/**
 * Looks for a task reference in the text.
 *
 * A number introduced by "Tarefa", "Task" or "Ticket" (case-insensitive) wins;
 * otherwise the first `#` followed by 3 to 6 digits is used.
 *
 * @returns The reference as `#<digits>`, or null when nothing matches.
 */
function parseTaskRef(text: string): string | null {
  const labelled = /(?:Tarefa|Task|Ticket)[:\s]*(#?\d+)/i.exec(text)
  if (labelled !== null) {
    const ref = labelled[1]
    return ref.startsWith('#') ? ref : `#${ref}`
  }

  const hashOnly = /#(\d{3,6})/.exec(text)
  if (hashOnly === null) return null
  return `#${hashOnly[1]}`
}

// Hours, optionally followed by minutes: "2h", "2h30", "~2h 30m", "3 horas".
// The unit must not run into further letters, so "3 handlers" is not 3 hours.
const DURATION_HOURS_RE =
  /\b(\d+)\s*(?:horas?|hours?|hrs?|h)(?!\p{L})(?:\s*(\d{1,2})\s*(?:minutos?|minutes?|mins?|m)?(?![\p{L}\d]))?/iu

// Minutes only: "45 min", "5 minutos", "10m". Same letter guard as above,
// so "5 modules" is not 5 minutes.
const DURATION_MINUTES_RE = /\b(\d+)\s*(?:minutos?|minutes?|mins?|m)(?!\p{L})/iu

/**
 * Extracts a duration such as "~2h 30m", "2h30", "~45 min" or "5 minutos"
 * from free text and converts it to seconds.
 *
 * An hours expression takes precedence over a minutes-only one. Unit tokens
 * must stand alone (not be the start of a longer word), which avoids false
 * positives when this runs over a whole comment body.
 *
 * @param text Text to scan.
 * @returns The duration in seconds, or null when no duration is found.
 */
function parseDuration(text: string): number | null {
  const hoursMatch = DURATION_HOURS_RE.exec(text)
  if (hoursMatch) {
    const hours = parseInt(hoursMatch[1], 10)
    const minutes = hoursMatch[2] ? parseInt(hoursMatch[2], 10) : 0
    return hours * 3600 + minutes * 60
  }
  const minutesMatch = DURATION_MINUTES_RE.exec(text)
  if (minutesMatch) return parseInt(minutesMatch[1], 10) * 60
  return null
}

/**
 * Returns the element's text content (markup stripped by the parser's `text`
 * accessor) with whitespace normalised; an absent text yields ''.
 */
function textOf(el: HTMLElement): string {
  const rawText = el.text ?? ''
  return norm(rawText)
}

/**
 * Gathers the text of every `<li>` found in the element siblings that follow
 * a heading, stopping at the next heading (`h1`–`h6`) or at the end of the
 * sibling chain.
 *
 * `<li>` elements are looked up inside `<ul>`, `<ol>` and `<p>` siblings
 * (some comments split a list across several `<p>` wrappers); other sibling
 * tags are skipped. Empty item texts are dropped.
 *
 * @param heading The section heading element.
 * @returns Item texts in document order.
 */
function collectFollowingListItems(heading: HTMLElement): string[] {
  const collected: string[] = []
  for (
    let node: HTMLElement | null = heading.nextElementSibling;
    node;
    node = node.nextElementSibling
  ) {
    const tagName = node.rawTagName?.toLowerCase()
    // The next heading marks the end of this section.
    if (tagName && /^h[1-6]$/.test(tagName)) break
    if (tagName !== 'ul' && tagName !== 'ol' && tagName !== 'p') continue

    for (const li of node.querySelectorAll('li')) {
      const label = textOf(li)
      if (label) collected.push(label)
    }
  }
  return collected
}

/**
 * Parses one action list item.
 *
 * A leading checkbox (`[ ]`, `[x]`, `[X]`, `[✓]`) is discarded. If what
 * remains starts with `[Type] ...`, the bracketed text becomes `tipo` and the
 * remainder becomes `descricao`; otherwise `tipo` is 'Geral' and the whole
 * text is the description. A standalone `P0`–`P4` token (any case) inside the
 * description is reported upper-cased as `prioridade`; the description itself
 * is left untouched.
 */
function parseActionItem(raw: string): { tipo: string; descricao: string; prioridade: string | null } {
  const withoutCheckbox = raw.trim().replace(/^\[[\s xX✓]\]\s*/, '')
  const tagged = /^\[([^\]]+)\]\s*(.+)$/.exec(withoutCheckbox)

  const tipo = tagged ? tagged[1].trim() : 'Geral'
  const descricao = tagged ? tagged[2].trim() : withoutCheckbox

  const priorityHit = /\b(P[0-4])\b/i.exec(descricao)
  const prioridade = priorityHit === null ? null : priorityHit[1].toUpperCase()

  return { tipo, descricao, prioridade }
}

/**
 * Splits one list item into a problem and its solution.
 *
 * The text is cut at `->`, `→`, `| Solução:` or `Solução:` (with or without
 * accents, any case). The first piece, minus a leading `Problema:` label, is
 * the problem; every remaining piece is joined with a space as the solution.
 * With no separator the whole text is the problem and the solution is ''.
 */
function parseProblemItem(raw: string): { problema: string; solucao: string } {
  const text = raw.trim()
  const [head, ...tail] = text.split(/\s*(?:->|→|\|\s*Solu[çc][ãa]o:|\s*Solu[çc][ãa]o:)\s*/i)
  if (tail.length === 0) {
    return { problema: text, solucao: '' }
  }
  const problema = head.replace(/^Problema:\s*/i, '').trim()
  const solucao = tail.join(' ').trim()
  return { problema, solucao }
}

/**
 * Fallback extractor: returns the normalised text of every `<li>` beneath
 * `root`, regardless of section, skipping items whose text is empty.
 */
function extractAllLiItems(root: HTMLElement): string[] {
  const texts: string[] = []
  for (const li of root.querySelectorAll('li')) {
    const itemText = textOf(li)
    if (itemText) texts.push(itemText)
  }
  return texts
}

/**
 * Parses the HTML body of one worklog comment into a structured record.
 *
 * Sections are located by heading text (normalised with `sectionKey`) and
 * their content is the list items that follow the heading. Headings whose
 * text or normalised key is empty are ignored: an empty key would otherwise
 * satisfy the prefix match for every section and hijack it.
 *
 * @param html Comment body; a falsy value is parsed as an empty document.
 * @param meta Identity and timestamp supplied by the caller. When
 *   `created_at` is empty or starts with "1970" the date is taken from the
 *   title, then from the first 500 characters of the text, then from the
 *   current time.
 * @returns The parsed comment; `raw_html` carries `html` unchanged.
 */
export function parseWorklogHtml(
  html: string,
  meta: { id: number; discussion_id: number; created_at: string; staff_id?: number | null },
): ParsedWorklogComment {
  const root = parse(html || '')
  const headings = root.querySelectorAll('h1, h2, h3, h4, h5, h6')
  const fullText = textOf(root)

  // Single pass over the headings: the first non-empty one is the title, and
  // each distinct non-empty key is indexed by its first occurrence.
  let title: string | null = null
  const sections = new Map<string, HTMLElement>()
  for (const h of headings) {
    const headingText = textOf(h)
    if (!headingText) continue
    if (title === null) title = headingText
    const key = sectionKey(headingText)
    // Headings made only of punctuation/symbols normalise to '' — skip them.
    if (key && !sections.has(key)) sections.set(key, h)
  }

  // Date: prefer the caller's value; an empty or epoch-like ("1970...") value
  // is treated as missing and recovered from the content.
  let createdAt = meta.created_at
  if (!createdAt || createdAt.startsWith('1970')) {
    createdAt =
      (title ? parseDateFromTitle(title) : null) ??
      parseDateFromTitle(fullText.slice(0, 500)) ??
      new Date().toISOString()
  }

  const taskRef = parseTaskRef(fullText)
  const durationSec = parseDuration(fullText)

  /** Finds the heading for a section: exact key first, then prefix match either way. */
  function findSection(target: Set<string>): HTMLElement | null {
    for (const [k, el] of sections) {
      if (target.has(k)) return el
    }
    // Partial match, e.g. a heading "trabalho realizado manutencao" for the
    // key "trabalho realizado", or a truncated heading for a longer key.
    for (const [k, el] of sections) {
      for (const t of target) {
        if (k.startsWith(t) || t.startsWith(k)) return el
      }
    }
    return null
  }

  const itemsUnder = (target: Set<string>): string[] => {
    const heading = findSection(target)
    return heading ? collectFollowingListItems(heading) : []
  }

  const workItems = itemsUnder(SECTION_WORK)
  const filesModified = itemsUnder(SECTION_FILES)
  const problems = itemsUnder(SECTION_PROBLEMS).map(parseProblemItem)
  const patternsText = itemsUnder(SECTION_PATTERNS)

  // Discussion #33 uses a different layout: when no actions section yielded
  // items, treat every <li> in the comment as an action.
  let actions = itemsUnder(SECTION_ACTIONS).map(parseActionItem)
  if (meta.discussion_id === 33 && actions.length === 0) {
    actions = extractAllLiItems(root).map(parseActionItem)
  }

  return {
    id: meta.id,
    discussion_id: meta.discussion_id,
    created_at: createdAt,
    staff_id: meta.staff_id ?? null,
    title,
    task_ref: taskRef,
    duration_sec: durationSec,
    work_items: workItems,
    files_modified: filesModified,
    problems,
    patterns_text: patternsText,
    actions,
    raw_html: html,
  }
}

/**
 * Reduces the `created` field of an MCP comment to a string.
 *
 * A string is returned as is; for an object, the first of `date` /
 * `datetime` that holds a string is returned. Anything else (including an
 * empty object or a nullish value) yields ''.
 */
function normalizeMcpDate(v: unknown): string {
  if (typeof v === 'string') return v
  if (typeof v !== 'object' || v === null) return ''

  const fields = v as Record<string, unknown>
  for (const key of ['date', 'datetime']) {
    const candidate = fields[key]
    if (typeof candidate === 'string') return candidate
  }
  return ''
}

/**
 * Flattens a comment tree depth-first: each comment is followed immediately
 * by its (recursively flattened) children. The comment objects themselves
 * are not copied or modified.
 */
function flattenComments(comments: RawComment[]): RawComment[] {
  return comments.flatMap((comment) => {
    const replies = comment.children
    if (!replies || replies.length === 0) return [comment]
    return [comment, ...flattenComments(replies)]
  })
}

/** Counters reported by `importWorklogDiscussion` for one discussion. */
export interface ImportResult {
  /** Discussion that was imported. */
  discussion_id: number
  /** Comments received across all pages, counted after flattening nested replies. */
  fetched: number
  /** Comments for which the upsert reported a new row. */
  imported: number
  /** Comments for which the upsert did not report a new row. */
  updated: number
  /** Comments ignored because they are older than the `sinceIso` cutoff. */
  skipped: number
  /** Comments whose parsing or upsert threw. */
  errors: number
}

/**
 * Imports every comment of one Desk discussion into the sessions database.
 *
 * Pages are requested through the `get_discussion_comments` MCP tool with
 * `limit`/`offset` until an empty page comes back or `maxPages` is reached.
 * Each comment is parsed and upserted by id, so re-running the import
 * refreshes existing rows instead of duplicating them. A failure on one
 * comment is logged and counted; it does not abort the run. A failure of the
 * MCP call itself propagates to the caller.
 *
 * @param db Database handle used for the upsert.
 * @param discussionId Discussion to import.
 * @param opts.sinceIso Comments created before this instant are skipped.
 * @param opts.pageSize Page size to request (default 200).
 * @param opts.maxPages Upper bound on the number of requests (default 20).
 * @returns Counters describing what happened.
 */
export async function importWorklogDiscussion(
  db: SessionsDb,
  discussionId: number,
  opts: { sinceIso?: string; pageSize?: number; maxPages?: number } = {},
): Promise<ImportResult> {
  // The desk-crm MCP appears to clamp results at 200 per page regardless of
  // the requested limit, so ask for 200 and walk the offset.
  const pageSize = opts.pageSize ?? 200
  const maxPages = opts.maxPages ?? 20
  const sinceIso = opts.sinceIso
  const sinceMs = sinceIso ? Date.parse(sinceIso) : Number.NaN
  const result: ImportResult = {
    discussion_id: discussionId,
    fetched: 0,
    imported: 0,
    updated: 0,
    skipped: 0,
    errors: 0,
  }

  /**
   * True when `createdAt` is before the cutoff. Timestamps are compared as
   * instants when both parse, because the stored value may be
   * "YYYY-MM-DD HH:MM:SS" while the cutoff is ISO with "T"/"Z" and a plain
   * string comparison mis-orders those two formats. Falls back to the string
   * comparison when either side cannot be parsed.
   * NOTE(review): a timestamp without a zone is parsed in the process's local
   * time zone — confirm that matches what the MCP returns.
   */
  const isOlderThanCutoff = (createdAt: string): boolean => {
    if (!sinceIso) return false
    const createdMs = Date.parse(createdAt)
    if (!Number.isNaN(createdMs) && !Number.isNaN(sinceMs)) return createdMs < sinceMs
    return createdAt < sinceIso
  }

  let offset = 0
  for (let page = 0; page < maxPages; page++) {
    const raw = await callMcpTool('get_discussion_comments', {
      discussion_id: discussionId,
      limit: pageSize,
      offset,
    })
    const payload = extractMcpJsonPayload<{
      success?: boolean
      comments?: RawComment[]
    }>(raw)
    const pageComments = flattenComments(payload.comments ?? [])
    // An empty page means the end of the discussion has been reached.
    if (pageComments.length === 0) break
    result.fetched += pageComments.length

    const importedAt = new Date().toISOString()
    for (const c of pageComments) {
      try {
        const parsed = parseWorklogHtml(c.content ?? '', {
          id: c.id,
          discussion_id: c.discussion_id ?? discussionId,
          created_at: normalizeMcpDate(c.created),
          staff_id: c.staff_id,
        })
        if (isOlderThanCutoff(parsed.created_at)) {
          result.skipped++
          continue
        }
        const record: WorklogCommentRecord = {
          ...parsed,
          imported_at: importedAt,
        }
        const { inserted } = db.upsertWorklogComment(record)
        if (inserted) result.imported++
        else result.updated++
      } catch (e) {
        // Anything can be thrown; only Error instances carry a message.
        const message = e instanceof Error ? e.message : String(e)
        console.error(`[worklog-import] erro a parsear comentário #${c.id}:`, message)
        result.errors++
      }
    }

    // Advance by the number of comments received, nested replies included.
    // NOTE(review): if the MCP's `offset` counts only top-level comments,
    // this over-advances whenever a page contains replies — confirm.
    offset += pageComments.length
  }
  return result
}