From 37522386994671d62675e8d23213c42be0117194 Mon Sep 17 00:00:00 2001
From: Emanuel Almeida <emanuel@descomplicar.pt>
Date: Sun, 28 Jun 2026 22:39:23 +0100
Subject: [PATCH] =?UTF-8?q?feat(okf-hub):=20Fase=20F=20=E2=80=94=20convert?=
 =?UTF-8?q?er=20fence-aware=20(path-style/acentos/pipe=20escapado)=20+=20v?=
 =?UTF-8?q?alidador=20fence-aware?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 okf-hub/okf-classify-dev.py      |  22 +++
 okf-hub/okf-convert-wikilinks.py | 300 ++++++++++++++++++++-----------
 okf-hub/okf-normalize.py         |   9 +
 okf-hub/okf-validate.sh          |   9 +-
 4 files changed, 233 insertions(+), 107 deletions(-)
 create mode 100644 okf-hub/okf-classify-dev.py
 mode change 100644 => 100755 okf-hub/okf-convert-wikilinks.py

diff --git a/okf-hub/okf-classify-dev.py b/okf-hub/okf-classify-dev.py
new file mode 100644
index 0000000..f4d9d4b
--- /dev/null
+++ b/okf-hub/okf-classify-dev.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+"""Add a `layer: raw|wiki` frontmatter key to the .md files under Dev (docs -> wiki; ephemeral/tests/samples -> raw)."""
+import sys,re
+from pathlib import Path
+ROOT=Path("/media/ealmeida/Dados/Dev"); DRY="--dry-run" in sys.argv
+EXCL_DIR=re.compile(r'/(node_modules|\.git|venv|\.venv|vendor|dist|build|site-packages|__pycache__|target|3rdparty)/')
+RAW=re.compile(r'(worklog|/logs?/|sess[aã]o|di[aá]rio|checkup|deep-research|/fontes?/|pesquisa|/tests?/|/__tests__/|/fixtures?/|/examples?/|/samples?/|/test-data/|/\.cache/|CHANGELOG-old|reuni[aã]o)', re.I)
+add=0  # number of files that received (or, in dry-run, would receive) a layer key
+for f in ROOT.rglob("*.md"):
+    s=str(f)
+    if EXCL_DIR.search(s) or any(p.startswith(".") for p in f.parts): continue  # dependency/build dirs and any dot-path
+    if f.name in ("index.md","log.md"): continue
+    try: t=f.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError): continue  # unreadable or non-UTF-8: skip, but never swallow Ctrl-C/SystemExit
+    if not t.startswith("---"): continue  # no frontmatter: nothing to annotate
+    e=t.find("\n---",3)  # offset of the newline preceding the closing '---'
+    if e<0: continue
+    if re.search(r'^layer:',t[3:e],re.M): continue  # already classified
+    layer = "raw" if RAW.search(s) else "wiki"  # classification is by path only, not by content
+    if not DRY: f.write_text(t[:e]+f"\nlayer: {layer}"+t[e:],encoding="utf-8")  # append the key as the last frontmatter line
+    add+=1
+print(f"{'[DRY] ' if DRY else ''}layer adicionado: {add}")
diff --git a/okf-hub/okf-convert-wikilinks.py b/okf-hub/okf-convert-wikilinks.py
old mode 100644
new mode 100755
index 4fb0464..f281a2b
--- a/okf-hub/okf-convert-wikilinks.py
+++ b/okf-hub/okf-convert-wikilinks.py
@@ -1,153 +1,241 @@
 #!/usr/bin/env python3
 """
-okf-convert-wikilinks.py — Fase 3: Converte [[wikilinks]] → [texto](path.md) nos index.md
-OKF §5: links bundle-relative para navegação entre conceitos
+okf-convert-wikilinks.py — Fase F: converte [[wikilinks]] → [texto](path.md) no CORPO
+de notas `layer: wiki`, para compatibilidade com agentes CLI (que não resolvem [[ ]]).
 
-Âmbito: apenas ficheiros index.md (navegação)
-Corpo de documentos (PROC, QR, etc.) mantém wikilinks — OKF tolera e Obsidian renderiza ambos.
+Propriedades de segurança:
+  - FENCE-AWARE: nunca toca em code fences (``` ~~~) nem inline `code` — preserva
+    sintaxe bash `[[ -f x ]]` e exemplos de documentação.
+  - Resolução robusta: mesmo-directório → path-style (relativo, depois vault-root,
+    depois basename único) → stem global único. Acentos normalizados (NFKD), pipe
+    escapado `\\|` tratado, anchors `#`/`^` preservados no URL.
+  - Conservador: alvos ambíguos (stem repetido) ou não encontrados ficam [[ ]] e são
+    reportados — NUNCA se adivinha um destino.
+  - Exclusões alinhadas com okf-validate.sh (MEMORY/CLAUDE/AGENTS, 90-Templates, etc).
 
-Uso:
-  python3 okf-convert-wikilinks.py [--dry-run] [--dir /path/to/Hub]
-
-Criado: 28-06-2026
+Uso: python3 okf-convert-wikilinks.py [--dry-run] [--dir=/path/Hub] [--include-index]
 """
-
 import os
 import re
 import sys
+import unicodedata
 from pathlib import Path
+from collections import defaultdict, Counter
 
 HUB_DEFAULT = "/media/ealmeida/Dados/Hub"
 
-EXCLUDE_DIRS = {".stversions", "node_modules", ".git", ".obsidian", ".trash"}
+EXCLUDE_DIRS = {
+    ".stversions", "node_modules", ".git", ".obsidian", ".trash", ".ijfw", "ijfw",
+    ".github", ".wayland", ".hermes", ".vscode", ".cursor", ".gstack",
+    "_templates", "99-Arquivo", "90-Templates",
+}
+EXCLUDE_FILES = {"MEMORY.md", "CLAUDE.md", "GEMINI.md", "AGENTS.md", "copilot-instructions.md"}
 
-# Padrão wikilink: [[NomeFicheiro]] ou [[NomeFicheiro|Alias]]
-WIKILINK_RE = re.compile(r'\[\[([^\]|]+)(?:\|([^\]]+))?\]\]')
+WIKILINK_RE = re.compile(r'\[\[([^\]]+?)\]\]')
+FENCE_RE = re.compile(r'^(\s*)(```|~~~)')
+FM_RE = re.compile(r'^---\n(.*?)\n---', re.DOTALL)
+SKIP_PROSE = {"", " ", "...", "wikilinks", "Wikilinks"}
 
 
-def build_file_index(hub: Path) -> dict:
-    """Constrói índice nome→path para resolução de wikilinks."""
-    index = {}  # stem → Path relativo ao hub
+def fold(s: str) -> str:  # comparison key: NFKD-decompose, drop combining marks (accents), then lowercase
+    return "".join(c for c in unicodedata.normalize("NFKD", s) if not unicodedata.combining(c)).lower()
+
+
+def is_excluded(rel: str) -> bool:  # True when the file's basename is in EXCLUDE_FILES or looks like MEMORY-*.md
+    base = Path(rel).name  # only the basename is tested; the directory part of rel is ignored
+    if base in EXCLUDE_FILES:
+        return True
+    if re.match(r'MEMORY-.*\.md$', base):  # MEMORY-<anything>.md variants
+        return True
+    return False
+
+
+def layer_of(text: str):  # value of the frontmatter `layer:` key, or None when there is no frontmatter or no such key
+    m = FM_RE.match(text)  # frontmatter must open with '---' on the very first line
+    if not m:
+        return None
+    lm = re.search(r'^layer:\s*(\w+)', m.group(1), re.M)
+    return lm.group(1) if lm else None
+
+
+def build_stem_index(hub: Path) -> dict:
+    """Map each folded file stem to the list of hub-relative .md paths bearing it (a list, so ambiguity is detectable)."""
+    idx = defaultdict(list)
     for root, dirs, files in os.walk(hub):
         dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS and not d.startswith(".")]
-        for fname in files:
-            if fname.endswith(".md"):
-                fp = Path(root) / fname
-                stem = fp.stem.lower()
-                rel = fp.relative_to(hub)
-                # Guardar o primeiro match (mais provável no vault activo)
-                if stem not in index:
-                    index[stem] = rel
-                # Também indexar o nome completo sem extensão
-                full_name = fname.lower()
-                if full_name not in index:
-                    index[full_name] = rel
-    return index
+        for f in files:
+            if f.endswith(".md"):
+                fp = Path(root) / f
+                idx[fold(fp.stem)].append(fp.relative_to(hub))  # every match is kept, not just the first
+    return idx
 
 
-def resolve_wikilink(target: str, current_file: Path, file_index: dict, hub: Path) -> str:
-    """Resolve [[target]] para um caminho relativo ao ficheiro actual."""
-    # Limpar o target (remover ^anchor, #heading, etc.)
-    target_clean = re.split(r'[#^]', target)[0].strip()
-    target_lower = target_clean.lower()
-    target_with_ext = target_lower + ".md" if not target_lower.endswith(".md") else target_lower
-
-    # Tentar resolver
-    resolved = file_index.get(target_with_ext) or file_index.get(target_lower)
-
-    if resolved:
-        # Calcular path relativo a partir do directório do ficheiro actual
-        try:
-            rel_path = os.path.relpath(hub / resolved, current_file.parent)
-            return rel_path.replace("\\", "/")
-        except Exception:
-            return str(resolved)
+def ci_dir_lookup(dirpath: Path, basename: str, hub: Path):
+    """Find basename (".md" appended if missing) in dirpath, ignoring case and accents; return its hub-relative Path or None."""
+    if not dirpath.exists():
+        return None
+    want = fold(basename if basename.endswith(".md") else basename + ".md")
+    try:
+        for e in dirpath.iterdir():
+            if e.is_file() and fold(e.name) == want:  # first entry whose folded name matches wins
+                return e.relative_to(hub)
+    except Exception:  # best-effort: any failure while listing or relativizing is treated as "not found"
+        pass
     return None
 
 
-def convert_wikilinks_in_file(filepath: Path, file_index: dict, hub: Path, dry_run: bool) -> dict:
-    """Converte wikilinks no ficheiro. Retorna estatísticas."""
-    result = {"file": str(filepath.relative_to(hub)), "converted": 0, "unresolved": [], "action": "skip"}
-
-    try:
-        content = filepath.read_text(encoding="utf-8")
-    except Exception as e:
-        result["action"] = "error"
-        result["error"] = str(e)
-        return result
-
-    if "[[" not in content:
-        result["action"] = "no_wikilinks"
-        return result
-
-    def replace_wikilink(m):
-        target = m.group(1)
-        alias = m.group(2)
-        display = alias if alias else target
-
-        resolved_path = resolve_wikilink(target, filepath, file_index, hub)
-        if resolved_path:
-            result["converted"] += 1
-            return f"[{display}]({resolved_path})"
-        else:
-            # Manter como wikilink se não resolvível
-            result["unresolved"].append(target)
-            return m.group(0)
-
-    new_content = WIKILINK_RE.sub(replace_wikilink, content)
-
-    if new_content != content:
-        result["action"] = "converted"
-        if not dry_run:
-            filepath.write_text(new_content, encoding="utf-8")
+def resolve(target: str, current: Path, hub: Path, stem_idx: dict):
+    """Return (relpath, anchor), ("AMBIG", "") when a bare name matches several stems, or None when nothing matches."""
+    raw = target.strip()
+    split = re.split(r'(?=[#^])', raw, maxsplit=1)  # cut just before the first '#' or '^' (heading / block anchor)
+    name = split[0].strip().rstrip("\\").strip()  # drop the backslash left over from an escaped pipe
+    anchor = raw[len(split[0]):].strip() if len(split) > 1 else ""
+    if not name:  # anchor-only target such as [[#heading]]
+        return None
+    cand = None
+    if "/" in name:  # path-style target: relative to the current note, then to the hub root, then unique stem
+        bn = Path(name).name
+        td = (current.parent / Path(name).parent).resolve()
+        if td == hub or hub in td.parents:  # never look outside the hub
+            cand = ci_dir_lookup(td, bn, hub)
+        if cand is None:
+            td2 = (hub / Path(name).parent).resolve()
+            if td2 == hub or hub in td2.parents:
+                cand = ci_dir_lookup(td2, bn, hub)
+        if cand is None:
+            lst = stem_idx.get(fold(Path(bn).stem), [])
+            if len(lst) == 1:  # NOTE(review): several matches fall through to None here, not "AMBIG" — confirm intended
+                cand = lst[0]
     else:
-        result["action"] = "no_changes"
+        cand = ci_dir_lookup(current.parent, name, hub)  # bare name: same directory first
+        if cand is None:
+            lst = stem_idx.get(fold(Path(name).stem), [])
+            if len(lst) == 1:
+                cand = lst[0]
+            elif len(lst) > 1:
+                return ("AMBIG", "")
+    if cand is None:
+        return None
+    relp = os.path.relpath(hub / cand, current.parent).replace("\\", "/")  # relative to the current note's directory
+    return (relp, anchor)
 
-    return result
+
+def convert_file(fp: Path, hub: Path, stem_idx: dict, stats: dict) -> str:
+    """Return fp's text with resolvable body [[wikilinks]] rewritten as [text](relative/path.md); tallies are added to stats."""
+    content = fp.read_text(encoding="utf-8", errors="replace")
+    lines = content.split("\n")
+    out = []
+    in_fence, fmark = False, None  # inside a ``` / ~~~ fence, and which marker opened it
+    fm_end = -1  # index of the line closing the frontmatter (-1: no frontmatter)
+    if lines and lines[0].strip() == "---":
+        for i in range(1, len(lines)):
+            if lines[i].strip() == "---":
+                fm_end = i
+                break
+    for idx, line in enumerate(lines):
+        if idx <= fm_end:  # frontmatter is copied through untouched
+            out.append(line)
+            continue
+        fm = FENCE_RE.match(line)
+        if not in_fence and fm:
+            in_fence = True
+            fmark = fm.group(2)
+            out.append(line)
+            continue
+        if in_fence:  # fenced code is copied verbatim until a line starting with the same marker
+            if line.strip().startswith(fmark):
+                in_fence = False
+            out.append(line)
+            continue
+        parts = re.split(r'(`[^`]*`)', line)  # isolate inline `code` spans so they are never rewritten
+        for j, seg in enumerate(parts):
+            if seg.startswith("`"):
+                continue
+
+            def repl(mm):
+                inner = mm.group(1)
+                if "|" in inner:
+                    tgt, alias = inner.split("|", 1)
+                    tgt = tgt.strip().rstrip("\\").strip()  # "\|" is the pipe escape used inside tables
+                    alias = alias.strip()
+                else:
+                    tgt, alias = inner.strip(), None
+                if tgt in SKIP_PROSE:  # prose talking about the [[ ]] syntax itself, not a real link
+                    stats["prose"] += 1
+                    return mm.group(0)
+                r = resolve(tgt, fp, hub, stem_idx)
+                if r is None:
+                    stats["unresolved"][tgt] += 1
+                    return mm.group(0)
+                if r[0] == "AMBIG":
+                    stats["ambiguous"][tgt] += 1
+                    return mm.group(0)
+                relp, anchor = r
+                disp = alias if alias else tgt
+                url = (relp + ("#" + anchor.lstrip("#^") if anchor else "")).replace(" ", "%20")  # a literal space would end the link destination
+                stats["converted"] += 1
+                return f"[{disp}]({url})"
+
+            parts[j] = WIKILINK_RE.sub(repl, seg)
+        out.append("".join(parts))
+    return "\n".join(out)
 
 
 def main():
     dry_run = "--dry-run" in sys.argv
+    include_index = "--include-index" in sys.argv  # index.md files are skipped unless this flag is given
     hub = Path(HUB_DEFAULT)
     for arg in sys.argv[1:]:
         if arg.startswith("--dir="):
             hub = Path(arg[6:])
-
     if not hub.exists():
         print(f"ERRO: Hub não encontrado em {hub}", file=sys.stderr)
         sys.exit(1)
 
-    print(f"{'[DRY-RUN] ' if dry_run else ''}A construir índice de ficheiros…")
-    file_index = build_file_index(hub)
-    print(f"  {len(file_index)} ficheiros indexados")
+    print(f"{'[DRY-RUN] ' if dry_run else ''}A indexar ficheiros…")
+    stem_idx = build_stem_index(hub)
+    print(f"  {len(stem_idx)} stems indexados")
 
-    print(f"A converter wikilinks nos index.md…")
-    total_converted = 0
-    total_unresolved = []
+    stats = {"converted": 0, "prose": 0, "unresolved": Counter(), "ambiguous": Counter()}
     files_changed = 0
-
     for root, dirs, files in os.walk(hub):
         dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS and not d.startswith(".")]
-        for fname in files:
-            if fname != "index.md":
+        for f in files:
+            if not f.endswith(".md"):
                 continue
-            filepath = Path(root) / fname
-            result = convert_wikilinks_in_file(filepath, file_index, hub, dry_run)
-
-            if result["action"] == "converted":
+            if f == "index.md" and not include_index:
+                continue
+            rel = str((Path(root) / f).relative_to(hub))
+            if is_excluded(rel):
+                continue
+            fp = Path(root) / f
+            try:
+                txt = fp.read_text(encoding="utf-8")  # strict decode: a lossy read followed by write-back would corrupt the file
+            except (OSError, UnicodeDecodeError):  # unreadable or not valid UTF-8: leave the file alone
+                continue
+            if layer_of(txt) != "wiki":  # only notes explicitly marked `layer: wiki` are converted
+                continue
+            if "[[" not in txt:
+                continue
+            new = convert_file(fp, hub, stem_idx, stats)
+            if new != txt:
                 files_changed += 1
-                total_converted += result["converted"]
-                total_unresolved.extend(result["unresolved"])
-                print(f"  [OK] {result['file']}: {result['converted']} convertidos"
-                      + (f", {len(result['unresolved'])} não resolvidos" if result["unresolved"] else ""))
-            elif result["action"] == "error":
-                print(f"  [ERRO] {result['file']}: {result.get('error')}")
+                if not dry_run:
+                    fp.write_text(new, encoding="utf-8")
+                print(f"  [{'DRY' if dry_run else 'OK'}] {rel}")
 
-    print(f"\n=== Resultado ===")
+    print("\n=== Resultado ===")
     print(f"Ficheiros alterados: {files_changed}")
-    print(f"Wikilinks convertidos: {total_converted}")
-    if total_unresolved:
-        print(f"Não resolvidos ({len(total_unresolved)}): {', '.join(set(total_unresolved))[:200]}")
+    print(f"Wikilinks convertidos: {stats['converted']}")
+    print(f"Prosa-sobre-sintaxe saltada: {stats['prose']}")
+    amb = sum(stats["ambiguous"].values())
+    unr = sum(stats["unresolved"].values())
+    if amb:
+        print(f"AMBÍGUOS (deixados): {amb} → {dict(stats['ambiguous'].most_common(10))}")
+    if unr:
+        print(f"NÃO-RESOLVÍVEIS (refs partidas, deixados como [[ ]]): {unr} (distintos {len(stats['unresolved'])})")
+        for t, n in stats["unresolved"].most_common(30):  # cap the listing at the 30 most frequent targets
+            print(f"   {n:3} [[{t}]]")
 
 
 if __name__ == "__main__":
diff --git a/okf-hub/okf-normalize.py b/okf-hub/okf-normalize.py
index 7124566..5295ecf 100644
--- a/okf-hub/okf-normalize.py
+++ b/okf-hub/okf-normalize.py
@@ -27,6 +27,15 @@ EXCLUDE_DIRS = {
     ".obsidian",
     ".trash",
     "99-Arquivo",
+    # dependências / builds / caches (relevante p/ Dev) — dot-dirs já são saltados
+    "venv",
+    "vendor",
+    "dist",
+    "build",
+    "site-packages",
+    "target",
+    "__pycache__",
+    "3rdparty",
 }
 
 # Ficheiros reservados OKF — sem frontmatter obrigatório
diff --git a/okf-hub/okf-validate.sh b/okf-hub/okf-validate.sh
index a80274f..388176e 100755
--- a/okf-hub/okf-validate.sh
+++ b/okf-hub/okf-validate.sh
@@ -213,7 +213,14 @@ validate_file() {
     # Check for wikilinks in body (wiki layer)
     if echo "$content" | grep -q '^layer: wiki'; then
         local body
-        body="$(sed -n '/^---$/,/^---$/d; p' "$f")"
+        body="$(awk '
+            NR==1 && $0=="---" {infm=1; next}
+            infm && $0=="---" {infm=0; next}
+            infm {next}
+            /^[[:space:]]*```/ || /^[[:space:]]*~~~/ {infence=!infence; next}
+            infence {next}
+            {gsub(/`[^`]*`/,""); print}
+        ' "$f")"
         if echo "$body" | grep -q '\[\['; then
             echo -e "${YEL}WARN${NC}  $rel [wiki]: contains wikilinks [[ ]] — convert to [text](path)"
             ((WARNINGS++)) || true