feat(okf-hub): Fase F — converter fence-aware (path-style/acentos/pipe escapado) + validador fence-aware

This commit is contained in:
2026-06-28 22:39:23 +01:00
parent 0094b45bcf
commit 3752238699
4 changed files with 233 additions and 107 deletions
+22
View File
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""Add a ``layer: raw|wiki`` key to the frontmatter of the .md files under Dev.

Classification is path-based: paths that match the ``RAW`` pattern (worklogs,
logs, tests, fixtures, examples, samples, research notes, ...) get
``layer: raw``; every other file gets ``layer: wiki``.

Files are left untouched when they live in an excluded or dot-prefixed
directory, are named ``index.md``/``log.md``, cannot be read as UTF-8, have no
frontmatter, or already declare a ``layer:`` key.

Pass ``--dry-run`` to count the files that would change without writing them.
"""
import re
import sys
from pathlib import Path
from typing import Optional

ROOT = Path("/media/ealmeida/Dados/Dev")
DRY = "--dry-run" in sys.argv

# Dependency / build / cache directories that are never scanned.
EXCL_DIR = re.compile(r'/(node_modules|\.git|venv|\.venv|vendor|dist|build|site-packages|__pycache__|target|3rdparty)/')

# Path fragments that mark a file as "raw" rather than curated "wiki" content.
RAW = re.compile(r'(worklog|/logs?/|sess[aã]o|di[aá]rio|checkup|deep-research|/fontes?/|pesquisa|/tests?/|/__tests__/|/fixtures?/|/examples?/|/samples?/|/test-data/|/\.cache/|CHANGELOG-old|reuni[aã]o)', re.I)

# File names that are skipped regardless of their location.
RESERVED_NAMES = ("index.md", "log.md")


def is_skipped(path: Path) -> bool:
    """Return True when *path* must not be processed at all."""
    if EXCL_DIR.search(str(path)):
        return True
    # Any dot-prefixed path component (hidden directory or file) excludes it.
    if any(part.startswith(".") for part in path.parts):
        return True
    return path.name in RESERVED_NAMES


def layer_for(path_str: str) -> str:
    """Return ``"raw"`` when *path_str* matches ``RAW``, otherwise ``"wiki"``."""
    return "raw" if RAW.search(path_str) else "wiki"


def with_layer(text: str, layer: str) -> Optional[str]:
    """Return *text* with ``layer: <layer>`` appended to its frontmatter.

    Returns None when nothing should be written: the text does not start with
    ``---``, the frontmatter has no closing ``---`` line, or it already
    contains a ``layer:`` key.
    """
    if not text.startswith("---"):
        return None
    # Search from offset 3 so the opening fence is not mistaken for the close.
    end = text.find("\n---", 3)
    if end < 0:
        return None
    if re.search(r'^layer:', text[3:end], re.M):
        return None
    # Insert the new key as the last frontmatter line, before the closing fence.
    return f"{text[:end]}\nlayer: {layer}{text[end:]}"


def main() -> int:
    """Scan ``ROOT`` for .md files, tag them, and return how many were tagged."""
    added = 0
    for path in ROOT.rglob("*.md"):
        if is_skipped(path):
            continue
        try:
            text = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Unreadable or non-UTF-8 files are skipped on purpose; a bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            continue
        updated = with_layer(text, layer_for(str(path)))
        if updated is None:
            continue
        if not DRY:
            path.write_text(updated, encoding="utf-8")
        added += 1
    print(f"{'[DRY] ' if DRY else ''}layer adicionado: {added}")
    return added


if __name__ == "__main__":
    main()
Regular → Executable
+189 -101
View File
@@ -1,153 +1,241 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
okf-convert-wikilinks.py — Fase 3: Converte [[wikilinks]] → [texto](path.md) nos index.md okf-convert-wikilinks.py — Fase F: converte [[wikilinks]] → [texto](path.md) no CORPO
OKF §5: links bundle-relative para navegação entre conceitos de notas `layer: wiki`, para compatibilidade com agentes CLI (que não resolvem [[ ]]).
Âmbito: apenas ficheiros index.md (navegação) Propriedades de segurança:
Corpo de documentos (PROC, QR, etc.) mantém wikilinks — OKF tolera e Obsidian renderiza ambos. - FENCE-AWARE: nunca toca em code fences (``` ~~~) nem inline `code` — preserva
sintaxe bash `[[ -f x ]]` e exemplos de documentação.
- Resolução robusta: mesmo-directório → path-style (relativo, depois vault-root,
depois basename único) → stem global único. Acentos normalizados (NFKD), pipe
escapado `\\|` tratado, anchors `#`/`^` preservados no URL.
- Conservador: alvos ambíguos (stem repetido) ou não encontrados ficam [[ ]] e são
reportados — NUNCA se adivinha um destino.
- Exclusões alinhadas com okf-validate.sh (MEMORY/CLAUDE/AGENTS, 90-Templates, etc).
Uso: Uso: python3 okf-convert-wikilinks.py [--dry-run] [--dir=/path/Hub] [--include-index]
python3 okf-convert-wikilinks.py [--dry-run] [--dir /path/to/Hub]
Criado: 28-06-2026
""" """
import os import os
import re import re
import sys import sys
import unicodedata
from pathlib import Path from pathlib import Path
from collections import defaultdict, Counter
HUB_DEFAULT = "/media/ealmeida/Dados/Hub" HUB_DEFAULT = "/media/ealmeida/Dados/Hub"
EXCLUDE_DIRS = {".stversions", "node_modules", ".git", ".obsidian", ".trash"} EXCLUDE_DIRS = {
".stversions", "node_modules", ".git", ".obsidian", ".trash", ".ijfw", "ijfw",
".github", ".wayland", ".hermes", ".vscode", ".cursor", ".gstack",
"_templates", "99-Arquivo", "90-Templates",
}
EXCLUDE_FILES = {"MEMORY.md", "CLAUDE.md", "GEMINI.md", "AGENTS.md", "copilot-instructions.md"}
# Padrão wikilink: [[NomeFicheiro]] ou [[NomeFicheiro|Alias]] WIKILINK_RE = re.compile(r'\[\[([^\]]+?)\]\]')
WIKILINK_RE = re.compile(r'\[\[([^\]|]+)(?:\|([^\]]+))?\]\]') FENCE_RE = re.compile(r'^(\s*)(```|~~~)')
FM_RE = re.compile(r'^---\n(.*?)\n---', re.DOTALL)
SKIP_PROSE = {"", " ", "...", "wikilinks", "Wikilinks"}
def build_file_index(hub: Path) -> dict: def fold(s: str) -> str:
"""Constrói índice nome→path para resolução de wikilinks.""" return "".join(c for c in unicodedata.normalize("NFKD", s) if not unicodedata.combining(c)).lower()
index = {} # stem → Path relativo ao hub
def is_excluded(rel: str) -> bool:
base = Path(rel).name
if base in EXCLUDE_FILES:
return True
if re.match(r'MEMORY-.*\.md$', base):
return True
return False
def layer_of(text: str):
m = FM_RE.match(text)
if not m:
return None
lm = re.search(r'^layer:\s*(\w+)', m.group(1), re.M)
return lm.group(1) if lm else None
def build_stem_index(hub: Path) -> dict:
"""folded stem → [rel Path, ...] (lista para detectar ambiguidade)."""
idx = defaultdict(list)
for root, dirs, files in os.walk(hub): for root, dirs, files in os.walk(hub):
dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS and not d.startswith(".")] dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS and not d.startswith(".")]
for fname in files: for f in files:
if fname.endswith(".md"): if f.endswith(".md"):
fp = Path(root) / fname fp = Path(root) / f
stem = fp.stem.lower() idx[fold(fp.stem)].append(fp.relative_to(hub))
rel = fp.relative_to(hub) return idx
# Guardar o primeiro match (mais provável no vault activo)
if stem not in index:
index[stem] = rel
# Também indexar o nome completo sem extensão
full_name = fname.lower()
if full_name not in index:
index[full_name] = rel
return index
def resolve_wikilink(target: str, current_file: Path, file_index: dict, hub: Path) -> str: def ci_dir_lookup(dirpath: Path, basename: str, hub: Path):
"""Resolve [[target]] para um caminho relativo ao ficheiro actual.""" """Procura case-insensitive por basename(.md) em dirpath. Devolve rel Path ou None."""
# Limpar o target (remover ^anchor, #heading, etc.) if not dirpath.exists():
target_clean = re.split(r'[#^]', target)[0].strip() return None
target_lower = target_clean.lower() want = fold(basename if basename.endswith(".md") else basename + ".md")
target_with_ext = target_lower + ".md" if not target_lower.endswith(".md") else target_lower
# Tentar resolver
resolved = file_index.get(target_with_ext) or file_index.get(target_lower)
if resolved:
# Calcular path relativo a partir do directório do ficheiro actual
try: try:
rel_path = os.path.relpath(hub / resolved, current_file.parent) for e in dirpath.iterdir():
return rel_path.replace("\\", "/") if e.is_file() and fold(e.name) == want:
return e.relative_to(hub)
except Exception: except Exception:
return str(resolved) pass
return None return None
def convert_wikilinks_in_file(filepath: Path, file_index: dict, hub: Path, dry_run: bool) -> dict: def resolve(target: str, current: Path, hub: Path, stem_idx: dict):
"""Converte wikilinks no ficheiro. Retorna estatísticas.""" """Devolve (relpath, anchor) ou None. 'AMBIG' como relpath sinaliza ambiguidade."""
result = {"file": str(filepath.relative_to(hub)), "converted": 0, "unresolved": [], "action": "skip"} raw = target.strip()
split = re.split(r'(?=[#^])', raw, maxsplit=1)
try: name = split[0].strip().rstrip("\\").strip()
content = filepath.read_text(encoding="utf-8") anchor = raw[len(split[0]):].strip() if len(split) > 1 else ""
except Exception as e: if not name:
result["action"] = "error" return None
result["error"] = str(e) cand = None
return result if "/" in name:
bn = Path(name).name
if "[[" not in content: td = (current.parent / Path(name).parent).resolve()
result["action"] = "no_wikilinks" if td == hub or hub in td.parents:
return result cand = ci_dir_lookup(td, bn, hub)
if cand is None:
def replace_wikilink(m): td2 = (hub / Path(name).parent).resolve()
target = m.group(1) if td2 == hub or hub in td2.parents:
alias = m.group(2) cand = ci_dir_lookup(td2, bn, hub)
display = alias if alias else target if cand is None:
lst = stem_idx.get(fold(Path(bn).stem), [])
resolved_path = resolve_wikilink(target, filepath, file_index, hub) if len(lst) == 1:
if resolved_path: cand = lst[0]
result["converted"] += 1
return f"[{display}]({resolved_path})"
else: else:
# Manter como wikilink se não resolvível cand = ci_dir_lookup(current.parent, name, hub)
result["unresolved"].append(target) if cand is None:
return m.group(0) lst = stem_idx.get(fold(Path(name).stem), [])
if len(lst) == 1:
cand = lst[0]
elif len(lst) > 1:
return ("AMBIG", "")
if cand is None:
return None
relp = os.path.relpath(hub / cand, current.parent).replace("\\", "/")
return (relp, anchor)
new_content = WIKILINK_RE.sub(replace_wikilink, content)
if new_content != content: def convert_file(fp: Path, hub: Path, stem_idx: dict, stats: dict) -> str:
result["action"] = "converted" content = fp.read_text(encoding="utf-8", errors="replace")
if not dry_run: lines = content.split("\n")
filepath.write_text(new_content, encoding="utf-8") out = []
in_fence = False
fmark = None
fm_end = -1
if lines and lines[0].strip() == "---":
for i in range(1, len(lines)):
if lines[i].strip() == "---":
fm_end = i
break
for idx, line in enumerate(lines):
if idx <= fm_end:
out.append(line)
continue
fm = FENCE_RE.match(line)
if not in_fence and fm:
in_fence = True
fmark = fm.group(2)
out.append(line)
continue
if in_fence:
if line.strip().startswith(fmark):
in_fence = False
out.append(line)
continue
parts = re.split(r'(`[^`]*`)', line)
for j, seg in enumerate(parts):
if seg.startswith("`"):
continue
def repl(mm):
inner = mm.group(1)
if "|" in inner:
tgt, alias = inner.split("|", 1)
tgt = tgt.strip().rstrip("\\").strip()
alias = alias.strip()
else: else:
result["action"] = "no_changes" tgt, alias = inner.strip(), None
if tgt in SKIP_PROSE:
stats["prose"] += 1
return mm.group(0)
r = resolve(tgt, fp, hub, stem_idx)
if r is None:
stats["unresolved"][tgt] += 1
return mm.group(0)
if r[0] == "AMBIG":
stats["ambiguous"][tgt] += 1
return mm.group(0)
relp, anchor = r
disp = alias if alias else tgt
url = relp + ("#" + anchor.lstrip("#^") if anchor else "")
stats["converted"] += 1
return f"[{disp}]({url})"
return result parts[j] = WIKILINK_RE.sub(repl, seg)
out.append("".join(parts))
return "\n".join(out)
def main(): def main():
dry_run = "--dry-run" in sys.argv dry_run = "--dry-run" in sys.argv
include_index = "--include-index" in sys.argv
hub = Path(HUB_DEFAULT) hub = Path(HUB_DEFAULT)
for arg in sys.argv[1:]: for arg in sys.argv[1:]:
if arg.startswith("--dir="): if arg.startswith("--dir="):
hub = Path(arg[6:]) hub = Path(arg[6:])
if not hub.exists(): if not hub.exists():
print(f"ERRO: Hub não encontrado em {hub}", file=sys.stderr) print(f"ERRO: Hub não encontrado em {hub}", file=sys.stderr)
sys.exit(1) sys.exit(1)
print(f"{'[DRY-RUN] ' if dry_run else ''}A construir índice de ficheiros…") print(f"{'[DRY-RUN] ' if dry_run else ''}A indexar ficheiros…")
file_index = build_file_index(hub) stem_idx = build_stem_index(hub)
print(f" {len(file_index)} ficheiros indexados") print(f" {len(stem_idx)} stems indexados")
print(f"A converter wikilinks nos index.md…") stats = {"converted": 0, "prose": 0, "unresolved": Counter(), "ambiguous": Counter()}
total_converted = 0
total_unresolved = []
files_changed = 0 files_changed = 0
for root, dirs, files in os.walk(hub): for root, dirs, files in os.walk(hub):
dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS and not d.startswith(".")] dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS and not d.startswith(".")]
for fname in files: for f in files:
if fname != "index.md": if not f.endswith(".md"):
continue continue
filepath = Path(root) / fname if f == "index.md" and not include_index:
result = convert_wikilinks_in_file(filepath, file_index, hub, dry_run) continue
rel = str((Path(root) / f).relative_to(hub))
if result["action"] == "converted": if is_excluded(rel):
continue
fp = Path(root) / f
try:
txt = fp.read_text(encoding="utf-8", errors="replace")
except Exception:
continue
if layer_of(txt) != "wiki":
continue
if "[[" not in txt:
continue
new = convert_file(fp, hub, stem_idx, stats)
if new != txt:
files_changed += 1 files_changed += 1
total_converted += result["converted"] if not dry_run:
total_unresolved.extend(result["unresolved"]) fp.write_text(new, encoding="utf-8")
print(f" [OK] {result['file']}: {result['converted']} convertidos" print(f" [{'DRY' if dry_run else 'OK'}] {rel}")
+ (f", {len(result['unresolved'])} não resolvidos" if result["unresolved"] else ""))
elif result["action"] == "error":
print(f" [ERRO] {result['file']}: {result.get('error')}")
print(f"\n=== Resultado ===") print("\n=== Resultado ===")
print(f"Ficheiros alterados: {files_changed}") print(f"Ficheiros alterados: {files_changed}")
print(f"Wikilinks convertidos: {total_converted}") print(f"Wikilinks convertidos: {stats['converted']}")
if total_unresolved: print(f"Prosa-sobre-sintaxe saltada: {stats['prose']}")
print(f"Não resolvidos ({len(total_unresolved)}): {', '.join(set(total_unresolved))[:200]}") amb = sum(stats["ambiguous"].values())
unr = sum(stats["unresolved"].values())
if amb:
print(f"AMBÍGUOS (deixados): {amb}{dict(stats['ambiguous'].most_common(10))}")
if unr:
print(f"NÃO-RESOLVÍVEIS (refs partidas, deixados como [[ ]]): {unr} (distintos {len(stats['unresolved'])})")
for t, n in stats["unresolved"].most_common(30):
print(f" {n:3} [[{t}]]")
if __name__ == "__main__": if __name__ == "__main__":
+9
View File
@@ -27,6 +27,15 @@ EXCLUDE_DIRS = {
".obsidian", ".obsidian",
".trash", ".trash",
"99-Arquivo", "99-Arquivo",
# dependências / builds / caches (relevante p/ Dev) — dot-dirs já são saltados
"venv",
"vendor",
"dist",
"build",
"site-packages",
"target",
"__pycache__",
"3rdparty",
} }
# Ficheiros reservados OKF — sem frontmatter obrigatório # Ficheiros reservados OKF — sem frontmatter obrigatório
+8 -1
View File
@@ -213,7 +213,14 @@ validate_file() {
# Check for wikilinks in body (wiki layer) # Check for wikilinks in body (wiki layer)
if echo "$content" | grep -q '^layer: wiki'; then if echo "$content" | grep -q '^layer: wiki'; then
local body local body
body="$(sed -n '/^---$/,/^---$/d; p' "$f")" body="$(awk '
NR==1 && $0=="---" {infm=1; next}
infm && $0=="---" {infm=0; next}
infm {next}
/^[[:space:]]*```/ || /^[[:space:]]*~~~/ {infence=!infence; next}
infence {next}
{gsub(/`[^`]*`/,""); print}
' "$f")"
if echo "$body" | grep -q '\[\['; then if echo "$body" | grep -q '\[\['; then
echo -e "${YEL}WARN${NC} $rel [wiki]: contains wikilinks [[ ]] — convert to [text](path)" echo -e "${YEL}WARN${NC} $rel [wiki]: contains wikilinks [[ ]] — convert to [text](path)"
((WARNINGS++)) || true ((WARNINGS++)) || true