#!/usr/bin/env bash
# okf-classify-layer.sh — assigns `layer: raw|wiki` to every .md file in the Hub.
# Path + content heuristics (§6 Phase C of OKF-Compliance-Plan-Global.md).
#
# Usage: okf-classify-layer.sh [--dry-run]
#   --dry-run    show what would be done without modifying any file
#   -h, --help   print the usage text and exit
#
# Environment:
#   HUB_ROOT     directory tree to scan (default: /media/ealmeida/Dados/Hub)

set -euo pipefail

# Both globals are read by the functions below; they are intentionally not
# readonly so the functions can be exercised against another tree.
HUB_ROOT="${HUB_ROOT:-/media/ealmeida/Dados/Hub}"
DRY_RUN=false

#######################################
# Print the command-line synopsis to stdout.
#######################################
usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}"
  printf '  --dry-run   show what would be done without modifying any file\n'
}

#######################################
# Decide whether a file must be left untouched.
# Arguments: $1 - path relative to HUB_ROOT
# Returns:   0 when the file is excluded, 1 otherwise
#######################################
is_excluded() {
  local rel="$1"
  local base="${rel##*/}"

  # Excluded directories. The patterns are deliberately unanchored: they match
  # any path containing a directory whose name ends with the given text.
  case "$rel" in
    *99-Arquivo/* | *.stversions/* | *.obsidian/* | *.ijfw/* | \
    *_templates/* | *90-Templates/* | *.git/* | *node_modules/* | \
    *.github/* | *.wayland/* | *.hermes/*)
      return 0
      ;;
  esac

  # Excluded file names (agent memory/instruction files, indexes, logs).
  case "$base" in
    MEMORY.md | MEMORY-*.md | index.md | log.md | CLAUDE.md | GEMINI.md | \
    AGENTS.md | copilot-instructions.md)
      return 0
      ;;
  esac

  return 1
}

#######################################
# Classify one file as "raw" or "wiki".
# Every rule below yields "raw"; a file matching none of them is "wiki".
# Globals:   HUB_ROOT (read)
# Arguments: $1 - absolute path of the file
# Outputs:   "raw" or "wiki" on stdout
# Returns:   0
#######################################
classify_file() {
  local f="$1"
  # Quote the root inside the expansion so glob characters in HUB_ROOT are
  # stripped literally instead of being interpreted as a pattern.
  local rel="${f#"$HUB_ROOT"/}"
  local base="${rel##*/}"
  local lower="${base,,}" # lower-cased name for case-insensitive rules
  local -r dated_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}[-_]'
  local -r large_kb_bytes=50000
  local fsize

  # --- RAW rules (signals: machine-generated, regenerable, dumps) ---

  # 1. YouTube transcript dumps (KB/ and Knowledge-Base/ trees)
  # 2. Bulk deep research (anywhere in the path, file name included)
  # 3. Raw research data (FinalKintsugi/research/)
  case "$rel" in
    *KB/MDs/YouTube/* | *Knowledge-Base/MDs/YouTube/* | *deep-research* | \
    *FinalKintsugi/research/*)
      echo "raw"
      return 0
      ;;
  esac

  # 4. Date-prefixed reports (YYYY-MM-DD- or YYYY-MM-DD_) in the areas that
  #    produce them: competitive intelligence, backups, observability,
  #    self-improvement and Clip.
  if [[ "$base" =~ $dated_re ]]; then
    case "$rel" in
      *Inteligencia/* | *SistemaBackups/* | *Observabilidade/* | \
      *SelfImprovement/* | *Clip/*)
        echo "raw"
        return 0
        ;;
    esac
  fi

  # 5. Competitive-intelligence source/channel listings
  if [[ "$rel" == *Inteligencia/* && "$base" == *"Fontes"*"Canal"* ]]; then
    echo "raw"
    return 0
  fi

  # 6. SistemaBackups audit/report files (prefix match, case-insensitive)
  if [[ "$rel" == *SistemaBackups/* ]]; then
    case "$lower" in
      auditoria-* | relatorio-* | triangulacao-* | sincronizacao-*)
        echo "raw"
        return 0
        ;;
    esac
  fi

  # 7. Clip incident and WordPress update reports
  if [[ "$rel" == *Clip/* ]]; then
    case "$base" in
      *incidentes* | *wordpress-update-report*)
        echo "raw"
        return 0
        ;;
    esac
  fi

  # 8. Worklogs, daily reports, status check-ins (case-insensitive)
  case "$lower" in
    *worklog* | *ponto-situacao* | *checkup* | *diario* | *daily*)
      echo "raw"
      return 0
      ;;
  esac

  # 9. OpenDesign scratch analyses (case-insensitive)
  if [[ "$rel" == *OpenDesign/* || "$rel" == *open-design/* ]] \
    && [[ "$lower" == *analise-capacidades* ]]; then
    echo "raw"
    return 0
  fi

  # 10. Large Knowledge-Base reference files (from the old import).
  #     Checked last because it is the only rule that touches the file.
  #     An unreadable or missing file counts as size 0.
  if [[ "$rel" == *Knowledge-Base/* ]]; then
    fsize="$({ wc -c < "$f"; } 2>/dev/null || echo 0)"
    if ((fsize > large_kb_bytes)); then
      echo "raw"
      return 0
    fi
  fi

  # --- WIKI (everything else) ---
  # This includes the canonical wiki shapes: PROC-*, QR-*, SPEC-*, *-SPEC.md,
  # STATUS.md, CHANGELOG.md, and anything under Procedimentos/,
  # Quick-Reference/, specs/ or plans/.
  echo "wiki"
}

#######################################
# Tell whether a file's YAML frontmatter already defines `layer:`.
# Only the frontmatter is inspected: the file must start with a `---` line,
# and the scan stops at the closing `---`. A trailing CR is ignored so CRLF
# files are handled as well.
# Arguments: $1 - path of the file
# Returns:   0 when a `layer:` key is present, non-zero otherwise
#######################################
frontmatter_has_layer() {
  awk '
    { sub(/\r$/, "") }
    NR == 1 { if ($0 != "---") exit; next }
    $0 == "---" { exit }
    /^layer:/ { found = 1; exit }
    END { exit(found ? 0 : 1) }
  ' "$1"
}

#######################################
# Add `layer: <value>` to a file's frontmatter, creating the frontmatter
# block when the file has none. Files that already carry the key are left
# alone, which makes repeated runs idempotent.
# Globals:   HUB_ROOT (read), DRY_RUN (read)
# Arguments: $1 - absolute path of the file
#            $2 - layer value ("raw" or "wiki")
# Outputs:   in dry-run mode, one "DRY-RUN: …" line on stdout
# Returns:   0 on success or when nothing had to be done, non-zero when the
#            file could not be rewritten
#######################################
inject_layer() {
  local f="$1"
  local layer="$2"
  local rel="${f#"$HUB_ROOT"/}"
  local first_line="" eol="" tmp status=0

  if frontmatter_has_layer "$f"; then
    return 0
  fi

  if [[ "$DRY_RUN" == "true" ]]; then
    echo "DRY-RUN: $rel → layer: $layer"
    return 0
  fi

  # `read` fails on an empty file or a last line without newline but still
  # fills the variable, so its status is deliberately ignored.
  IFS= read -r first_line < "$f" || true
  # Keep the file's own line-ending style for the lines that are added.
  if [[ "$first_line" == *$'\r' ]]; then
    eol=$'\r'
  fi

  tmp="$(mktemp)"
  if [[ "${first_line%$'\r'}" == "---" ]]; then
    # Frontmatter exists: put the key right after the opening delimiter.
    {
      printf '%s\n' "$first_line"
      printf 'layer: %s%s\n' "$layer" "$eol"
      tail -n +2 -- "$f"
    } > "$tmp" || status=$?
  else
    # No frontmatter: prepend a minimal block.
    {
      printf '%s%s\n' "---" "$eol"
      printf 'layer: %s%s\n' "$layer" "$eol"
      printf '%s%s\n' "---" "$eol"
      cat -- "$f"
    } > "$tmp" || status=$?
  fi

  # Write the new content into the existing file rather than renaming the
  # temp file over it: the note keeps its mode, owner and inode, whereas a
  # rename would leave it with mktemp's 0600 permissions.
  if ((status == 0)); then
    cat -- "$tmp" > "$f" || status=$?
  fi
  rm -f -- "$tmp"
  return "$status"
}

#######################################
# Entry point: walk HUB_ROOT, classify every eligible .md file, tag it and
# print a summary.
# Globals:   HUB_ROOT (read), DRY_RUN (written)
# Arguments: command-line arguments of the script
# Returns:   0 on success, 2 on an unknown argument
#######################################
main() {
  local arg f rel layer
  local raw=0 wiki=0 skipped=0 total=0

  # Reject anything unknown before touching a file: a mistyped --dry-run
  # must not turn into a modifying run.
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      -h | --help)
        usage
        return 0
        ;;
      *)
        printf 'error: unknown argument: %s\n' "$arg" >&2
        usage >&2
        return 2
        ;;
    esac
  done

  echo "Classifying files in $HUB_ROOT..."
  if [[ "$DRY_RUN" == "true" ]]; then
    echo "(DRY-RUN mode — no files modified)"
  fi
  echo ""

  if [[ ! -d "$HUB_ROOT" ]]; then
    # Non-fatal: the summary below then reports zero files.
    printf 'warning: HUB_ROOT is not a directory, nothing scanned: %s\n' \
      "$HUB_ROOT" >&2
  else
    # .git and node_modules are pruned instead of being walked and filtered.
    # -type f keeps directories named *.md out of the loop and leaves
    # symlinked notes untouched.
    while IFS= read -r -d '' f; do
      rel="${f#"$HUB_ROOT"/}"
      if is_excluded "$rel"; then
        skipped=$((skipped + 1))
        continue
      fi
      total=$((total + 1))
      layer="$(classify_file "$f")"
      inject_layer "$f" "$layer"
      case "$layer" in
        raw) raw=$((raw + 1)) ;;
        wiki) wiki=$((wiki + 1)) ;;
      esac
    done < <(
      find "$HUB_ROOT" \
        -type d \( -name .git -o -name node_modules \) -prune \
        -o -type f -name '*.md' -print0
    )
  fi

  echo ""
  echo "────────────────────────────────────"
  echo "Total: $total | Raw: $raw | Wiki: $wiki | Skipped: $skipped"
  echo ""
  # An `if` rather than `[[ … ]] && echo`: as the last statement, the short
  # form made every successful non-dry run finish with exit status 1.
  if [[ "$DRY_RUN" == "true" ]]; then
    echo "(Re-run without --dry-run to apply)"
  fi
  return 0
}

main "$@"