fix(okf-hub): adoptar validador layer-aware (Fase D) + corrigir bugs (reserved-skip, folded-scalar desc, tr range); classify exclui index.md/log.md
This commit is contained in:
Executable
+210
@@ -0,0 +1,210 @@
|
||||
#!/usr/bin/env bash
|
||||
# okf-classify-layer.sh — assigns `layer: raw|wiki` to the Hub's .md files
# (excluded folders and reserved files such as index.md / log.md are skipped).
# Path + content heuristics (§6 Phase C of OKF-Compliance-Plan-Global.md).
# Usage: okf-classify-layer.sh [--dry-run]
#   --dry-run  show what would be done without modifying any file
|
||||
|
||||
set -euo pipefail

# Root directory of the Hub whose Markdown files are classified.
HUB_ROOT="/media/ealmeida/Dados/Hub"

# "true" when the script must only report what it would do.
DRY_RUN=false

# Argument parsing: at most one argument, and it must be a known flag.
# A mistyped flag (e.g. --dryrun) is rejected instead of being silently
# ignored, because ignoring it would leave DRY_RUN=false (write mode).
if [[ $# -gt 1 ]]; then
  printf 'Usage: %s [--dry-run]\n' "${0##*/}" >&2
  exit 2
fi
case "${1:-}" in
  "") ;;
  --dry-run) DRY_RUN=true ;;
  -h | --help)
    printf 'Usage: %s [--dry-run]\n' "${0##*/}"
    exit 0
    ;;
  *)
    printf 'Unknown argument: %s\n' "$1" >&2
    printf 'Usage: %s [--dry-run]\n' "${0##*/}" >&2
    exit 2
    ;;
esac
|
||||
|
||||
# --- Exclusion patterns ---
|
||||
# Decide whether a path must be skipped by the classifier.
# Arguments: $1 - file path (the caller passes it relative to the Hub root).
# Returns:   0 when the path is excluded, 1 otherwise.
is_excluded() {
  local path="$1"
  local name

  # Folder fragments: a match anywhere in the path excludes the file.
  case "$path" in
    *99-Arquivo/* | *.stversions/* | *.obsidian/* | *.ijfw/*) return 0 ;;
    *_templates/* | *90-Templates/* | *.git/* | *node_modules/*) return 0 ;;
    *.github/* | *.wayland/* | *.hermes/*) return 0 ;;
  esac

  # File names that are never classified, whatever folder they live in.
  name="$(basename "$path")"
  case "$name" in
    MEMORY.md | index.md | log.md | CLAUDE.md | GEMINI.md | AGENTS.md | copilot-instructions.md)
      return 0
      ;;
  esac

  return 1
}
|
||||
|
||||
# --- Classification rules (ordered by priority, first match wins) ---
|
||||
# Return 0 when $1 starts with an ISO date followed by '-' or '_'
# (e.g. "2024-05-01-report.md"), 1 otherwise. Uses the regex builtin so no
# external process is spawned per file.
_okf_has_date_prefix() {
  local date_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}[-_]'
  [[ "$1" =~ $date_re ]]
}

# Case-insensitive extended-regex match of $1 against the regex in $2.
# nocasematch is switched on only for the duration of the match and the
# previous setting is restored afterwards.
_okf_imatch() {
  local text="$1"
  local regex="$2"
  local had_nocase=false
  local matched=1

  if shopt -q nocasematch; then
    had_nocase=true
  fi
  shopt -s nocasematch
  if [[ "$text" =~ $regex ]]; then
    matched=0
  fi
  if [[ "$had_nocase" == "false" ]]; then
    shopt -u nocasematch
  fi
  return "$matched"
}

# Classify one Markdown file and print its layer.
# Globals:   HUB_ROOT (read) - prefix stripped from the path before matching.
# Arguments: $1 - path of the file to classify.
# Outputs:   one line on stdout: "raw" or "wiki".
# A file is "raw" as soon as any RAW rule matches; everything else is "wiki".
classify_file() {
  local f="$1"
  # HUB_ROOT is quoted so it is stripped literally; unquoted it would be
  # interpreted as a glob pattern (e.g. a root containing '[' or '*').
  local rel="${f#"$HUB_ROOT"/}"
  local base
  base="$(basename "$rel")"

  # --- RAW rules (signals: machine-generated, regenerable, dumps) ---

  # 1. YouTube transcript dumps (KB/ and Knowledge-Base/ trees)
  # 2. Bulk deep research ("deep-research" anywhere in the path or file name)
  # 3. Raw research data (FinalKintsugi/research/)
  case "$rel" in
    *KB/MDs/YouTube/* | *Knowledge-Base/MDs/YouTube/* | *deep-research* | *FinalKintsugi/research/*)
      echo "raw"
      return
      ;;
  esac

  # 4. Dated reports (YYYY-MM-DD- or YYYY-MM-DD_ prefix) in the areas below
  case "$rel" in
    *Inteligencia/* | *SistemaBackups/* | *Observabilidade/* | *SelfImprovement/* | *Clip/*)
      if _okf_has_date_prefix "$base"; then
        echo "raw"
        return
      fi
      ;;
  esac

  # 5. Inteligencia: file names matching *Fontes*Canal*
  if [[ "$rel" == *Inteligencia/* && "$base" == *"Fontes"*"Canal"* ]]; then
    echo "raw"
    return
  fi

  # 6. SistemaBackups: audit / report files (prefix matched case-insensitively)
  if [[ "$rel" == *SistemaBackups/* ]] &&
    _okf_imatch "$base" '^(AUDITORIA|RELATORIO|TRIANGULACAO|SINCRONIZACAO)-'; then
    echo "raw"
    return
  fi

  # 7. Clip: incident and WordPress update reports
  if [[ "$rel" == *Clip/* ]]; then
    case "$base" in
      *"incidentes"* | *"wordpress-update-report"*)
        echo "raw"
        return
        ;;
    esac
  fi

  # 8. Worklogs, daily reports, status check-ins (any folder)
  if _okf_imatch "$base" '(worklog|ponto-situacao|checkup|diario|daily)'; then
    echo "raw"
    return
  fi

  # 9. OpenDesign scratch
  if [[ "$rel" == *OpenDesign/* || "$rel" == *open-design/* ]] &&
    _okf_imatch "$base" 'analise-capacidades'; then
    echo "raw"
    return
  fi

  # 10. Knowledge-Base files larger than 50000 bytes
  if [[ "$rel" == *Knowledge-Base/* ]]; then
    local fsize=0
    # An unreadable or missing file counts as size 0 (rule does not apply).
    if [[ -r "$f" ]]; then
      fsize=$(wc -c < "$f") || fsize=0
    fi
    if [[ "$fsize" -gt 50000 ]]; then
      echo "raw"
      return
    fi
  fi

  # --- WIKI rules (everything else) ---
  # The explicit patterns below yield the same result as the default; they
  # are kept to spell out which names and folders are expected to be wiki.
  case "$base" in
    PROC-* | QR-* | SPEC-* | *-SPEC.md | STATUS.md | CHANGELOG.md | index.md)
      echo "wiki"
      return
      ;;
  esac
  case "$rel" in
    *Procedimentos/* | *Quick-Reference/* | *specs/* | *plans/*)
      echo "wiki"
      return
      ;;
  esac

  # Default to wiki
  echo "wiki"
}
|
||||
|
||||
# --- Inject layer into frontmatter ---
|
||||
# Add a "layer: <value>" key to a Markdown file's YAML front matter.
# Globals:   HUB_ROOT (read) - stripped from the path in messages.
#            DRY_RUN  (read) - "true" means report only, change nothing.
# Arguments: $1 - path of the file to update
#            $2 - layer value to write (the caller passes "raw" or "wiki")
# Outputs:   in dry-run mode, one "DRY-RUN: ..." line on stdout.
# Returns:   0 on success or when nothing had to be done, 1 on write failure.
inject_layer() {
  local f="$1"
  local layer="$2"
  local rel="${f#"$HUB_ROOT"/}"

  # Skip files that already carry a layer key in their first 20 lines.
  # A single awk replaces `head | grep -q`: under `set -o pipefail` that
  # pipeline can report failure (SIGPIPE in head) even though grep matched.
  if awk 'NR > 20 { exit } /^layer:/ { found = 1; exit } END { exit !found }' "$f"; then
    return 0
  fi

  if [[ "$DRY_RUN" == "true" ]]; then
    echo "DRY-RUN: $rel → layer: $layer"
    return 0
  fi

  # Read the first line; tolerate a trailing CR so CRLF files with front
  # matter are recognised instead of getting a second front-matter block.
  local first_line=""
  IFS= read -r first_line < "$f" || true
  first_line="${first_line%$'\r'}"

  if [[ "$first_line" == "---" ]]; then
    # Front matter exists: insert the key right after the opening "---".
    sed -i "1a\\layer: $layer" "$f"
    return 0
  fi

  # No front matter: build "front matter + original content" in a temp file.
  local tmp
  tmp="$(mktemp)"
  if ! {
    printf '%s\n' "---" "layer: $layer" "---"
    cat -- "$f"
  } > "$tmp"; then
    rm -f -- "$tmp"
    echo "ERROR: could not build new content for $rel" >&2
    return 1
  fi

  # Copy the content back into the existing file rather than mv-ing the temp
  # file over it: mktemp files are created with owner-only permissions, so a
  # mv would silently change the note's mode and replace its inode.
  if ! cat -- "$tmp" > "$f"; then
    echo "ERROR: could not write $rel (new content kept in $tmp)" >&2
    return 1
  fi
  rm -f -- "$tmp"
}
|
||||
|
||||
# --- Main ---
|
||||
# Counters for the final summary line.
RAW=0
WIKI=0
SKIPPED=0
TOTAL=0

# find runs inside a process substitution, so its failure would go unnoticed
# and the script would just report "Total: 0"; check the root up front.
if [[ ! -d "$HUB_ROOT" ]]; then
  echo "ERROR: Hub root not found: $HUB_ROOT" >&2
  exit 1
fi

echo "Classifying files in $HUB_ROOT..."
if [[ "$DRY_RUN" == "true" ]]; then
  echo "(DRY-RUN mode — no files modified)"
fi
echo ""

# NUL-delimited walk so file names with spaces or newlines are handled.
# -type f keeps directories and symlinks named *.md away from inject_layer.
while IFS= read -r -d '' f; do
  rel="${f#"$HUB_ROOT"/}"
  if is_excluded "$rel"; then
    SKIPPED=$((SKIPPED + 1))
    continue
  fi
  TOTAL=$((TOTAL + 1))

  layer="$(classify_file "$f")"
  inject_layer "$f" "$layer"

  case "$layer" in
    raw) RAW=$((RAW + 1)) ;;
    wiki) WIKI=$((WIKI + 1)) ;;
  esac
done < <(find "$HUB_ROOT" -type f -name '*.md' -not -path '*/.git/*' -not -path '*/node_modules/*' -print0)

echo ""
echo "────────────────────────────────────"
echo "Total: $TOTAL | Raw: $RAW | Wiki: $WIKI | Skipped: $SKIPPED"
echo ""
# Written as an `if` on purpose: as the script's last command, a bare
# `[[ ... ]] && echo` would make a successful non-dry run exit with status 1.
if [[ "$DRY_RUN" == "true" ]]; then
  echo "(Re-run without --dry-run to apply)"
fi
|
||||
Reference in New Issue
Block a user