Files
scripts/okf-hub/okf-classify-layer.sh
T

212 lines
6.4 KiB
Bash
Executable File

#!/usr/bin/env bash
# okf-classify-layer.sh — assigns layer: raw|wiki to every .md file in the Hub
# Path + content heuristics (§6 Phase C of OKF-Compliance-Plan-Global.md)
# Usage: okf-classify-layer.sh [--dry-run]
#   --dry-run  show what would be done without changing any file
# Environment:
#   HUB_ROOT   overrides the default Hub location
set -euo pipefail

# Default location; an already-exported HUB_ROOT takes precedence.
HUB_ROOT="${HUB_ROOT:-/media/ealmeida/Dados/Hub}"
DRY_RUN=false

#######################################
# Parse the command line.
# Globals:   DRY_RUN (written)
# Arguments: the script's arguments
# Outputs:   usage text on stdout for -h/--help, diagnostics on stderr
# Returns:   0; exits 0 after printing help, exits 2 on an unknown argument
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      -h | --help)
        printf 'Usage: %s [--dry-run]\n' "${0##*/}"
        exit 0
        ;;
      *)
        # This script edits files in place: a mistyped flag must never fall
        # through to a real (non dry-run) execution.
        printf 'error: unknown argument: %s\n' "$arg" >&2
        printf 'Usage: %s [--dry-run]\n' "${0##*/}" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# --- Exclusion patterns ---
#######################################
# Tell whether a Hub-relative path must be left alone by the classifier.
# Arguments: $1 - path of the file, relative to the Hub root
# Returns:   0 when the path is excluded, 1 otherwise
#######################################
is_excluded() {
  local path="$1"

  # Directory fragments: a match anywhere in the path excludes the file.
  case "$path" in
    *99-Arquivo/* | *.stversions/* | *.obsidian/* | *.ijfw/*)
      return 0
      ;;
    *_templates/* | *90-Templates/*)
      return 0
      ;;
    *.git/* | *node_modules/* | *.github/* | *.wayland/* | *.hermes/*)
      return 0
      ;;
  esac

  # File names that are excluded in whatever directory they live.
  local name
  name="$(basename "$path")"
  case "$name" in
    MEMORY.md | MEMORY-*.md | index.md | log.md)
      return 0
      ;;
    CLAUDE.md | GEMINI.md | AGENTS.md | copilot-instructions.md)
      return 0
      ;;
  esac

  return 1
}
# --- Classification rules (any RAW signal wins, everything else is wiki) ---
#######################################
# Decide which layer a Hub markdown file belongs to.
# Globals:   HUB_ROOT (read) - prefix removed from the path before matching
# Arguments: $1 - path of the .md file, normally located below HUB_ROOT
# Outputs:   writes "raw" or "wiki" to stdout
# Returns:   0
#######################################
classify_file() {
  local f="$1"
  # The inner quotes make HUB_ROOT a literal prefix; unquoted, characters such
  # as [ ] * ? in the root path would be interpreted as a glob pattern and the
  # prefix would not be stripped.
  local rel="${f#"$HUB_ROOT"/}"
  local base="${rel##*/}"
  # File name starting with an ISO date followed by '-' or '_'.
  local dated_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}[-_]'

  # --- RAW rules (signals: machine-generated, regenerable, dumps) ---
  case "$rel" in
    # 1. YouTube transcript dumps (KB/ and Knowledge-Base/ trees)
    *KB/MDs/YouTube/* | *Knowledge-Base/MDs/YouTube/*)
      echo "raw"
      return
      ;;
    # 2. Deep research in bulk. The match is on the whole relative path, so it
    #    also covers file names containing "deep-research".
    *deep-research*)
      echo "raw"
      return
      ;;
    # 3. Raw research data
    *FinalKintsugi/research/*)
      echo "raw"
      return
      ;;
  esac

  # 4. Dated reports inside the reporting areas
  case "$rel" in
    *Inteligencia/* | *SistemaBackups/* | *Observabilidade/* | *SelfImprovement/* | *Clip/*)
      if [[ "$base" =~ $dated_re ]]; then
        echo "raw"
        return
      fi
      ;;
  esac

  # 5. Inteligencia: "Fontes ... Canal" listings
  if [[ "$rel" == *Inteligencia/* && "$base" == *"Fontes"*"Canal"* ]]; then
    echo "raw"
    return
  fi

  # 6. SistemaBackups: audit / report documents (prefix is case-insensitive)
  if [[ "$rel" == *SistemaBackups/* ]] \
    && grep -qiE '^(AUDITORIA|RELATORIO|TRIANGULACAO|SINCRONIZACAO)-' <<<"$base"; then
    echo "raw"
    return
  fi

  # 7. Clip: incident and WordPress update reports
  if [[ "$rel" == *Clip/* ]] \
    && [[ "$base" == *"incidentes"* || "$base" == *"wordpress-update-report"* ]]; then
    echo "raw"
    return
  fi

  # 8. Worklogs, daily reports, status checkpoints (any directory)
  if grep -qiE '(worklog|ponto-situacao|checkup|diario|daily)' <<<"$base"; then
    echo "raw"
    return
  fi

  # 9. OpenDesign scratch
  if [[ "$rel" == *OpenDesign/* || "$rel" == *open-design/* ]] \
    && grep -qiE 'analise-capacidades' <<<"$base"; then
    echo "raw"
    return
  fi

  # 10. Knowledge-Base files larger than 50000 bytes (from old import)
  if [[ "$rel" == *Knowledge-Base/* ]]; then
    local fsize
    # The brace group makes 2>/dev/null cover the failing '<' redirection too;
    # an unreadable file counts as size 0.
    fsize="$({ wc -c <"$f"; } 2>/dev/null || echo 0)"
    if ((fsize > 50000)); then
      echo "raw"
      return
    fi
  fi

  # --- WIKI rules (everything else) ---
  # The explicit patterns below yield the same answer as the default; they are
  # kept to record which names and folders are known wiki material.
  case "$base" in
    PROC-* | QR-* | SPEC-* | *-SPEC.md | STATUS.md | CHANGELOG.md | index.md)
      echo "wiki"
      return
      ;;
  esac
  case "$rel" in
    *Procedimentos/* | *Quick-Reference/* | *specs/* | *plans/*)
      echo "wiki"
      return
      ;;
  esac

  # Default to wiki
  echo "wiki"
}
# --- Inject layer into frontmatter ---
#######################################
# Add a "layer: <value>" entry to a markdown file unless it already has one.
# Globals:   HUB_ROOT (read) - prefix removed from the path in dry-run output
#            DRY_RUN  (read) - "true" reports the change instead of applying it
# Arguments: $1 - path of the file to update
#            $2 - layer value to write (raw or wiki)
# Outputs:   in dry-run mode, one "DRY-RUN: ..." line on stdout
# Returns:   0 on success or when nothing had to be done, non-zero when the
#            file could not be rewritten
#######################################
inject_layer() {
  local f="$1"
  local layer="$2"
  # Inner quotes: strip HUB_ROOT as a literal prefix, not as a glob pattern.
  local rel="${f#"$HUB_ROOT"/}"

  # Already tagged within the first 20 lines: leave the file alone. Process
  # substitution is used instead of `head | grep -q`, because under `pipefail`
  # a SIGPIPE on head would turn a positive match into a failed pipeline.
  if grep -q '^layer:' < <(head -n 20 -- "$f"); then
    return 0
  fi

  if [[ "$DRY_RUN" == "true" ]]; then
    echo "DRY-RUN: $rel → layer: $layer"
    return 0
  fi

  # read returns non-zero on an empty file or a missing final newline, while
  # still storing whatever it got; that is not an error here.
  local first_line=""
  IFS= read -r first_line <"$f" || true

  local tmp
  tmp="$(mktemp)" || return 1

  local status=0
  if [[ "$first_line" == "---" ]]; then
    # Frontmatter exists: put the layer entry right after the opening ---.
    { printf '%s\n' '---' "layer: $layer" && tail -n +2 -- "$f"; } >"$tmp" || status=$?
  else
    # No frontmatter: prepend a block holding only the layer entry.
    { printf '%s\n' '---' "layer: $layer" '---' && cat -- "$f"; } >"$tmp" || status=$?
  fi

  if ((status == 0)); then
    # Overwrite the content in place rather than moving the temp file over
    # the target: mktemp files are created with mode 0600, so a move would
    # silently change the note's permissions and ownership.
    cat -- "$tmp" >"$f" || status=$?
  fi

  rm -f -- "$tmp"
  return "$status"
}
# --- Main ---
# Walk every .md file below HUB_ROOT, skip the excluded ones, classify the
# rest, write the layer (or report it in dry-run mode) and print a summary.
RAW=0
WIKI=0
SKIPPED=0
TOTAL=0

# Fail early: find runs inside a process substitution, so its failure on a
# missing root would go unnoticed and the run would just report zero files.
if [[ ! -d "$HUB_ROOT" ]]; then
  printf 'error: Hub root not found: %s\n' "$HUB_ROOT" >&2
  exit 1
fi

echo "Classifying files in $HUB_ROOT..."
if [[ "$DRY_RUN" == "true" ]]; then
  echo "(DRY-RUN mode — no files modified)"
fi
echo ""

# -type f keeps directories and symlinks that merely end in .md out of the
# loop; only regular files are rewritten.
while IFS= read -r -d '' f; do
  # Inner quotes: strip HUB_ROOT as a literal prefix, not as a glob pattern.
  rel="${f#"$HUB_ROOT"/}"
  if is_excluded "$rel"; then
    SKIPPED=$((SKIPPED + 1))
    continue
  fi
  TOTAL=$((TOTAL + 1))
  layer="$(classify_file "$f")"
  inject_layer "$f" "$layer"
  case "$layer" in
    raw) RAW=$((RAW + 1)) ;;
    wiki) WIKI=$((WIKI + 1)) ;;
  esac
done < <(find "$HUB_ROOT" -type f -name '*.md' -not -path '*/.git/*' -not -path '*/node_modules/*' -print0)

echo ""
echo "────────────────────────────────────"
echo "Total: $TOTAL | Raw: $RAW | Wiki: $WIKI | Skipped: $SKIPPED"
echo ""
# An `if` rather than `[[ ... ]] && echo`: as the last statement of the script,
# the short-circuit form made every successful non-dry run exit with status 1.
if [[ "$DRY_RUN" == "true" ]]; then
  echo "(Re-run without --dry-run to apply)"
fi