#!/usr/bin/env bash
#
# okf-classify-layer.sh — assigns `layer: raw|wiki` to every .md file in the Hub.
# Path + content heuristics (§6 Phase C of OKF-Compliance-Plan-Global.md).
#
# Usage: okf-classify-layer.sh [--dry-run]
#   --dry-run   show what would be done without modifying any file
#
# Environment:
#   OKF_HUB_ROOT   directory to scan (default: /media/ealmeida/Dados/Hub)

set -euo pipefail

# Root of the tree to classify; overridable so the script can be pointed at a
# copy of the Hub without editing it.
HUB_ROOT="${OKF_HUB_ROOT:-/media/ealmeida/Dados/Hub}"
DRY_RUN=false

# Print the command-line synopsis to stdout.
usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}"
  printf '  --dry-run   show what would be done without modifying any file\n'
}

#######################################
# Parse command-line flags.
# Globals:   DRY_RUN (written)
# Arguments: the script's positional parameters
# Returns:   exits 0 after --help, exits 2 on an unknown argument
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        # A mistyped flag must never fall through to a real (writing) run.
        printf 'error: unknown argument: %s\n' "$arg" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# --- Exclusion patterns ---
#######################################
# Tell whether a Hub-relative path must be left untouched by the classifier.
# Arguments: $1 - file path relative to the Hub root
# Returns:   0 when the path is excluded, 1 otherwise
#######################################
is_excluded() {
  local rel="$1"
  # Parameter expansion instead of `basename`: no fork per file, and a path
  # starting with "-" cannot be mistaken for an option.
  local base="${rel##*/}"

  # Directory fragments and sync-conflict copies, matched anywhere in the
  # path. Note that *ijfw/* also covers the *.ijfw/* entry.
  case "$rel" in
    *99-Arquivo/* | *.stversions/* | *.obsidian/* | *.ijfw/* | \
      *_templates/* | *90-Templates/* | *.git/* | *node_modules/* | \
      *.github/* | *.wayland/* | *.hermes/* | *ijfw/* | *.sync-conflict-*)
      return 0
      ;;
  esac

  # Individual file names that are skipped regardless of their directory.
  case "$base" in
    MEMORY.md | MEMORY-*.md | index.md | log.md | CLAUDE.md | GEMINI.md | \
      AGENTS.md | copilot-instructions.md)
      return 0
      ;;
  esac

  return 1
}

# --- Classification rules (ordered by priority, first match wins) ---

# Case-insensitive ERE match of $1 against pattern $2.
# nocasematch is switched on only for the duration of the test and put back
# to its previous state, so callers' own pattern matching is unaffected.
_okf_imatch() {
  local subject="$1"
  local pattern="$2"
  local status=1
  local was_set=false

  if shopt -q nocasematch; then
    was_set=true
  fi
  shopt -s nocasematch
  if [[ "$subject" =~ $pattern ]]; then
    status=0
  fi
  if [[ "$was_set" == false ]]; then
    shopt -u nocasematch
  fi
  return "$status"
}

#######################################
# Decide which layer a Markdown file belongs to.
# Every explicit rule below yields "raw"; a file that matches none of them
# is "wiki".
# Globals:   HUB_ROOT (read)
# Arguments: $1 - absolute path of the file
# Outputs:   writes "raw" or "wiki" to stdout
# Returns:   0
#######################################
classify_file() {
  local f="$1"
  # Quote the root inside the expansion so it is stripped literally rather
  # than being interpreted as a glob pattern.
  local rel="${f#"${HUB_ROOT}"/}"
  local base="${rel##*/}"
  # File names that start with an ISO date followed by "-" or "_".
  local dated_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}[-_]'
  local fsize=0

  # --- RAW rules (signals: machine-generated, regenerable, dumps) ---

  # 1-4. Path-only signals: YouTube transcript dumps (KB/ and
  #      Knowledge-Base/), bulk deep research, FinalKintsugi research data.
  case "$rel" in
    *KB/MDs/YouTube/* | *deep-research* | *FinalKintsugi/research/* | \
      *Knowledge-Base/MDs/YouTube/*)
      printf '%s\n' "raw"
      return 0
      ;;
  esac

  # 5. Inteligência Competitiva: dated reports and source/channel lists.
  #    (A "deep-research" base name is already caught by the path rule above.)
  if [[ "$rel" == *Inteligencia/* ]]; then
    if [[ "$base" =~ $dated_re || "$base" == *Fontes*Canal* ]]; then
      printf '%s\n' "raw"
      return 0
    fi
  fi

  # 6. SistemaBackups: dated reports and audit/report-style file names.
  if [[ "$rel" == *SistemaBackups/* ]]; then
    if [[ "$base" =~ $dated_re ]] ||
      _okf_imatch "$base" '^(AUDITORIA|RELATORIO|TRIANGULACAO|SINCRONIZACAO)-'; then
      printf '%s\n' "raw"
      return 0
    fi
  fi

  # 7-8. Observabilidade and SelfImprovement: dated documents only.
  if [[ "$rel" == *Observabilidade/* || "$rel" == *SelfImprovement/* ]]; then
    if [[ "$base" =~ $dated_re ]]; then
      printf '%s\n' "raw"
      return 0
    fi
  fi

  # 9. Clip: dated reports, incident reports, WordPress update reports.
  if [[ "$rel" == *Clip/* ]]; then
    if [[ "$base" =~ $dated_re || "$base" == *incidentes* ||
      "$base" == *wordpress-update-report* ]]; then
      printf '%s\n' "raw"
      return 0
    fi
  fi

  # 10. Worklogs, daily reports, status check-ins (any directory).
  if _okf_imatch "$base" '(worklog|ponto-situacao|checkup|diario|daily)'; then
    printf '%s\n' "raw"
    return 0
  fi

  # 11. OpenDesign scratch analyses.
  if [[ "$rel" == *OpenDesign/* || "$rel" == *open-design/* ]]; then
    if _okf_imatch "$base" 'analise-capacidades'; then
      printf '%s\n' "raw"
      return 0
    fi
  fi

  # 12. Knowledge-Base files larger than 50000 bytes (from old import).
  #     An unreadable file counts as size 0; the group redirect silences
  #     the shell's own "cannot open" message as well as wc's.
  if [[ "$rel" == *Knowledge-Base/* ]]; then
    fsize="$({ wc -c < "$f"; } 2>/dev/null)" || fsize=0
    if [[ "${fsize:-0}" -gt 50000 ]]; then
      printf '%s\n' "raw"
      return 0
    fi
  fi

  # --- WIKI (everything else) ---
  # The usual wiki shapes (PROC-*, QR-*, SPEC-*, *-SPEC.md, STATUS.md,
  # CHANGELOG.md, index.md, and anything under Procedimentos/,
  # Quick-Reference/, specs/ or plans/) need no rule of their own: whatever
  # was not classified raw above is wiki.
  printf '%s\n' "wiki"
}

# --- Inject layer into frontmatter ---
#######################################
# Add a `layer:` key to a Markdown file's frontmatter.
# A file whose first line is `---` gets the key inserted right after that
# line; any other file gets a new three-line frontmatter block prepended.
# Files whose frontmatter already carries a `layer:` key are left alone.
# Globals:   HUB_ROOT (read), DRY_RUN (read)
# Arguments: $1 - absolute path of the file
#            $2 - layer value to write
# Outputs:   in dry-run mode, one "DRY-RUN: ..." line on stdout;
#            warnings and errors on stderr
# Returns:   0 when the file was updated or skipped, 1 when rewriting failed
#######################################
inject_layer() {
  local f="$1"
  local layer="$2"
  local rel="${f#"${HUB_ROOT}"/}"
  local line=''
  local eol=''
  local tmp=''
  local lineno=0
  local has_frontmatter=false

  if [[ ! -r "$f" ]]; then
    printf 'warning: cannot read %s, skipped\n' "$rel" >&2
    return 0
  fi

  # Scan the frontmatter with shell builtins. This avoids `head | grep -q`,
  # which can report failure under `pipefail` when grep exits early, and it
  # looks at the whole frontmatter block (not just 20 lines) and nothing else.
  while IFS= read -r line || [[ -n "$line" ]]; do
    lineno=$((lineno + 1))
    if ((lineno == 1)); then
      # Remember CRLF line endings so inserted lines match the file.
      if [[ "$line" == *$'\r' ]]; then
        eol=$'\r'
      fi
      if [[ "${line%$'\r'}" != '---' ]]; then
        break
      fi
      has_frontmatter=true
      continue
    fi
    line="${line%$'\r'}"
    if [[ "$line" == '---' ]]; then
      break # closing delimiter: no layer key in the frontmatter
    fi
    if [[ "$line" == layer:* ]]; then
      return 0 # already classified
    fi
  done < "$f"

  if [[ "$DRY_RUN" == "true" ]]; then
    echo "DRY-RUN: $rel → layer: $layer"
    return 0
  fi

  tmp="$(mktemp)" || return 1
  if ! {
    printf '%s%s\n' '---' "$eol"
    printf 'layer: %s%s\n' "$layer" "$eol"
    if [[ "$has_frontmatter" == true ]]; then
      # Line 1 (the opening delimiter) was re-emitted above.
      tail -n +2 -- "$f"
    else
      printf '%s%s\n' '---' "$eol"
      cat -- "$f"
    fi
  } > "$tmp"; then
    rm -f -- "$tmp"
    printf 'error: could not build new content for %s\n' "$rel" >&2
    return 1
  fi

  # Write the content back into the existing file rather than moving the
  # temp file over it: mktemp files are mode 0600, and a move would replace
  # the note's permissions with those.
  if ! cat -- "$tmp" > "$f"; then
    rm -f -- "$tmp"
    printf 'error: could not rewrite %s\n' "$rel" >&2
    return 1
  fi
  rm -f -- "$tmp"
}

# --- Main ---
# Walk every Markdown file under HUB_ROOT, classify it, inject the layer key
# (or report it in dry-run mode) and print the totals.
RAW=0
WIKI=0
SKIPPED=0
TOTAL=0

# A failing `find` inside process substitution would otherwise go unnoticed
# and the run would just report zero files.
if [[ ! -d "$HUB_ROOT" ]]; then
  printf 'error: Hub root not found: %s\n' "$HUB_ROOT" >&2
  exit 1
fi

echo "Classifying files in $HUB_ROOT..."
if [[ "$DRY_RUN" == "true" ]]; then
  echo "(DRY-RUN mode — no files modified)"
fi
echo ""

while IFS= read -r -d '' f; do
  rel="${f#"${HUB_ROOT}"/}"
  if is_excluded "$rel"; then
    SKIPPED=$((SKIPPED + 1))
    continue
  fi
  TOTAL=$((TOTAL + 1))

  layer="$(classify_file "$f")"
  inject_layer "$f" "$layer"

  case "$layer" in
    raw) RAW=$((RAW + 1)) ;;
    wiki) WIKI=$((WIKI + 1)) ;;
  esac
  # .git and node_modules are pruned so find never descends into them;
  # -type f keeps directories and symlinks named *.md out of the rewrite step.
done < <(find "$HUB_ROOT" \( -name .git -o -name node_modules \) -prune \
  -o -type f -name '*.md' -print0)

echo ""
echo "────────────────────────────────────"
echo "Total: $TOTAL | Raw: $RAW | Wiki: $WIKI | Skipped: $SKIPPED"
echo ""
# An `if` rather than `[[ … ]] && echo`: as the last command of the script,
# the short-circuit form would make a successful non-dry run exit with 1.
if [[ "$DRY_RUN" == "true" ]]; then
  echo "(Re-run without --dry-run to apply)"
fi