Files
scripts/scraper/monitor_extraction_batch2.sh

48 lines
2.4 KiB
Bash
Executable File

#!/bin/bash
# Monitor de Extração Batch 2 - CTF Knowledge Base
# Author: Descomplicar® Crescimento Digital
# Link: https://descomplicar.pt
# Copyright: 2025 Descomplicar®
# ---------------------------------------------------------------------------
# Live console monitor: every REFRESH_SECONDS it redraws the screen with the
# number of *.json files found in OUTPUT_DIR and the last line of LOG_FILE.
# Runs until interrupted (Ctrl+C).
# ---------------------------------------------------------------------------

# Log file whose last line is displayed on each refresh. The path is relative,
# so it is resolved against the directory the monitor is started from.
readonly LOG_FILE="extraction_production_batch2.log"
# Directory whose *.json files are counted on each refresh.
readonly OUTPUT_DIR="/media/ealmeida/Dados/GDrive/Cloud/Clientes_360/CTF_Carstuff/KB/Scrapper/sites/knowledge_base_final"
# Seconds between refreshes; also interpolated into the on-screen message so
# the displayed interval can never drift from the real one.
readonly REFRESH_SECONDS=30
# Horizontal rule used to frame the banner and the dashboard.
readonly RULE="═══════════════════════════════════════════════════════════"

# With nullglob an unmatched pattern expands to nothing instead of staying as
# the literal "*.json", so an empty or missing directory counts as 0 files.
shopt -s nullglob

# Start-up banner (shown once, before the first refresh clears the screen).
echo "$RULE"
echo " MONITOR EXTRAÇÃO BATCH 2 - CTF KNOWLEDGE BASE"
echo "$RULE"
echo ""

while true; do
  # Count extracted files with a shell glob rather than parsing `ls` output:
  # no external process (so no "argument list too long" failure hidden behind
  # 2>/dev/null), and file names containing newlines are counted once.
  json_files=("$OUTPUT_DIR"/*.json)
  COUNT=${#json_files[@]}

  # Last line of the log; stays empty (errors silenced on purpose) while the
  # log file does not exist yet.
  LAST_LINE=$(tail -n 1 "$LOG_FILE" 2>/dev/null)

  # Wall-clock time of this refresh.
  TIMESTAMP=$(date '+%H:%M:%S')

  # Gather all data first, then clear and redraw in one go to limit flicker.
  clear
  echo "$RULE"
  echo " MONITOR EXTRAÇÃO BATCH 2 - 14 SITES (3,285 ficheiros)"
  echo " [$TIMESTAMP] - Atualizado a cada ${REFRESH_SECONDS} segundos"
  echo "$RULE"
  echo ""
  echo "📊 Casos Extraídos (JSON): $COUNT"
  echo ""
  echo "📈 Última Linha Log:"
  # printf, not echo: the log line is arbitrary data and could start with
  # something echo would treat as an option (e.g. "-n" or "-e").
  printf '%s\n' "$LAST_LINE"
  echo ""
  echo "🎯 Sites em Processamento:"
  echo " Batch 1: thehogring.com, forums.pelicanparts.com, thesamba.com, sailrite.com"
  echo " Batch 2: relicate.com, trawlerforum.com, alfabb.com, vansairforce.net"
  echo " mgexp.com, cruisersforum.com, ultrafabricsinc.com, sunbrella.com"
  echo " camirafabrics.com, keystonbros.com"
  echo ""
  echo "$RULE"
  echo "Pressiona Ctrl+C para parar o monitor"
  echo "$RULE"

  sleep "$REFRESH_SECONDS"
done