feat: scripts de projectos vindos do Hub (podcast, alojadamaria, clip, ocr, etc.)

Movidos do vault Hub para centralizar os scripts. O Hub mantém symlinks.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-28 20:53:29 +01:00
parent e810bbb114
commit 6035542b67
27 changed files with 4246 additions and 0 deletions
+141
View File
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
Captura screenshots de alojadamaria.com para auditoria visual SEO/UX
"""
from playwright.sync_api import sync_playwright
import json
import time
import os
# Root of the site under audit; every page URL below is built by appending to it.
BASE_URL = "https://alojadamaria.com/"

# Folder that receives the screenshots and metadados.json (created on import).
OUTPUT_DIR = "/media/ealmeida/Dados/Hub/03-Propostas/ALojaDaMaria/screenshots/alojadamaria"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Viewport sizes used for each capture pass, keyed by device label.
VIEWPORTS = dict(
    desktop=dict(width=1440, height=900),
    mobile=dict(width=375, height=812),
)

# Pages to capture, keyed by the label used in the output file names.
PAGES = {
    label: BASE_URL + path
    for label, path in (
        ("homepage", ""),
        ("categoria", "product-category/novidades/"),
        ("contacto", "contactos/"),
    )
}
def _fechar_popups(page):
    """Dismiss common overlays (cookie consent, newsletter) on a best-effort basis.

    For each known selector the first matching element is clicked when it is
    visible. A failure on one selector is ignored so that a stubborn pop-up
    never aborts the capture.
    """
    seletores = (
        "button[class*='close']",
        "button[class*='dismiss']",
        "[class*='cookie'] button",
        "[id*='cookie'] button",
        "[class*='popup-close']",
        ".pum-close",
        "button[aria-label*='Close']",
        "button[aria-label*='close']",
    )
    for seletor in seletores:
        try:
            el = page.query_selector(seletor)
            if el and el.is_visible():
                el.click()
                # Give the overlay time to disappear before the next attempt.
                page.wait_for_timeout(500)
        except Exception:
            # Deliberate best effort: move on to the next selector.
            continue


def capturar(page, url, nome, viewport):
    """Capture above-the-fold and full-page screenshots and collect metadata.

    Args:
        page: Playwright page used for navigation and capture.
        url: Address to load.
        nome: File-name prefix; produces ``{nome}_atf.png`` and
            ``{nome}_full.png`` inside ``OUTPUT_DIR``.
        viewport: Mapping with ``width`` and ``height``; it sizes the
            above-the-fold clip.

    Returns:
        A dict with ``url``, ``title``, ``h1``, ``nav_visible``,
        ``ctas_sample``, ``popup_detected`` and ``logo_bounding_box``. If
        anything fails, a dict with ``url`` and ``error`` is returned instead,
        so one broken page does not stop the whole run.
    """
    print(f" -> A capturar: {nome} ({viewport['width']}x{viewport['height']})")
    try:
        page.goto(url, wait_until="networkidle", timeout=30000)
        # Playwright's sync API should pause with wait_for_timeout rather than
        # time.sleep, so that its internal event processing keeps running.
        page.wait_for_timeout(2000)

        _fechar_popups(page)

        # Above the fold: only the viewport rectangle.
        page.screenshot(
            path=f"{OUTPUT_DIR}/{nome}_atf.png",
            full_page=False,
            clip={"x": 0, "y": 0, "width": viewport["width"], "height": viewport["height"]},
        )
        # Whole page.
        page.screenshot(path=f"{OUTPUT_DIR}/{nome}_full.png", full_page=True)

        # Metadata.
        title = page.title()
        h1_texts = [
            el.inner_text().strip()
            for el in page.query_selector_all("h1")
            if el.is_visible()
        ]
        nav_visible = bool(page.query_selector("nav, [class*='nav'], [class*='menu']"))

        # Sample the text of visible CTAs among the first five matches.
        ctas = []
        cta_selector = (
            "a[class*='btn'], a[class*='button'], button[class*='btn'], "
            ".add-to-cart, [class*='cta']"
        )
        for el in page.query_selector_all(cta_selector)[:5]:
            try:
                if el.is_visible():
                    ctas.append(el.inner_text().strip()[:50])
            except Exception:
                # The element may have been detached meanwhile; skip it.
                continue

        popup_visible = bool(
            page.query_selector(
                ".pum-overlay, [class*='popup'][style*='display: block'], "
                "[class*='modal'][style*='display: block']"
            )
        )

        # Logo dimensions (None when no logo is found or it cannot be measured).
        logo_info = None
        logo = page.query_selector(
            "img[class*='logo'], a[class*='logo'] img, header img, .site-logo img"
        )
        if logo:
            try:
                logo_info = logo.bounding_box()
            except Exception:
                logo_info = None

        return {
            "url": url,
            "title": title,
            "h1": h1_texts,
            "nav_visible": nav_visible,
            "ctas_sample": ctas,
            "popup_detected": popup_visible,
            "logo_bounding_box": logo_info,
        }
    except Exception as e:
        print(f" ERRO: {e}")
        # Keep the URL so the failing page can be identified in the JSON output.
        return {"url": url, "error": str(e)}
def main():
    """Capture every page in PAGES once per viewport in VIEWPORTS.

    A fresh browser context is opened for each viewport and a single page is
    reused for all URLs of that viewport. The metadata returned by
    ``capturar`` is collected under ``"<device>_<page>"`` keys, written to
    ``metadados.json`` inside ``OUTPUT_DIR`` and also returned.
    """
    resultados = {}
    with sync_playwright() as playwright:
        navegador = playwright.chromium.launch(headless=True)
        for dispositivo, viewport in VIEWPORTS.items():
            largura, altura = viewport["width"], viewport["height"]
            print(f"\n[{dispositivo.upper()}] {largura}x{altura}")
            contexto = navegador.new_context(
                viewport=viewport,
                user_agent="Mozilla/5.0 (compatible; AuditBot/1.0)",
                locale="pt-PT",
            )
            separador = contexto.new_page()
            for nome_pagina, url in PAGES.items():
                print(f" Página: {nome_pagina}")
                chave = f"{dispositivo}_{nome_pagina}"
                resultados[chave] = capturar(separador, url, chave, viewport)
            contexto.close()
        navegador.close()

    destino = f"{OUTPUT_DIR}/metadados.json"
    with open(destino, "w", encoding="utf-8") as ficheiro:
        ficheiro.write(json.dumps(resultados, ensure_ascii=False, indent=2))
    print("\nCaptura concluída. Ficheiros em:", OUTPUT_DIR)
    return resultados


if __name__ == "__main__":
    main()
+122
View File
@@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""
Captura detalhes adicionais: hero CTA, produto, footer, barra anúncio
"""
from playwright.sync_api import sync_playwright
import time
# Folder that every screenshot path in this script is built from.
OUTPUT_DIR = "/media/ealmeida/Dados/Hub/03-Propostas/ALojaDaMaria/screenshots/alojadamaria"
# Site root; main() loads it for the homepage captures.
BASE_URL = "https://alojadamaria.com/"
def crop(page, path, clip):
    """Screenshot the `clip` rectangle of `page` into `path` and log the file name."""
    opcoes = {"path": path, "clip": clip, "full_page": False}
    page.screenshot(**opcoes)
    print(f" Guardado: {path}")
def _guardar_recorte(page, ficheiro, largura, altura, mensagem):
    """Save a `largura` x `altura` clip anchored at x=0, y=0 and print `mensagem`."""
    page.screenshot(
        path=f"{OUTPUT_DIR}/{ficheiro}",
        clip={"x": 0, "y": 0, "width": largura, "height": altura},
    )
    print(mensagem)


def _deslocar(page, expressao_y):
    """Scroll the window vertically to `expressao_y` (a JS expression) and let it settle."""
    page.evaluate(f"window.scrollTo(0, {expressao_y})")
    page.wait_for_timeout(1000)


def _encontrar_url_produto(page):
    """Return the href of the first product-detail link on `page`, or None."""
    for link in page.query_selector_all("a[href*='product']"):
        href = link.get_attribute("href")
        if href and "product-category" not in href and "alojadamaria.com/product" in href:
            return href
    return None


def _capturar_produto(page, url, prefixo):
    """Load a product page and save its above-the-fold and full-page screenshots."""
    page.goto(url, wait_until="networkidle", timeout=30000)
    page.wait_for_timeout(2000)
    page.screenshot(path=f"{OUTPUT_DIR}/{prefixo}_produto_detalhe_atf.png", full_page=False)
    page.screenshot(path=f"{OUTPUT_DIR}/{prefixo}_produto_detalhe_full.png", full_page=True)


def main():
    """Capture detail crops of the homepage and of one product page.

    Desktop (1440x900) and mobile (375x812) contexts are used in turn. The
    product page is the first product link found on the mobile homepage; when
    none is found the product captures are skipped.

    Pauses use ``page.wait_for_timeout`` rather than ``time.sleep``, which
    Playwright's sync API documentation advises against. Contexts and the
    browser are closed in ``finally`` blocks so a failed navigation does not
    leave them open.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            # --- Desktop 1440px ---
            ctx = browser.new_context(viewport={"width": 1440, "height": 900}, locale="pt-PT")
            try:
                page = ctx.new_page()
                page.goto(BASE_URL, wait_until="networkidle", timeout=40000)
                page.wait_for_timeout(2000)
                # Full hero with the CTA visible.
                _guardar_recorte(page, "desktop_hero_zoom.png", 1440, 600, " Hero desktop guardado")
                # Header / navigation.
                _guardar_recorte(page, "desktop_header.png", 1440, 80, " Header desktop guardado")
                # Top announcement bar.
                _guardar_recorte(page, "desktop_announcebar.png", 1440, 35, " Barra anúncio guardada")
                # Products (scroll down to the section first).
                _deslocar(page, "700")
                _guardar_recorte(page, "desktop_produtos.png", 1440, 900, " Produtos desktop guardados")
                # Footer.
                _deslocar(page, "document.body.scrollHeight")
                _guardar_recorte(page, "desktop_footer.png", 1440, 900, " Footer desktop guardado")
            finally:
                ctx.close()

            # --- Mobile 375px ---
            ctx_m = browser.new_context(viewport={"width": 375, "height": 812}, locale="pt-PT")
            try:
                page_m = ctx_m.new_page()
                page_m.goto(BASE_URL, wait_until="networkidle", timeout=40000)
                page_m.wait_for_timeout(2000)
                _guardar_recorte(page_m, "mobile_header.png", 375, 120, " Header mobile guardado")
                _guardar_recorte(page_m, "mobile_hero.png", 375, 500, " Hero mobile guardado")
                _deslocar(page_m, "500")
                _guardar_recorte(page_m, "mobile_produtos.png", 375, 812, " Produtos mobile guardados")
                _deslocar(page_m, "document.body.scrollHeight")
                _guardar_recorte(page_m, "mobile_footer.png", 375, 812, " Footer mobile guardado")

                # Try to find the URL of a real product.
                product_url = _encontrar_url_produto(page_m)
                if product_url:
                    print(f"\n URL produto encontrado: {product_url}")
                    _capturar_produto(page_m, product_url, "mobile")
                    print(" Produto detalhe mobile guardado")

                    # Same product at desktop size, in its own context.
                    ctx_d2 = browser.new_context(
                        viewport={"width": 1440, "height": 900}, locale="pt-PT"
                    )
                    try:
                        _capturar_produto(ctx_d2.new_page(), product_url, "desktop")
                        print(" Produto detalhe desktop guardado")
                    finally:
                        ctx_d2.close()
            finally:
                ctx_m.close()
        finally:
            browser.close()
    print("\nCapturas extra concluídas.")


if __name__ == "__main__":
    main()
+246
View File
@@ -0,0 +1,246 @@
#!/usr/bin/env python3
"""
Script de captura e análise visual SEO para descomplicar.pt
Analisa: capturas desktop/mobile, above-the-fold, imagens, CTAs
"""
import json
import re
from playwright.sync_api import sync_playwright
# Page analysed when the script is run directly.
URL = "https://descomplicar.pt"

# Folder that receives the screenshots and analysis_data.json.
SCREENSHOTS_DIR = "/media/ealmeida/Dados/Hub/03-Propostas/ALojaDaMaria/screenshots"

# Named viewport sizes, in the order they are declared below.
VIEWPORTS = {
    nome: {"width": largura, "height": altura}
    for nome, largura, altura in (
        ("desktop", 1920, 1080),
        ("laptop", 1366, 768),
        ("tablet", 768, 1024),
        ("mobile", 375, 812),
    )
}
def capture(url, output_path, viewport_width=1920, viewport_height=1080):
    """Save a viewport-only screenshot of `url` to `output_path`.

    A Chromium instance is launched for this single capture and closed again
    before returning.
    """
    tamanho = {"width": viewport_width, "height": viewport_height}
    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch()
        separador = chromium.new_page(viewport=tamanho)
        separador.goto(url, wait_until="networkidle", timeout=30000)
        separador.screenshot(path=output_path, full_page=False)
        chromium.close()
# In-page script: above-the-fold summary (H1s, CTAs, value proposition, trust signals).
_JS_DESKTOP_ATF = """() => {
    const vw = window.innerWidth;
    const vh = window.innerHeight;
    // H1
    const h1s = Array.from(document.querySelectorAll('h1'));
    const h1Visible = h1s.filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0;
    });
    // CTAs (botões e links com texto de acção)
    const ctaKeywords = /contacto|falar|orçamento|começar|saber mais|ver mais|agendar|demo|serviços|get started|contact/i;
    const allBtns = Array.from(document.querySelectorAll('a, button'));
    const ctasAtf = allBtns.filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0 && ctaKeywords.test(el.textContent);
    }).map(el => ({text: el.textContent.trim().substring(0,60), tag: el.tagName, top: Math.round(el.getBoundingClientRect().top)}));
    // Value proposition (primeiro parágrafo/subtítulo visível)
    const textEls = Array.from(document.querySelectorAll('h2, h3, p, .subtitle, .hero-text, [class*="hero"] p, [class*="tagline"]'));
    const vpEl = textEls.find(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && el.textContent.trim().length > 30;
    });
    // Sinais de confiança (logos, testimonials, reviews)
    const trustSelectors = '[class*="client"], [class*="partner"], [class*="logo"], [class*="review"], [class*="testim"], [class*="trust"], .stars, [class*="rating"]';
    const trustEls = Array.from(document.querySelectorAll(trustSelectors));
    const trustAtf = trustEls.filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0;
    }).length;
    return {
        viewport: {width: vw, height: vh},
        h1Count: h1s.length,
        h1Texts: h1s.map(el => ({text: el.textContent.trim().substring(0,100), visible: h1Visible.includes(el)})),
        h1AboveFold: h1Visible.length,
        ctasAboveFold: ctasAtf,
        valueProposition: vpEl ? vpEl.textContent.trim().substring(0,200) : null,
        trustSignalsAboveFold: trustAtf,
    };
}"""

# In-page script: one record per <img> element.
_JS_IMAGES = """() => {
    return Array.from(document.querySelectorAll('img')).map(img => ({
        src: img.src.substring(0, 120),
        alt: img.alt,
        hasAlt: img.alt.trim().length > 0,
        loading: img.loading,
        width: img.width,
        height: img.height,
        hasWidthAttr: img.hasAttribute('width'),
        hasHeightAttr: img.hasAttribute('height'),
        isWebP: img.src.includes('.webp'),
        isAvif: img.src.includes('.avif'),
        naturalWidth: img.naturalWidth,
        naturalHeight: img.naturalHeight,
        rect: (() => { const r = img.getBoundingClientRect(); return {top: Math.round(r.top), visible: r.width > 0}; })()
    }));
}"""

# In-page script: title, meta/Open Graph tags, canonical link, lang, heading counts.
_JS_META_SEO = """() => {
    const getMeta = (name) => {
        const el = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`);
        return el ? el.getAttribute('content') : null;
    };
    return {
        title: document.title,
        metaDescription: getMeta('description'),
        ogTitle: getMeta('og:title'),
        ogDescription: getMeta('og:description'),
        ogImage: getMeta('og:image'),
        canonical: (() => { const l = document.querySelector('link[rel="canonical"]'); return l ? l.href : null; })(),
        lang: document.documentElement.lang,
        h2Count: document.querySelectorAll('h2').length,
        h3Count: document.querySelectorAll('h3').length,
    };
}"""

# In-page script: resource counts taken from the Performance API.
_JS_PERF = """() => {
    const entries = performance.getEntriesByType('resource');
    const imgs = entries.filter(e => e.initiatorType === 'img');
    const scripts = entries.filter(e => e.initiatorType === 'script');
    const styles = entries.filter(e => e.initiatorType === 'link' || e.initiatorType === 'css');
    return {
        totalResources: entries.length,
        imgCount: imgs.length,
        scriptCount: scripts.length,
        styleCount: styles.length,
    };
}"""

# In-page script: mobile checks (horizontal overflow, navigation, tap targets,
# base font size, H1 and CTAs above the fold).
_JS_MOBILE_CHECKS = """() => {
    const vw = window.innerWidth;
    const vh = window.innerHeight;
    const docWidth = document.documentElement.scrollWidth;
    // Verificar overflow horizontal
    const hasHorizontalScroll = docWidth > vw;
    // Navegação móvel
    const nav = document.querySelector('nav, [class*="nav"], [class*="menu"], header');
    const navVisible = nav ? nav.getBoundingClientRect().width > 0 : false;
    const hamburger = document.querySelector('[class*="hamburger"], [class*="toggle"], [class*="burger"], .menu-icon, [aria-label*="menu"], [aria-label*="Menu"]');
    // Tap targets smaller than 44x44px
    const allTapTargets = Array.from(document.querySelectorAll('a, button, input, select, textarea'));
    const smallTargetsAll = allTapTargets.filter(el => {
        const r = el.getBoundingClientRect();
        return r.width > 0 && r.height > 0 && (r.width < 44 || r.height < 44);
    });
    // Only the first 10 are reported in detail; the count covers all of them.
    const smallTargets = smallTargetsAll.slice(0, 10).map(el => ({
        tag: el.tagName,
        text: el.textContent.trim().substring(0, 40),
        w: Math.round(el.getBoundingClientRect().width),
        h: Math.round(el.getBoundingClientRect().height)
    }));
    // Tamanho de fonte base
    const bodyFontSize = parseFloat(window.getComputedStyle(document.body).fontSize);
    // H1 visível no mobile
    const h1s = Array.from(document.querySelectorAll('h1'));
    const h1MobileVisible = h1s.filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0;
    });
    // CTAs mobile
    const ctaKeywords = /contacto|falar|orçamento|começar|saber mais|ver mais|agendar|demo|serviços/i;
    const ctasMobile = Array.from(document.querySelectorAll('a, button')).filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0 && ctaKeywords.test(el.textContent);
    }).map(el => ({text: el.textContent.trim().substring(0,50), w: Math.round(el.getBoundingClientRect().width), h: Math.round(el.getBoundingClientRect().height)}));
    return {
        viewport: {width: vw, height: vh},
        documentWidth: docWidth,
        hasHorizontalScroll,
        navVisible,
        hasHamburger: !!hamburger,
        hamburgerClass: hamburger ? hamburger.className.substring(0,60) : null,
        smallTapTargets: smallTargets,
        smallTapTargetCount: smallTargetsAll.length,
        bodyFontSize,
        h1AboveFoldMobile: h1MobileVisible.length,
        h1TextMobile: h1MobileVisible[0] ? h1MobileVisible[0].textContent.trim().substring(0,100) : null,
        ctasMobileAtf: ctasMobile,
    };
}"""


def analyse_page(url):
    """Screenshot `url` at desktop, mobile and laptop sizes and collect audit data.

    Screenshots are written into ``SCREENSHOTS_DIR``: desktop and mobile get a
    viewport and a full-page image, laptop gets the viewport image only.

    Args:
        url: Address of the page to analyse.

    Returns:
        A dict with the keys ``desktop_atf``, ``images``, ``meta_seo`` and
        ``perf`` (evaluated on the desktop page) and ``mobile`` (evaluated on
        the mobile page). ``mobile["smallTapTargetCount"]`` counts every tap
        target below 44px, while ``mobile["smallTapTargets"]`` lists only the
        first ten of them.
    """
    results = {}
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            # --- Desktop 1920x1080 ---
            page = browser.new_page(viewport=VIEWPORTS["desktop"])
            page.goto(url, wait_until="networkidle", timeout=30000)
            page.screenshot(path=f"{SCREENSHOTS_DIR}/desktop_1920.png", full_page=False)
            page.screenshot(path=f"{SCREENSHOTS_DIR}/desktop_1920_full.png", full_page=True)
            results["desktop_atf"] = page.evaluate(_JS_DESKTOP_ATF)
            results["images"] = page.evaluate(_JS_IMAGES)
            results["meta_seo"] = page.evaluate(_JS_META_SEO)
            results["perf"] = page.evaluate(_JS_PERF)

            # --- Mobile 375x812 ---
            mobile_page = browser.new_page(
                viewport=VIEWPORTS["mobile"],
                user_agent="Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1",
            )
            mobile_page.goto(url, wait_until="networkidle", timeout=30000)
            mobile_page.screenshot(path=f"{SCREENSHOTS_DIR}/mobile_375.png", full_page=False)
            mobile_page.screenshot(path=f"{SCREENSHOTS_DIR}/mobile_375_full.png", full_page=True)
            results["mobile"] = mobile_page.evaluate(_JS_MOBILE_CHECKS)

            # --- Laptop 1366x768 ---
            laptop_page = browser.new_page(viewport=VIEWPORTS["laptop"])
            laptop_page.goto(url, wait_until="networkidle", timeout=30000)
            laptop_page.screenshot(path=f"{SCREENSHOTS_DIR}/laptop_1366.png", full_page=False)
        finally:
            # Close the browser even when a navigation or evaluation fails.
            browser.close()
    return results
if __name__ == "__main__":
    # Run the analysis, persist the result as JSON and echo it to the console.
    print("A capturar screenshots e analisar descomplicar.pt...")
    data = analyse_page(URL)
    resumo = json.dumps(data, ensure_ascii=False, indent=2)
    output_file = f"{SCREENSHOTS_DIR}/analysis_data.json"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(resumo)
    print(f"Análise concluída. Dados guardados em: {output_file}")
    print(f"Screenshots em: {SCREENSHOTS_DIR}/")
    print("\n--- RESUMO ---")
    print(resumo)
+11
View File
@@ -0,0 +1,11 @@
#!/bin/bash
# Runs the Claude CLI on one Perfex task.
#
# Usage: <script> '<task-json>'
#   The JSON must carry the task id under .task_id or .id; the task name is
#   read from .task_name or .name.
#
# Steps: send status=4 for the task through the Perfex API, then launch the
# Claude CLI with a prompt that names the task. All output is appended to a
# per-task, per-day log file.

PERFEX_URL="https://desk.descomplicar.pt"
# SECURITY(review): this token is committed in clear text. Export
# PERFEX_API_KEY in the environment instead (it takes precedence here) and
# rotate the committed value.
PERFEX_API_KEY="${PERFEX_API_KEY:-eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyIjoibWNwIiwibmFtZSI6Im1jcCIsIkFQSV9USU1FIjoxNzQxOTY1MDQ3fQ.hNv_dMzijjbNTI9-wVxsHXUm-K8ckGN5v4f9Kgk-dPc}"
CLAUDE_LOG_DIR="/home/ealmeida/.logs/claude-agent"

TASK_JSON="$1"
# printf with a quoted argument passes the JSON through untouched; an unquoted
# echo would word-split it and expand any glob characters it contains.
TASK_ID=$(printf '%s' "$TASK_JSON" | jq -r '.task_id // .id')
TASK_NAME=$(printf '%s' "$TASK_JSON" | jq -r '.task_name // .name')

# jq prints "null" when neither key exists; stop before calling the API with it.
if [ -z "$TASK_ID" ] || [ "$TASK_ID" = "null" ]; then
    echo "ERRO: JSON da tarefa sem task_id/id" >&2
    exit 1
fi

# The redirections below fail if the log directory does not exist yet.
mkdir -p "$CLAUDE_LOG_DIR"
LOG_FILE="$CLAUDE_LOG_DIR/task-$TASK_ID-$(date +%Y%m%d).log"

echo "[$(date)] INÍCIO — Tarefa #$TASK_ID: $TASK_NAME" >> "$LOG_FILE"
curl -s -X PUT -H "authtoken: $PERFEX_API_KEY" -d "status=4" "$PERFEX_URL/api/v1/tasks/$TASK_ID" >> "$LOG_FILE" 2>&1
claude -p "És o AIkTop. Resolve a tarefa #$TASK_ID. No final, usa MCP para marcar status 5." --allowedTools Read,Edit,Write,Bash,Command --max-turns 20 --yes >> "$LOG_FILE" 2>&1
+129
View File
@@ -0,0 +1,129 @@
import json
import re
from datetime import datetime
import os
def parse_mcp_output(mcp_output_string):
    """Extract the issue list from raw MCP tool output.

    The output may carry non-JSON text (such as truncation info) before the
    payload, so parsing starts at the first ``{``. When a ``[`` appears before
    any ``{``, parsing is attempted from that ``[`` first, so that a top-level
    JSON array is accepted as well; slicing at the first ``{`` alone would cut
    the opening bracket off and make every array payload unparseable.

    Args:
        mcp_output_string: Raw text returned by the MCP tool.

    Returns:
        A ``(data, error)`` tuple. On success ``data`` is the value stored
        under ``"Result"`` (for an object payload) or the list itself (for an
        array payload) and ``error`` is ``None``. On failure ``data`` is
        ``None`` and ``error`` is a message.
    """
    object_start = mcp_output_string.find("{")
    array_start = mcp_output_string.find("[")

    # Candidate offsets, in the order they are tried.
    candidates = []
    if array_start != -1 and (object_start == -1 or array_start < object_start):
        candidates.append(array_start)
    if object_start != -1:
        candidates.append(object_start)
    if not candidates:
        return None, "Error: No JSON content found in tool output."

    error = None
    for start in candidates:
        clean_content = mcp_output_string[start:]
        try:
            parsed_content = json.loads(clean_content)
        except json.JSONDecodeError as e:
            error = f"Error: Could not parse JSON content after cleaning: {e} - {clean_content[:200]}..."
            continue
        if isinstance(parsed_content, dict) and "Result" in parsed_content:
            return parsed_content["Result"], None
        if isinstance(parsed_content, list):
            return parsed_content, None
        return None, f"Error: Unexpected JSON structure after cleaning: {clean_content[:200]}..."
    return None, error
def fetch_all_issues_and_save(owner, repo, state, file_path, page_size=100):
    """Load the issues previously saved in `file_path`.

    Despite its name, this function does not fetch anything: MCP tools cannot
    be called from inside this script, so the paginated download has to be
    done externally and its combined output written to `file_path`
    beforehand. `state` and `page_size` are accepted for that future use and
    are currently ignored.

    Args:
        owner: Repository owner (only used in the debug message).
        repo: Repository name (only used in the debug message).
        state: Issue state filter; currently unused.
        file_path: File holding the full tool output for all pages.
        page_size: Page size for the future paginated fetch; currently unused.

    Returns:
        The parsed issue data, or ``None`` when the file is missing or cannot
        be parsed (a message is printed in both cases).
    """
    # Placeholder for the real paginated call; only the first page is mentioned.
    print(f"DEBUG: Placeholder for fetching page 1 from {owner}/{repo}")

    if not os.path.exists(file_path):
        print(f"Error: Issue data file not found at {file_path}. Please ensure it is created with full data.")
        return None

    # JSON is UTF-8; do not depend on the locale's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        full_issues_content = f.read()

    issues_data, error = parse_mcp_output(full_issues_content)
    if error:
        print(error)
        return None
    return issues_data
def _created_at_key(issue):
    """Sort key: the issue's ``created_at`` as a timezone-aware datetime.

    Accepts both a trailing ``Z`` and a numeric UTC offset such as ``+01:00``.
    A value without any zone information is treated as UTC so that all keys
    stay comparable.
    """
    from datetime import timezone  # local import: the file only imports `datetime`

    raw = issue["created_at"]
    if raw.endswith("Z"):
        raw = raw[:-1] + "+00:00"
    moment = datetime.fromisoformat(raw)
    if moment.tzinfo is None:
        moment = moment.replace(tzinfo=timezone.utc)
    return moment


def find_first_unhandled_original_issue(issues_data):
    """Return the oldest open issue that has not been delegated yet.

    An issue is skipped when it is closed, when its title starts with one of
    the delegation prefixes, or when it has comments and either its body
    mentions "Tarefa delegada ao Dir." or its title contains "n8n Workflow".
    The comment check is a simplification: the comments themselves are not
    inspected.

    Args:
        issues_data: List of issue dicts with the keys ``created_at``,
            ``state``, ``title``, ``body``, ``comments`` and ``number``.
            The list is not modified.

    Returns:
        A dict with ``number``, ``title`` and ``body`` of the first match in
        creation order, or ``None`` when there is no input or no match.
    """
    if not issues_data:
        return None

    delegation_prefixes = (
        "[Dir. Automação]",
        "[Dir. Desenvolvimento]",
        "[Dir. Infraestrutura]",
        "[COO]",
        "[Improvement Evaluator]",
    )

    # sorted() leaves the caller's list in its original order.
    for issue in sorted(issues_data, key=_created_at_key):
        if issue["state"] == "closed":
            continue
        if issue["title"].startswith(delegation_prefixes):
            continue
        body = issue["body"] or ""  # the body may be None
        if issue["comments"] > 0 and (
            "Tarefa delegada ao Dir." in body or "n8n Workflow" in issue["title"]
        ):
            continue
        # Open, not delegated, original issue.
        return {
            "number": issue["number"],
            "title": issue["title"],
            "body": issue["body"],
        }
    return None
# Main execution flow: read the pre-saved issue dump and report the oldest
# open issue that has not been delegated.
temp_file_path = "open_issues.json"
# owner/repo/state describe the query that produced the dump. They are only
# needed by fetch_all_issues_and_save, which has to be driven externally
# because MCP tools cannot be called from this script.
owner = "ealmeida"
repo = "mcp-paperclip"
state = "open"

if not os.path.exists(temp_file_path):
    print(f"Error: Issue data file not found at {temp_file_path}. Please create it manually with full data.")
    # SystemExit works without the interactive `exit` helper added by `site`.
    raise SystemExit(1)

# JSON is UTF-8; do not depend on the locale's default encoding.
with open(temp_file_path, "r", encoding="utf-8") as f:
    full_issues_content = f.read()

issues_data, error = parse_mcp_output(full_issues_content)
if error:
    print(error)
    raise SystemExit(1)

unhandled_issue = find_first_unhandled_original_issue(issues_data)
if unhandled_issue:
    print("Oldest unhandled original issue found:")
    # Single quotes inside the f-strings: reusing the outer quote character is
    # a SyntaxError before Python 3.12.
    print(f"Issue Number: {unhandled_issue['number']}")
    print(f"Issue Title: {unhandled_issue['title']}")
    print(f"Issue Body: {unhandled_issue['body']}")
else:
    print("No unhandled original issues found in the provided data.")
+4
View File
@@ -0,0 +1,4 @@
#!/bin/bash
# Reports, for every agent whose adapter_config has an instructionsFilePath
# key, whether the referenced file exists on this machine.
echo "$(date): Validating instructionsFilePath..."

# -A (unaligned) and -t (tuples only) make psql print one "name|path|status"
# line per row, without the header, separator and row-count lines that would
# otherwise be reported as MISSING entries.
PGPASSWORD=paperclip psql -h localhost -p 54329 -U paperclip -d paperclip -At -F '|' \
    -c "SELECT name, COALESCE(adapter_config->>'instructionsFilePath', 'none') as path, status FROM agents WHERE adapter_config ? 'instructionsFilePath' ORDER BY name;" |
while IFS='|' read -r name path status; do
    if [[ "$path" != 'none' ]] && [ -f "$path" ]; then
        echo "OK: $name ($status) -> $path"
    else
        echo "MISSING: $name ($status) -> $path"
    fi
done
echo "---"
+151
View File
@@ -0,0 +1,151 @@
"""Lightweight connection handling for MCP servers."""
from abc import ABC, abstractmethod
from contextlib import AsyncExitStack
from typing import Any
from mcp import ClientSession, StdioServerParameters
from mcp.client.sse import sse_client
from mcp.client.stdio import stdio_client
from mcp.client.streamable_http import streamablehttp_client
class MCPConnection(ABC):
    """Abstract async context manager wrapping one MCP client session.

    Subclasses supply the transport through ``_create_context``. Entering the
    connection opens the transport, creates a ``ClientSession`` on top of it
    and initializes it; leaving it unwinds everything in reverse order.
    """

    def __init__(self):
        self.session = None
        self._stack = None

    @abstractmethod
    def _create_context(self):
        """Return the async context manager that opens the transport."""

    async def __aenter__(self):
        """Open the transport and an initialized session, then return self."""
        stack = AsyncExitStack()
        self._stack = stack
        await stack.__aenter__()
        try:
            streams = await stack.enter_async_context(self._create_context())
            stream_count = len(streams)
            if stream_count == 2:
                reader, writer = streams
            elif stream_count == 3:
                # The third element is not needed here.
                reader, writer, _ = streams
            else:
                raise ValueError(f"Unexpected context result: {streams}")
            self.session = await stack.enter_async_context(ClientSession(reader, writer))
            await self.session.initialize()
        except BaseException:
            # Release whatever was already entered before propagating.
            await stack.__aexit__(None, None, None)
            raise
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the session and transport, then forget both."""
        if self._stack:
            await self._stack.__aexit__(exc_type, exc_val, exc_tb)
        self.session, self._stack = None, None

    async def list_tools(self) -> list[dict[str, Any]]:
        """Return name, description and input schema of every server tool."""
        listing = await self.session.list_tools()
        described = []
        for tool in listing.tools:
            described.append(
                {
                    "name": tool.name,
                    "description": tool.description,
                    "input_schema": tool.inputSchema,
                }
            )
        return described

    async def call_tool(self, tool_name: str, arguments: dict[str, Any]) -> Any:
        """Invoke `tool_name` with `arguments` and return the result's content."""
        outcome = await self.session.call_tool(tool_name, arguments=arguments)
        return outcome.content
class MCPConnectionStdio(MCPConnection):
    """MCP connection over standard input/output.

    The transport is built from a command, its arguments and an optional
    environment mapping.
    """

    def __init__(self, command: str, args: list[str] = None, env: dict[str, str] = None):
        super().__init__()
        self.command = command
        # A missing or empty argument list becomes a fresh empty list.
        self.args = args if args else []
        self.env = env

    def _create_context(self):
        server_params = StdioServerParameters(
            command=self.command,
            args=self.args,
            env=self.env,
        )
        return stdio_client(server_params)
class MCPConnectionSSE(MCPConnection):
    """MCP connection over Server-Sent Events, addressed by URL."""

    def __init__(self, url: str, headers: dict[str, str] = None):
        super().__init__()
        self.url = url
        # A missing or empty header mapping becomes a fresh empty dict.
        self.headers = headers if headers else {}

    def _create_context(self):
        endpoint, extra_headers = self.url, self.headers
        return sse_client(url=endpoint, headers=extra_headers)
class MCPConnectionHTTP(MCPConnection):
    """MCP connection over Streamable HTTP, addressed by URL."""

    def __init__(self, url: str, headers: dict[str, str] = None):
        super().__init__()
        self.url = url
        # A missing or empty header mapping becomes a fresh empty dict.
        self.headers = headers if headers else {}

    def _create_context(self):
        endpoint, extra_headers = self.url, self.headers
        return streamablehttp_client(url=endpoint, headers=extra_headers)
def create_connection(
    transport: str,
    command: str = None,
    args: list[str] = None,
    env: dict[str, str] = None,
    url: str = None,
    headers: dict[str, str] = None,
) -> MCPConnection:
    """Build the MCP connection object that matches `transport`.

    Args:
        transport: "stdio", "sse" or "http" (case-insensitive);
            "streamable_http" and "streamable-http" are accepted for "http".
        command: Command to run (stdio only, required there).
        args: Command arguments (stdio only).
        env: Environment variables (stdio only).
        url: Server URL (sse and http, required there).
        headers: HTTP headers (sse and http only).

    Returns:
        An unopened MCPConnection of the matching subclass.

    Raises:
        ValueError: If the transport is unknown or its required argument is
            missing.
    """
    kind = transport.lower()

    if kind == "stdio":
        if command:
            return MCPConnectionStdio(command=command, args=args, env=env)
        raise ValueError("Command is required for stdio transport")

    if kind == "sse":
        if url:
            return MCPConnectionSSE(url=url, headers=headers)
        raise ValueError("URL is required for sse transport")

    if kind in ("http", "streamable_http", "streamable-http"):
        if url:
            return MCPConnectionHTTP(url=url, headers=headers)
        raise ValueError("URL is required for http transport")

    raise ValueError(f"Unsupported transport type: {kind}. Use 'stdio', 'sse', or 'http'")
+373
View File
@@ -0,0 +1,373 @@
"""MCP Server Evaluation Harness
This script evaluates MCP servers by running test questions against them using Claude.
"""
import argparse
import asyncio
import json
import re
import sys
import time
import traceback
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Any
from anthropic import Anthropic
from connections import create_connection
# System prompt passed to every client.messages.create call in agent_loop.
# It asks the model to wrap its output in <summary>, <feedback> and <response>
# tags, which evaluate_single_task extracts with extract_xml_content.
EVALUATION_PROMPT = """You are an AI assistant with access to tools.
When given a task, you MUST:
1. Use the available tools to complete the task
2. Provide summary of each step in your approach, wrapped in <summary> tags
3. Provide feedback on the tools provided, wrapped in <feedback> tags
4. Provide your final response, wrapped in <response> tags
Summary Requirements:
- In your <summary> tags, you must explain:
- The steps you took to complete the task
- Which tools you used, in what order, and why
- The inputs you provided to each tool
- The outputs you received from each tool
- A summary for how you arrived at the response
Feedback Requirements:
- In your <feedback> tags, provide constructive feedback on the tools:
- Comment on tool names: Are they clear and descriptive?
- Comment on input parameters: Are they well-documented? Are required vs optional parameters clear?
- Comment on descriptions: Do they accurately describe what the tool does?
- Comment on any errors encountered during tool usage: Did the tool fail to execute? Did the tool return too many tokens?
- Identify specific areas for improvement and explain WHY they would help
- Be specific and actionable in your suggestions
Response Requirements:
- Your response should be concise and directly address what was asked
- Always wrap your final response in <response> tags
- If you cannot solve the task return <response>NOT_FOUND</response>
- For numeric responses, provide just the number
- For IDs, provide just the ID
- For names or text, provide the exact text requested
- Your response should go last"""
def parse_evaluation_file(file_path: Path) -> list[dict[str, Any]]:
    """Read the question/answer pairs from an XML evaluation file.

    Every ``qa_pair`` element below the root that has both a ``question`` and
    an ``answer`` child yields one dict with the stripped text of each; pairs
    missing either child are skipped. If the file cannot be read or parsed, a
    message is printed and an empty list is returned.
    """
    try:
        document_root = ET.parse(file_path).getroot()
        pairs = []
        for node in document_root.findall(".//qa_pair"):
            question, answer = node.find("question"), node.find("answer")
            if question is None or answer is None:
                continue
            pairs.append(
                {
                    "question": (question.text or "").strip(),
                    "answer": (answer.text or "").strip(),
                }
            )
        return pairs
    except Exception as e:
        print(f"Error parsing evaluation file {file_path}: {e}")
        return []
def extract_xml_content(text: str, tag: str) -> str | None:
"""Extract content from XML tags."""
pattern = rf"<{tag}>(.*?)</{tag}>"
matches = re.findall(pattern, text, re.DOTALL)
return matches[-1].strip() if matches else None
def _json_fallback(value: Any) -> Any:
    """``json.dumps`` default hook for objects the encoder cannot handle.

    Objects exposing ``model_dump()`` are converted through it (presumably
    the pydantic content objects returned by the MCP SDK; TODO confirm);
    anything else is rendered with ``str()``.
    """
    dump = getattr(value, "model_dump", None)
    if callable(dump):
        return dump()
    return str(value)


def _tool_result_to_text(tool_result: Any) -> str:
    """Render a tool result as the text sent back in a ``tool_result`` block."""
    if isinstance(tool_result, (dict, list)):
        # Without the fallback, a list holding non-JSON objects raises
        # TypeError and a successful call would be reported as an error.
        return json.dumps(tool_result, default=_json_fallback)
    return str(tool_result)


async def agent_loop(
    client: Anthropic,
    model: str,
    question: str,
    tools: list[dict[str, Any]],
    connection: Any,
) -> tuple[str, dict[str, Any]]:
    """Run the model on `question`, executing tool calls until it stops asking.

    Args:
        client: Anthropic client used for the (blocking) message requests,
            which are run in a worker thread.
        model: Model identifier.
        question: Task text, sent as the first user message.
        tools: Tool definitions offered to the model.
        connection: Object whose ``call_tool(name, input)`` coroutine executes
            a tool.

    Returns:
        ``(response_text, tool_metrics)``. ``response_text`` is the first text
        block of the final response, or ``None`` when there is none.
        ``tool_metrics`` maps each tool name to
        ``{"count": int, "durations": [seconds, ...]}``.
    """
    messages = [{"role": "user", "content": question}]

    async def request_turn():
        # `messages` is read at call time, so each turn sees the full history.
        return await asyncio.to_thread(
            client.messages.create,
            model=model,
            max_tokens=4096,
            system=EVALUATION_PROMPT,
            messages=messages,
            tools=tools,
        )

    response = await request_turn()
    messages.append({"role": "assistant", "content": response.content})

    tool_metrics = {}
    while response.stop_reason == "tool_use":
        # A single response may contain several tool_use blocks. Each one needs
        # its own tool_result, and all of them go into the same user message.
        tool_results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            tool_name = block.name
            tool_start_ts = time.time()
            try:
                tool_result = await connection.call_tool(tool_name, block.input)
                tool_response = _tool_result_to_text(tool_result)
            except Exception as e:
                # Report the failure to the model instead of aborting the task.
                tool_response = f"Error executing tool {tool_name}: {str(e)}\n"
                tool_response += traceback.format_exc()
            tool_duration = time.time() - tool_start_ts

            metrics = tool_metrics.setdefault(tool_name, {"count": 0, "durations": []})
            metrics["count"] += 1
            metrics["durations"].append(tool_duration)

            tool_results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": tool_response,
            })

        if not tool_results:
            # stop_reason said tool_use but no such block exists: nothing to answer.
            break

        messages.append({"role": "user", "content": tool_results})
        response = await request_turn()
        messages.append({"role": "assistant", "content": response.content})

    response_text = next(
        (block.text for block in response.content if hasattr(block, "text")),
        None,
    )
    return response_text, tool_metrics
async def evaluate_single_task(
    client: Anthropic,
    model: str,
    qa_pair: dict[str, Any],
    tools: list[dict[str, Any]],
    connection: Any,
    task_index: int,
) -> dict[str, Any]:
    """Run one question through the agent loop and score the answer.

    Args:
        client: Anthropic client forwarded to ``agent_loop``.
        model: Model identifier.
        qa_pair: Dict with the ``question`` to ask and the expected ``answer``.
        tools: Tool definitions offered to the model.
        connection: MCP connection used to execute tools.
        task_index: Zero-based position of the task, used for logging only.

    Returns:
        A dict with ``question``, ``expected``, ``actual``, ``score`` (1 when
        the extracted ``<response>`` equals the expected answer exactly,
        otherwise 0), ``total_duration`` in seconds, ``tool_calls``,
        ``num_tool_calls``, ``summary`` and ``feedback``.
    """
    start_time = time.time()
    print(f"Task {task_index + 1}: Running task with question: {qa_pair['question']}")

    response, tool_metrics = await agent_loop(client, model, qa_pair["question"], tools, connection)
    # agent_loop yields None when the final response has no text block;
    # treat that as an empty answer instead of crashing the regex search.
    response_text = response or ""

    response_value = extract_xml_content(response_text, "response")
    summary = extract_xml_content(response_text, "summary")
    feedback = extract_xml_content(response_text, "feedback")
    duration_seconds = time.time() - start_time

    return {
        "question": qa_pair["question"],
        "expected": qa_pair["answer"],
        "actual": response_value,
        "score": int(response_value == qa_pair["answer"]) if response_value else 0,
        "total_duration": duration_seconds,
        "tool_calls": tool_metrics,
        "num_tool_calls": sum(len(metrics["durations"]) for metrics in tool_metrics.values()),
        "summary": summary,
        "feedback": feedback,
    }
# Report header template; run_evaluation fills in correct, total, accuracy,
# average_duration_s, average_tool_calls and total_tool_calls.
REPORT_HEADER = """
# Evaluation Report
## Summary
- **Accuracy**: {correct}/{total} ({accuracy:.1f}%)
- **Average Task Duration**: {average_duration_s:.2f}s
- **Average Tool Calls per Task**: {average_tool_calls:.2f}
- **Total Tool Calls**: {total_tool_calls}
---
"""
# Per-task section template; run_evaluation formats it once for every result.
TASK_TEMPLATE = """
### Task {task_num}
**Question**: {question}
**Ground Truth Answer**: `{expected_answer}`
**Actual Answer**: `{actual_answer}`
**Correct**: {correct_indicator}
**Duration**: {total_duration:.2f}s
**Tool Calls**: {tool_calls}
**Summary**
{summary}
**Feedback**
{feedback}
---
"""
async def run_evaluation(
    eval_path: Path,
    connection: Any,
    model: str = "claude-3-7-sonnet-20250219",
) -> str:
    """Run every task of an evaluation file and build the Markdown report.

    Args:
        eval_path: Path handed to parse_evaluation_file() to load the QA pairs.
        connection: Object exposing an awaitable list_tools(); it is also
            forwarded to every task.
        model: Model identifier forwarded to each task.

    Returns:
        REPORT_HEADER filled with the aggregate numbers, followed by one
        TASK_TEMPLATE section per task, in task order.
    """
    print("🚀 Starting Evaluation")
    client = Anthropic()

    tools = await connection.list_tools()
    print(f"📋 Loaded {len(tools)} tools from MCP server")

    qa_pairs = parse_evaluation_file(eval_path)
    print(f"📋 Loaded {len(qa_pairs)} evaluation tasks")

    # Tasks run one after another, sharing the same connection.
    results = []
    for i, qa_pair in enumerate(qa_pairs):
        print(f"Processing task {i + 1}/{len(qa_pairs)}")
        result = await evaluate_single_task(client, model, qa_pair, tools, connection, i)
        results.append(result)

    # Aggregates; every ratio falls back to 0 when there were no tasks.
    correct = sum(r["score"] for r in results)
    accuracy = (correct / len(results)) * 100 if results else 0
    average_duration_s = sum(r["total_duration"] for r in results) / len(results) if results else 0
    average_tool_calls = sum(r["num_tool_calls"] for r in results) / len(results) if results else 0
    total_tool_calls = sum(r["num_tool_calls"] for r in results)

    report = REPORT_HEADER.format(
        correct=correct,
        total=len(results),
        accuracy=accuracy,
        average_duration_s=average_duration_s,
        average_tool_calls=average_tool_calls,
        total_tool_calls=total_tool_calls,
    )

    report += "".join(
        TASK_TEMPLATE.format(
            task_num=i + 1,
            question=qa_pair["question"],
            expected_answer=qa_pair["answer"],
            actual_answer=result["actual"] or "N/A",
            # Both branches used to be empty strings, so the "Correct" line of
            # every task was blank regardless of the score.
            correct_indicator="✅" if result["score"] else "❌",
            total_duration=result["total_duration"],
            tool_calls=json.dumps(result["tool_calls"], indent=2),
            summary=result["summary"] or "N/A",
            feedback=result["feedback"] or "N/A",
        )
        for i, (qa_pair, result) in enumerate(zip(qa_pairs, results))
    )
    return report
def parse_headers(header_list: list[str]) -> dict[str, str]:
    """Turn 'Key: Value' strings into a header dictionary.

    Only the first colon separates key from value, so values may contain
    colons. Keys and values are stripped of surrounding whitespace. Entries
    without a colon are skipped with a warning on stdout. A falsy input
    (None or empty list) yields an empty dict.
    """
    parsed = {}
    for raw in header_list or ():
        name, colon, value = raw.partition(":")
        if not colon:
            print(f"Warning: Ignoring malformed header: {raw}")
            continue
        parsed[name.strip()] = value.strip()
    return parsed
def parse_env_vars(env_list: list[str]) -> dict[str, str]:
    """Turn 'KEY=VALUE' strings into an environment dictionary.

    Only the first '=' separates key from value, so values may contain '='.
    Keys and values are stripped of surrounding whitespace. Entries without
    '=' are skipped with a warning on stdout. A falsy input (None or empty
    list) yields an empty dict.
    """
    env_map = {}
    for item in env_list or ():
        if "=" not in item:
            print(f"Warning: Ignoring malformed environment variable: {item}")
            continue
        name, _, value = item.partition("=")
        env_map[name.strip()] = value.strip()
    return env_map
async def main():
    """Command-line entry point: parse arguments, connect, evaluate, report.

    Exits with status 1 when the evaluation file does not exist or when
    create_connection() rejects the transport options with a ValueError.
    The report is written to the --output file when given, otherwise
    printed to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate MCP servers using test questions",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Evaluate a local stdio MCP server
python evaluation.py -t stdio -c python -a my_server.py eval.xml
# Evaluate an SSE MCP server
python evaluation.py -t sse -u https://example.com/mcp -H "Authorization: Bearer token" eval.xml
# Evaluate an HTTP MCP server with custom model
python evaluation.py -t http -u https://example.com/mcp -m claude-3-5-sonnet-20241022 eval.xml
""",
    )
    parser.add_argument("eval_file", type=Path, help="Path to evaluation XML file")
    parser.add_argument("-t", "--transport", choices=["stdio", "sse", "http"], default="stdio", help="Transport type (default: stdio)")
    parser.add_argument("-m", "--model", default="claude-3-7-sonnet-20250219", help="Claude model to use (default: claude-3-7-sonnet-20250219)")

    stdio_group = parser.add_argument_group("stdio options")
    stdio_group.add_argument("-c", "--command", help="Command to run MCP server (stdio only)")
    stdio_group.add_argument("-a", "--args", nargs="+", help="Arguments for the command (stdio only)")
    stdio_group.add_argument("-e", "--env", nargs="+", help="Environment variables in KEY=VALUE format (stdio only)")

    remote_group = parser.add_argument_group("sse/http options")
    remote_group.add_argument("-u", "--url", help="MCP server URL (sse/http only)")
    remote_group.add_argument("-H", "--header", nargs="+", dest="headers", help="HTTP headers in 'Key: Value' format (sse/http only)")

    parser.add_argument("-o", "--output", type=Path, help="Output file for evaluation report (default: stdout)")

    args = parser.parse_args()

    if not args.eval_file.exists():
        print(f"Error: Evaluation file not found: {args.eval_file}")
        sys.exit(1)

    # Absent options are passed on as None rather than as empty dicts.
    headers = parse_headers(args.headers) if args.headers else None
    env_vars = parse_env_vars(args.env) if args.env else None

    try:
        connection = create_connection(
            transport=args.transport,
            command=args.command,
            args=args.args,
            env=env_vars,
            url=args.url,
            headers=headers,
        )
    except ValueError as e:
        print(f"Error: {e}")
        sys.exit(1)

    print(f"🔗 Connecting to MCP server via {args.transport}...")

    async with connection:
        print("✅ Connected successfully")
        report = await run_evaluation(args.eval_file, connection, args.model)

        if args.output:
            # The report contains non-ASCII characters (emoji), so pin UTF-8
            # instead of relying on the locale's default encoding.
            args.output.write_text(report, encoding="utf-8")
            print(f"\n✅ Report saved to {args.output}")
        else:
            print("\n" + report)


if __name__ == "__main__":
    asyncio.run(main())
+145
View File
@@ -0,0 +1,145 @@
#!/usr/bin/env bash
# propagate-structure.sh — Aplica a estrutura padrão (PROC-DEV-STANDARD) a outro projecto
#
# Uso:
# ./propagate-structure.sh /caminho/para/projecto [--dry-run]
#
# Cria (se não existirem): MEMORY.md, STATUS.md, AGENTS.md, CHANGELOG.md, desk.project,
# docs/PLANS/, docs/SPECS/, docs/audit/, docs/audit/findings/
#
# Não sobrescreve ficheiros existentes — só preenche lacunas.
set -euo pipefail
TARGET="${1:-}"
DRY="${2:-}"

# Refuse a missing or non-directory target, and any second argument other than
# --dry-run: a mistyped flag (e.g. --dryrun) must not silently become a real
# run that writes files.
if [[ -z "$TARGET" || ! -d "$TARGET" ]] || [[ -n "$DRY" && "$DRY" != "--dry-run" ]]; then
    echo "Uso: $0 /caminho/projecto [--dry-run]" >&2
    exit 1
fi

PROJECT_NAME="$(basename "$TARGET")"
TODAY="$(date +%Y-%m-%d)"

# Counters reported in the final summary.
CREATED=0
SKIPPED=0
# Print one status line: a tag (e.g. [NEW], [SKIP], [DRY]) followed by a message.
log() {
    local tag="$1" message="$2"
    echo " ${tag} ${message}"
}
# Execute a command string, or only log it (tagged [DRY]) when in dry-run mode.
run() {
    local cmd="$1"
    if [[ "$DRY" != "--dry-run" ]]; then
        eval "$cmd"
        return
    fi
    log "[DRY]" "$cmd"
}
# Create a directory (with parents) unless it already exists.
# Existing directories are logged as [SKIP] and counted in SKIPPED; missing
# ones are logged as [NEW] and counted in CREATED. In dry-run mode nothing is
# created, but the directory is still logged and counted as it would be.
ensure_dir() {
    local dir="$1"
    if [[ -d "$dir" ]]; then
        log "[SKIP]" "dir $dir"
        SKIPPED=$((SKIPPED+1))
        return
    fi
    if [[ "$DRY" == "--dry-run" ]]; then
        log "[DRY]" "mkdir -p '$dir'"
    else
        # Call mkdir directly instead of eval-ing "mkdir -p '<path>'": a path
        # containing a single quote broke that string (and could inject code).
        mkdir -p -- "$dir"
    fi
    log "[NEW]" "dir $dir"
    CREATED=$((CREATED+1))
}
# Write a file with the given content unless it already exists.
#   $1 - destination path
#   $2 - content, written verbatim (no trailing newline is added)
# Existing files are never overwritten: they are logged as [SKIP] and counted
# in SKIPPED. Otherwise the file counts towards CREATED; in dry-run mode it is
# only announced, not written.
ensure_file() {
    local target_path="$1" body="$2"
    local label
    label="$(basename "$target_path")"

    if [[ -f "$target_path" ]]; then
        log "[SKIP]" "file $label"
        SKIPPED=$((SKIPPED+1))
        return
    fi

    if [[ "$DRY" == "--dry-run" ]]; then
        log "[DRY]" "would create $target_path"
    else
        printf '%s' "$body" > "$target_path"
        log "[NEW]" "file $label"
    fi
    CREATED=$((CREATED+1))
}
# Banner: show the target project and whether this is a dry run.
echo "=== Propagação de estrutura PROC-DEV-STANDARD ==="
echo "Projecto: $TARGET"
[[ "$DRY" == "--dry-run" ]] && echo "Modo: DRY-RUN (nada será escrito)"
echo
# Directories (mkdir -p also creates the intermediate docs/ and docs/audit/)
ensure_dir "$TARGET/docs/PLANS"
ensure_dir "$TARGET/docs/SPECS"
ensure_dir "$TARGET/docs/audit/findings"
# Standard files; each template embeds the project name and today's date.
# ensure_file never overwrites an existing file.
ensure_file "$TARGET/MEMORY.md" \
"# MEMORY.md — $PROJECT_NAME
## $TODAY
- Estrutura padrão aplicada via propagate-structure.sh
"
ensure_file "$TARGET/STATUS.md" \
"# STATUS.md — $PROJECT_NAME
**Versão:** 0.1 | **Data:** $TODAY | **Agente:** —
## Estado actual
- Em curso: —
- Bloqueios: —
- Próximos passos: —
"
ensure_file "$TARGET/AGENTS.md" \
"# AGENTS.md — $PROJECT_NAME
Directrizes específicas deste projecto para agentes IA.
## Referências obrigatórias
- \`ECOSYSTEM.md\` — Hub/06-Operacoes/Documentacao/ECOSYSTEM.md
- \`PROC-DEV-STANDARD\` — D7-SIS-006
- \`PROC-AUDIT-STANDARD\` — D7-SIS-007
## Protocolo de sessão
Início: ler MEMORY.md + STATUS.md. Fim: actualizar ambos.
## Convenções específicas
- (a definir)
"
ensure_file "$TARGET/CHANGELOG.md" \
"# CHANGELOG — $PROJECT_NAME
## 0.1.0 — $TODAY
### Adicionado
- Estrutura padrão (PROC-DEV-STANDARD)
"
# desk.project: an empty JSON skeleton that must be filled in by hand.
# Remember whether it existed so the warning is only shown when this run
# really created it. Previously the warning claimed "criado" in dry-run mode
# too, and an existing desk.project was never logged or counted as skipped.
DESK_PROJECT_EXISTED=false
if [[ -f "$TARGET/desk.project" ]]; then
    DESK_PROJECT_EXISTED=true
fi

ensure_file "$TARGET/desk.project" \
'{
"task_id": null,
"project_id": null,
"customer_id": null,
"gitea_repo": null
}
'

if [[ "$DESK_PROJECT_EXISTED" == false && "$DRY" != "--dry-run" ]]; then
    echo " [AVISO] desk.project criado vazio — preencher task_id Desk CRM"
fi
# Summary of what was created and what already existed.
echo
echo "=== Resultado ==="
echo " Criados: $CREATED"
echo " Existentes: $SKIPPED"
if [[ "$DRY" == "--dry-run" ]]; then
    echo " (dry-run — nada foi escrito)"
fi
echo

# Final validation: only after a real run, and only when the sibling validator
# script is present and executable. Its verdict never fails this script.
VALIDATOR="$(dirname "$0")/validate-structure.sh"
if [[ "$DRY" != "--dry-run" && -x "$VALIDATOR" ]]; then
    echo "=== Validação ==="
    bash "$VALIDATOR" "$TARGET" || true
fi
+107
View File
@@ -0,0 +1,107 @@
#!/bin/bash
#
# validate-structure.sh — Valida a estrutura de um projecto contra o standard
# ESTRUTURA.md do Modelo-Organizacao-Proc-Dev
#
# Uso: ./scripts/validate-structure.sh [caminho_do_projecto]
# Se não for especificado caminho, valida o projecto actual (raiz)
#
# Exit codes:
# 0 — Em conformidade
# 1 — Não conforme (estrutura em falta)
# 2 — Erro de execução
set -euo pipefail

ROOT="${1:-.}"
ERRORS=0
WARNINGS=0

# Exit code 2 (execution error) is documented in the header but was never
# produced: a non-existent project path used to be reported as a list of
# structure failures instead.
if [ ! -d "$ROOT" ]; then
    echo "Erro: directoria do projecto não encontrada: $ROOT" >&2
    exit 2
fi

# Reporting helpers; fail/warn also bump the matching counter.
ok()   { echo " [OK] $1"; }
fail() { echo " [FALHA] $1"; ERRORS=$((ERRORS + 1)); }
warn() { echo " [AVISO] $1"; WARNINGS=$((WARNINGS + 1)); }

# Require a directory, given relative to the project root.
require_dir() {
    if [ -d "$ROOT/$1" ]; then
        ok "$1/ existe"
    else
        fail "$1/ não existe"
    fi
}

echo "=== Validação Estrutural (ESTRUTURA.md) ==="
echo "Projecto: $(cd "$ROOT" && pwd)"
echo ""

# 1-3. Mandatory documentation directories
require_dir "docs"
require_dir "docs/PLANS"
require_dir "docs/SPECS"

# 4. AGENTS.md exists and has minimal content (at least 5 lines)
if [ -f "$ROOT/AGENTS.md" ]; then
    LINES=$(wc -l < "$ROOT/AGENTS.md")
    if [ "$LINES" -ge 5 ]; then
        ok "AGENTS.md existe ($LINES linhas)"
    else
        warn "AGENTS.md existe mas tem apenas $LINES linhas"
    fi
else
    fail "AGENTS.md não existe"
fi

# 5. MEMORY.md exists
if [ -f "$ROOT/MEMORY.md" ]; then
    ok "MEMORY.md existe"
else
    fail "MEMORY.md não existe"
fi

# 6. desk.project exists and is valid JSON
if [ -f "$ROOT/desk.project" ]; then
    # Without jq the check below used to misreport a valid file as invalid
    # JSON; that is an execution error, not a structure failure.
    if ! command -v jq > /dev/null 2>&1; then
        echo "Erro: jq não está instalado (necessário para validar desk.project)" >&2
        exit 2
    fi
    if jq -e . "$ROOT/desk.project" > /dev/null 2>&1; then
        TASK_ID=$(jq -r '.task_id // "vazio"' "$ROOT/desk.project")
        ok "desk.project existe (task_id: $TASK_ID)"
    else
        fail "desk.project existe mas não é JSON válido"
    fi
else
    fail "desk.project não existe"
fi

# 7. Anti-pattern: docs/PROC and docs/RUNBOOK should not exist in a project
if [ -d "$ROOT/docs/PROC" ]; then
    warn "docs/PROC existe — não devia. Usar 06-Operacoes/Procedimentos/ no Hub"
fi
if [ -d "$ROOT/docs/RUNBOOK" ]; then
    warn "docs/RUNBOOK existe — não devia. Usar 06-Operacoes/Runbooks/ no Hub"
fi

echo ""
echo "=== Resultado ==="
if [ "$ERRORS" -gt 0 ]; then
    echo " FALHAS: $ERRORS"
    echo " AVISOS: $WARNINGS"
    echo " VEREDICTO: NÃO CONFORME"
    exit 1
else
    echo " FALHAS: 0"
    echo " AVISOS: $WARNINGS"
    echo " VEREDICTO: CONFORME"
    exit 0
fi
+191
View File
@@ -0,0 +1,191 @@
#!/usr/bin/env python3
"""
OCR Pipeline para Documentos Contabilísticos
PDF → imagem → RapidOCR → DeepSeek → JSON estruturado
Uso: python3 ocr-invoice.py <caminho_do_pdf>
"""
import sys
import os
import json
import time
from pathlib import Path
# ── CONFIG ───────────────────────────────────────────────────────
OPENCODE_GO_KEY = ""
# Read the API key from ~/.hermes/.env: the first line that starts with
# "OPENCODE_GO_API_KEY=" wins (so commented-out lines never match).
# The file is now closed deterministically, and a missing file leaves the key
# empty with a warning instead of crashing the module at import time.
try:
    with open(os.path.expanduser("~/.hermes/.env"), encoding="utf-8", errors="replace") as _env_file:
        for line in _env_file:
            line = line.strip()
            if line.startswith("OPENCODE_GO_API_KEY="):
                OPENCODE_GO_KEY = line.split("=", 1)[1]
                break
except FileNotFoundError:
    print("Aviso: ~/.hermes/.env não encontrado — OPENCODE_GO_API_KEY fica vazia", file=sys.stderr)

OPENCODE_GO_URL = "https://opencode.ai/zen/go/v1"
MODEL = "deepseek-v4-flash"
OCR_DPI = 200
def pdf_to_image(pdf_path: str, dpi: int = OCR_DPI) -> str:
    """Render the first page of a PDF to a PNG file and return its path.

    Args:
        pdf_path: Path of the PDF to open.
        dpi: Resolution passed to the page renderer.

    Returns:
        Path of the written image: /tmp/ocr_<pdf file stem>.png. Two PDFs
        with the same stem therefore share (and overwrite) the same image.
    """
    import pymupdf

    doc = pymupdf.open(pdf_path)
    try:
        page = doc[0]
        pix = page.get_pixmap(dpi=dpi)
        img_path = f"/tmp/ocr_{Path(pdf_path).stem}.png"
        pix.save(img_path)
    finally:
        # The document used to be left open; close it even when rendering fails.
        doc.close()
    return img_path
def ocr_image(img_path: str) -> list[dict]:
    """Run RapidOCR on an image and return the recognised text lines.

    Returns:
        One dict per recognised line with the keys "text" and "score"
        (the score converted to float). An empty list when the engine
        returns nothing or no texts.
    """
    from rapidocr import RapidOCR

    result = RapidOCR()(img_path)
    if not result or not result.txts:
        return []
    return [
        {"text": txt, "score": float(score)}
        for txt, score in zip(result.txts, result.scores)
    ]
def _strip_code_fence(raw: str) -> str:
    """Remove a surrounding Markdown code fence (``` or ```json) from a reply."""
    text = raw.strip()
    if text.startswith("```"):
        # Drop the whole opening fence line. partition() copes with a reply
        # that has no newline after the fence, where split()[1] raised IndexError.
        _, _, text = text.partition("\n")
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def extract_structured(ocr_lines: list[dict], pdf_name: str) -> dict:
    """Ask the chat model to turn OCR text into structured invoice data.

    Args:
        ocr_lines: Dicts with the keys "text" and "score", one per OCR line.
        pdf_name: File name included in the prompt as extra context.

    Returns:
        The parsed JSON object returned by the model.

    Raises:
        ValueError: If the model returns no content; json.JSONDecodeError
            (a ValueError subclass) if the content is not valid JSON.
    """
    from openai import OpenAI

    client = OpenAI(
        api_key=OPENCODE_GO_KEY,
        base_url=OPENCODE_GO_URL,
    )

    # OCR text as one block, each line prefixed with its confidence score.
    ocr_text = "\n".join(f"[{entry['score']:.2f}] {entry['text']}" for entry in ocr_lines)

    prompt = f"""Analisa o seguinte texto extraído de um documento contabilístico (factura/recibo) via OCR.
Extrai os campos estruturados e devolve APENAS JSON válido (sem markdown, sem ```).
Texto OCR:
{ocr_text}
Nome do ficheiro: {pdf_name}
Devolve JSON com esta estrutura exata:
{{
"tipo_documento": "factura|recibo|nota_de_credito|outro",
"fornecedor": {{
"nome": "string",
"nif_cif": "string",
"morada": "string",
"telefone": "string"
}},
"cliente": {{
"nome": "string",
"nif": "string",
"morada": "string",
"telefone": "string"
}},
"documento": {{
"numero": "string",
"data": "YYYY-MM-DD",
"metodo_pagamento": "string",
"referencia": "string"
}},
"artigos": [
{{
"codigo": "string",
"descricao": "string",
"preco_unitario": 0.00,
"quantidade": 1,
"total": 0.00,
"notas": "string"
}}
],
"resumo": {{
"base_tributavel": 0.00,
"taxa_iva_percent": 0,
"iva_valor": 0.00,
"recargo_percent": 0,
"recargo_valor": 0.00,
"total_pagar": 0.00,
"moeda": "EUR"
}},
"notas": "string com observações relevantes"
}}
Regras:
- Preços com vírgula decimal (formato PT: 1.234,56 → 1234.56)
- Se campo não encontrado, usar null
- Se artigos não detectados, array vazio
- IVA: se não explícito, calcular a partir de base + total
- Moeda: EUR por defeito"""

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "És um assistente especializado em extração de dados de documentos contabilísticos portugueses. Devolves sempre JSON válido."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.0,
        max_tokens=16384,
    )

    content = response.choices[0].message.content
    if content is None:
        # Previously surfaced as an opaque AttributeError on None.strip().
        raise ValueError("Resposta do modelo sem conteúdo")

    # The model is told not to use Markdown, but strip a fence if it does.
    return json.loads(_strip_code_fence(content))
def process_invoice(pdf_path: str) -> dict:
    """Run the full pipeline on one PDF: render, OCR, structured extraction.

    Progress and timings are printed to stdout. The intermediate image is
    removed afterwards, including when OCR or extraction fails.

    Returns:
        A dict with the source path, the number of OCR lines, the total
        processing time in seconds (one decimal) and the extracted data.
    """
    print(f"📄 A processar: {pdf_path}")
    t0 = time.time()

    # 1. PDF -> image
    print(" [1/3] PDF → imagem...")
    img = pdf_to_image(pdf_path)
    print(f" OK ({time.time()-t0:.1f}s)")

    try:
        # 2. OCR
        print(" [2/3] OCR (RapidOCR)...")
        t1 = time.time()
        lines = ocr_image(img)
        print(f" {len(lines)} linhas em {time.time()-t1:.1f}s")

        # 3. Structured extraction
        print(" [3/3] Extração estruturada (DeepSeek)...")
        t2 = time.time()
        structured = extract_structured(lines, os.path.basename(pdf_path))
        print(f" OK ({time.time()-t2:.1f}s)")

        elapsed = time.time() - t0
        print(f"\n✅ Pipeline completo em {elapsed:.1f}s")
    finally:
        # Cleanup used to run only on success, leaking the temporary image
        # whenever OCR or the model call raised.
        if os.path.exists(img):
            os.remove(img)

    return {
        "source_file": pdf_path,
        "ocr_lines": len(lines),
        "processing_time_seconds": round(elapsed, 1),
        "extracted_data": structured,
    }
if __name__ == "__main__":
    # Script entry point: exactly one positional argument, the PDF path.
    cli_args = sys.argv[1:]
    if not cli_args:
        print(f"Uso: python3 {sys.argv[0]} <caminho_do_pdf>")
        sys.exit(1)

    pdf_path = cli_args[0]
    if not os.path.exists(pdf_path):
        print(f"Erro: ficheiro não encontrado: {pdf_path}")
        sys.exit(1)

    # Print the result as pretty JSON, keeping accented characters readable.
    result = process_invoice(pdf_path)
    print("\n" + json.dumps(result, indent=2, ensure_ascii=False))
+161
View File
@@ -0,0 +1,161 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/scripts/lib.sh"
# Print the command overview and exit with status 1.
# The heredoc delimiter is unquoted, so $0 expands to the script's invocation name.
usage() {
cat << EOF
Podcast Descomplicar Digital — Pipeline Automatizado
Usage: $0 <command> [args]
Commands:
batch N Process next N episodes (content generation)
status Show pipeline state
retry NNN Re-run failed stage for episode NNN
csv [start] [N] Generate Canva CSV (default: next 7 episodes)
publish NNN D Generate publish commands for episode NNN on date D (YYYY-MM-DD)
init NNN Add episode NNN to pipeline state
produce NNN F Post-produce episode NNN from raw audio file F
Examples:
$0 batch 5
$0 status
$0 csv 20 10
$0 produce 20 /path/to/raw.wav
$0 publish 20 2026-04-14
EOF
exit 1
}
[[ $# -lt 1 ]] && usage
CMD="$1"; shift

ensure_state_file
mkdir -p "$LOG_DIR"

case "$CMD" in
    batch)
        # Generate content for the next COUNT episodes and advance the state.
        COUNT="${1:-5}"
        # Both values feed shell arithmetic below; reject anything that is not
        # a plain number instead of failing with an obscure arithmetic error.
        if [[ ! "$COUNT" =~ ^[1-9][0-9]*$ ]]; then
            log_error "batch: N must be a positive integer (got '${COUNT}')"
            exit 1
        fi
        START="$(jq -r '.next_episode' "$STATE_FILE")"
        if [[ ! "$START" =~ ^[0-9]+$ ]]; then
            log_error "batch: .next_episode in state file is not a number (got '${START}')"
            exit 1
        fi
        END=$((START + COUNT - 1))

        log_info "=== BATCH: Processing episodes ${START}-${END} ==="

        # Fall back to today when the state has no next publish date.
        CURRENT_DATE="$(jq -r '.next_publish_date' "$STATE_FILE")"
        [[ -z "$CURRENT_DATE" || "$CURRENT_DATE" == "null" ]] && CURRENT_DATE="$(date '+%Y-%m-%d')"

        for ((ep=START; ep<=END; ep++)); do
            log_info "--- Episode ${ep} ---"

            STATUS="$(get_episode_status "$ep")"
            if [[ "$STATUS" == "not_found" || "$STATUS" == "pending" ]]; then
                "${SCRIPT_DIR}/scripts/generate-content.sh" "$ep" || {
                    log_error "EP$(pad_number "$ep"): Content generation failed, stopping batch"
                    exit 1
                }
            else
                log_info "EP$(pad_number "$ep"): Already at status '${STATUS}', skipping content generation"
            fi

            # Re-read the status: content generation may have advanced it.
            STATUS="$(get_episode_status "$ep")"
            if [[ "$STATUS" == "script_done" ]]; then
                log_warn "EP$(pad_number "$ep"): Audio TTS needed (manual step via AI Studio)"
                log_warn " -> Generate audio, save to Episodios/Audios/raw/ep_$(pad_number "$ep")_raw.wav"
                log_warn " -> Then run: $0 produce ${ep} <raw_audio_path>"
            fi

            CURRENT_DATE="$(next_weekday "$CURRENT_DATE")"
        done

        # Persist the new cursor via a temp file so a failed jq run cannot
        # truncate the state file.
        jq --argjson n "$((END + 1))" --arg d "$CURRENT_DATE" \
            '.next_episode = $n | .next_publish_date = $d' \
            "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"

        log_info "=== BATCH COMPLETE. Next episode: $((END + 1)), next date: ${CURRENT_DATE} ==="
        ;;

    status)
        echo "=== Pipeline Status ==="
        echo "Next episode: $(jq -r '.next_episode' "$STATE_FILE")"
        echo "Next publish date: $(jq -r '.next_publish_date' "$STATE_FILE")"
        echo ""

        EPISODE_COUNT="$(jq '.episodes | length' "$STATE_FILE")"
        if [[ "$EPISODE_COUNT" -gt 0 ]]; then
            echo "Episodes in pipeline:"
            # The jq expression zero-pads the episode number to three digits.
            jq -r '.episodes[] | " EP\(.number | tostring | if length < 3 then "0" * (3 - length) + . else . end): \(.status) - \(.title)"' "$STATE_FILE"
            echo ""
            echo "Counts:"
            jq -r '.episodes | group_by(.status) | map({status: .[0].status, count: length}) | .[] | " \(.status): \(.count)"' "$STATE_FILE"
        else
            echo "No episodes in pipeline yet. Run 'batch' to start."
        fi
        ;;

    retry)
        # Re-run whichever stage matches the episode's current status.
        EP="${1:?Episode number required}"
        STATUS="$(get_episode_status "$EP")"
        log_info "Retrying EP$(pad_number "$EP") (current status: ${STATUS})"

        case "$STATUS" in
            pending|not_found)
                "${SCRIPT_DIR}/scripts/generate-content.sh" "$EP"
                ;;
            script_done)
                log_warn "Audio TTS needed — manual step via AI Studio"
                ;;
            audio_done)
                # Use the first raw audio file found for this episode.
                RAW=""
                for f in "${PROJECT_ROOT}/Episodios/Audios/raw/ep_$(pad_number "$EP")_"*; do
                    [[ -f "$f" ]] && RAW="$f" && break
                done
                if [[ -n "$RAW" ]]; then
                    "${SCRIPT_DIR}/scripts/post-produce.sh" "$EP" "$RAW"
                else
                    log_error "Raw audio not found. Use: $0 produce ${EP} <path>"
                    # This error used to end with exit status 0.
                    exit 1
                fi
                ;;
            produced)
                log_info "Ready to publish. Use: $0 publish ${EP} YYYY-MM-DD"
                ;;
            *)
                log_warn "Unknown status: ${STATUS}"
                ;;
        esac
        ;;

    csv)
        "${SCRIPT_DIR}/scripts/generate-csv.sh" "${@}"
        ;;

    produce)
        EP="${1:?Episode number required}"
        RAW="${2:?Raw audio path required}"
        "${SCRIPT_DIR}/scripts/post-produce.sh" "$EP" "$RAW"
        ;;

    publish)
        # The date defaults to the state's next publish date.
        EP="${1:?Episode number required}"
        SCHED_DATE="${2:-$(jq -r '.next_publish_date' "$STATE_FILE")}"
        "${SCRIPT_DIR}/scripts/publish-episode.sh" "$EP" "$SCHED_DATE"
        ;;

    init)
        # Register an episode using its entry in the episode/guide map.
        EP="${1:?Episode number required}"
        MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
        EP_DATA="$(jq -r --arg n "$EP" '.[$n] // empty' "$MAP_FILE")"
        if [[ -n "$EP_DATA" ]]; then
            TITLE="$(echo "$EP_DATA" | jq -r '.podcast_title')"
            URL="$(echo "$EP_DATA" | jq -r '.guide_url')"
            add_episode "$EP" "$TITLE" "$URL"
            log_info "Added EP$(pad_number "$EP"): ${TITLE}"
        else
            log_error "Episode $EP not found in episode-guide-map.json"
            # This error used to end with exit status 0.
            exit 1
        fi
        ;;

    *)
        usage
        ;;
esac
+224
View File
@@ -0,0 +1,224 @@
#!/usr/bin/env bash
set -euo pipefail
# apply-wp-content.sh — Aplica conteúdo _wp.json a posts já agendados no WordPress
#
# Uso: ./scripts/apply-wp-content.sh <ep_num> [post_id]
# Se post_id não for fornecido, pesquisa por data na pipeline-state.json
#
# Útil para: episódios que foram agendados antes do _wp.json existir
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# SSH config used by ssh_cmd to reach the WordPress host.
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
# NOTE(review): StrictHostKeyChecking=no together with UserKnownHostsFile=/dev/null
# disables host key verification for a root login — confirm this is intended.
# Kept as one string on purpose: ssh_cmd expands it unquoted so it splits into options.
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
# Directory the remote wp commands are run from.
WP_PATH="/home/ealmeida/public_html"
# Run a command on the WordPress host over SSH using the configured key, port
# and options. SSH_AUTH_SOCK is emptied for this call only, so no agent is used.
ssh_cmd() {
    local remote="${SSH_USER}@${SSH_HOST}"
    # SSH_OPTS is intentionally unquoted: it must split into separate options.
    # shellcheck disable=SC2086
    SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" ${SSH_OPTS} "${remote}" "$@"
}
# Print the one-line synopsis and exit with status 1.
usage() {
    printf 'Usage: %s <ep_num> [post_id]\n' "$0"
    exit 1
}
if [[ $# -lt 1 ]]; then
    usage
fi

EP_NUM="$1"
MANUAL_POST_ID="${2:-}"
EP_PAD="$(pad_number "$EP_NUM")"

# Locate the episode's _wp.json. find is used so that file names with accents
# are handled; the first regular file found wins.
WP_JSON=""
while IFS= read -r candidate; do
    if [[ -f "$candidate" ]]; then
        WP_JSON="$candidate"
        break
    fi
done < <(find "${PROJECT_ROOT}" -maxdepth 3 -name "Episodio_${EP_PAD}_*_wp.json" 2>/dev/null)

if [[ -z "$WP_JSON" ]]; then
    log_error "EP${EP_PAD}: _wp.json não encontrado"
    exit 1
fi
# Validate the JSON and try an automatic repair when it does not parse.

# Succeeds when the given file parses as JSON. The path is passed as an
# argument rather than pasted into the Python source, so file names containing
# quotes cannot break (or inject into) the check.
json_is_valid() {
    python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$1" 2>/dev/null
}

if ! json_is_valid "$WP_JSON"; then
    log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
    # The repair escapes unescaped double quotes inside "content_html", whose
    # end is detected by the following "hashtags" or "wp_tags" key. The file is
    # only rewritten when the repaired text parses.
    # `|| true`: under `set -e` a failed repair used to abort the script right
    # here, before the explicit error message below could be logged.
    python3 - "$WP_JSON" << 'PYFIX' || true
import sys, re, json
filepath = sys.argv[1]
with open(filepath, 'r') as f:
    raw = f.read()
marker = '"content_html": "'
start = raw.find(marker)
if start == -1: sys.exit(1)
content_start = start + len(marker)
end_pattern = re.search(r'",\s*\n\s*"hashtags"', raw[content_start:]) or re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:])
if not end_pattern: sys.exit(1)
content_end = content_start + end_pattern.start()
fixed = raw[:content_start] + re.sub(r'(?<!\\)"', '\\"', raw[content_start:content_end]) + raw[content_end:]
json.loads(fixed)
with open(filepath, 'w') as f: f.write(fixed)
print("reparado")
PYFIX
    if ! json_is_valid "$WP_JSON"; then
        log_error "EP${EP_PAD}: JSON inválido após reparação"
        exit 1
    fi
    log_info "EP${EP_PAD}: JSON reparado automaticamente"
fi
log_info "EP${EP_PAD}: Usando $(basename "$WP_JSON")"

# Resolve the post id: the explicit argument wins; otherwise look up the
# scheduled ("future") podcast post by the date stored in pipeline-state.json.
POST_ID="$MANUAL_POST_ID"
if [[ -z "$POST_ID" ]]; then
    SCHED_DATE="$(jq -r --argjson n "$EP_NUM" '.episodes[] | select(.num == $n) | .scheduled // empty' "$STATE_FILE")"
    if [[ -z "$SCHED_DATE" ]]; then
        log_error "EP${EP_PAD}: Não encontrado em pipeline-state.json, especifique post_id manualmente"
        exit 1
    fi
    # The date is interpolated into SQL executed on the server: accept only
    # a plain YYYY-MM-DD value.
    if [[ ! "$SCHED_DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
        log_error "EP${EP_PAD}: data agendada inválida em pipeline-state.json: '${SCHED_DATE}'"
        exit 1
    fi
    log_info "EP${EP_PAD}: A pesquisar post por data ${SCHED_DATE}..."
    POST_ID="$(ssh_cmd "cd '${WP_PATH}' && wp db query \
        \"SELECT ID FROM wpah_posts WHERE post_type='podcast' AND post_status='future' AND DATE(post_date)='${SCHED_DATE}' LIMIT 1\" \
        --skip-column-names \
        --allow-root 2>/dev/null")"
fi

if [[ -z "$POST_ID" ]]; then
    log_error "EP${EP_PAD}: Post não encontrado no WordPress"
    exit 1
fi

# The id ends up unquoted inside commands run by a root shell on the server,
# so anything that is not purely numeric is refused.
if [[ ! "$POST_ID" =~ ^[0-9]+$ ]]; then
    log_error "EP${EP_PAD}: post_id inválido: '${POST_ID}'"
    exit 1
fi

log_info "EP${EP_PAD}: Post ID = ${POST_ID}"
# Extract the fields from the JSON with Python (the original author found it
# more robust than jq for HTML with accents).

# Print one field of a JSON file.
#   $1 - JSON file   $2 - key   $3 - optional separator
# With a separator the value is treated as a list and joined with it (missing
# key -> empty list); without one the value is printed as is (missing key ->
# empty string). The path and key are passed as arguments instead of being
# pasted into the Python source, so quotes in the file name cannot break it.
wp_json_field() {
    python3 -c '
import json, sys
path, key, *sep = sys.argv[1:]
data = json.load(open(path))
if sep:
    print(sep[0].join(data.get(key, [])))
else:
    print(data.get(key, ""))
' "$@" 2>/dev/null
}

WP_CONTENT="$(wp_json_field "$WP_JSON" content_html)"
WP_META="$(wp_json_field "$WP_JSON" meta_description)"
WP_KEYWORD="$(wp_json_field "$WP_JSON" keyword)"
WP_TAGS="$(wp_json_field "$WP_JSON" wp_tags ",")"
WP_HASHTAGS="$(wp_json_field "$WP_JSON" hashtags " ")"
WP_SEO_TITLE="$(wp_json_field "$WP_JSON" seo_title)"
WP_SLUG="$(wp_json_field "$WP_JSON" slug)"
# RankMath auto-fix: rebuild the slug when it does not contain the keyword, and
# append the current year to an SEO title that has no digit in it.
# The embedded Python prints two lines: the slug, then the SEO title.
_AUTOFIX="$(python3 - "$WP_JSON" "$WP_SLUG" "$WP_SEO_TITLE" <<'PYFIX'
import json, sys, re, unicodedata, time

def slugify(text):
    text = unicodedata.normalize("NFD", text.lower())
    text = "".join(c for c in text if unicodedata.category(c) != "Mn")
    text = re.sub(r'[^a-z0-9\s-]', '', text)
    return re.sub(r'[-\s]+', '-', text.strip()).rstrip('-')

STOPWORDS = {"a","o","as","os","de","da","do","das","dos","e","em","no","na","nos","nas","para","por","pelo","pela"}

def strip_sw(s):
    return " ".join(t for t in slugify(s).replace("-", " ").split() if t not in STOPWORDS)

d = json.load(open(sys.argv[1]))
slug = sys.argv[2]
seo_title = sys.argv[3]
kw = d.get('keyword', '')

# Fix 1: slug without the keyword (compared with stopwords removed)
slug_fixed = slug
if kw and slug and strip_sw(kw) not in strip_sw(slug):
    fk_slug = slugify(kw)
    orig_words = [w for w in slug.split('-') if w not in fk_slug.split('-') and len(w) > 3][:2]
    slug_fixed = (fk_slug + ('-' + '-'.join(orig_words) if orig_words else ''))[:75].rstrip('-')

# Fix 2: seo_title without a number -> add the year
seo_fixed = seo_title
if seo_title and not re.search(r'\d', seo_title):
    year = time.strftime('%Y')
    # Insert the year right before the " | " separator when there is one,
    # otherwise at the end of the title. The main text is not truncated in
    # the separator case.
    if ' | ' in seo_title:
        parts = seo_title.split(' | ', 1)
        seo_fixed = f"{parts[0].rstrip()} {year} | {parts[1]}"
    else:
        t = seo_title.rstrip()
        seo_fixed = (t[:55].rsplit(' ', 1)[0] + f' {year}') if len(t) > 55 else f'{t} {year}'

print(slug_fixed)
print(seo_fixed)
PYFIX
)"

# Apply the corrected values, logging only when something actually changed.
WP_SLUG_NEW="$(sed -n '1p' <<< "$_AUTOFIX")"
WP_SEO_TITLE_NEW="$(sed -n '2p' <<< "$_AUTOFIX")"

if [[ -n "$WP_SLUG_NEW" ]] && [[ "$WP_SLUG_NEW" != "$WP_SLUG" ]]; then
    log_info "EP${EP_PAD}: slug auto-corrigido: '${WP_SLUG}' → '${WP_SLUG_NEW}'"
    WP_SLUG="$WP_SLUG_NEW"
fi

if [[ -n "$WP_SEO_TITLE_NEW" ]] && [[ "$WP_SEO_TITLE_NEW" != "$WP_SEO_TITLE" ]]; then
    log_info "EP${EP_PAD}: seo_title auto-corrigido: ano adicionado → '${WP_SEO_TITLE_NEW}'"
    WP_SEO_TITLE="$WP_SEO_TITLE_NEW"
fi
# Escape a value for embedding between single quotes in the command string sent
# to the remote shell: every ' becomes '\''.
sq_escape() {
    printf '%s' "$1" | sed "s/'/'\\\\''/g"
}

# Excerpt = meta description + hashtags (only when both are present)
WP_EXCERPT=""
if [[ -n "$WP_META" && -n "$WP_HASHTAGS" ]]; then
    WP_EXCERPT="${WP_META}
${WP_HASHTAGS}"
fi

# Apply post_content
if [[ -n "$WP_CONTENT" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_content='$(sq_escape "$WP_CONTENT")' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: post_content aplicado"
fi

# Apply excerpt
if [[ -n "$WP_EXCERPT" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_excerpt='$(sq_escape "$WP_EXCERPT")' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: post_excerpt aplicado"
fi

# Apply slug (now escaped like every other value sent to the remote shell)
if [[ -n "$WP_SLUG" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_name='$(sq_escape "$WP_SLUG")' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: slug actualizado -> ${WP_SLUG}"
fi

# Apply tags. The comma-joined list used to be pasted unquoted, so the remote
# root shell word-split it and interpreted any metacharacters in a tag.
# NOTE(review): the list is still passed as ONE comma-joined argument, as
# before for tags without spaces — confirm that `wp post term set` splits on
# commas; if it does not, each tag should become its own quoted argument.
if [[ -n "$WP_TAGS" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post term set ${POST_ID} post_tag '$(sq_escape "$WP_TAGS")' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: tags aplicadas"
fi

# Rank Math: description
if [[ -n "$WP_META" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_description '$(sq_escape "$WP_META")' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: rank_math_description aplicado"
fi

# Rank Math: focus keyword
if [[ -n "$WP_KEYWORD" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_focus_keyword '$(sq_escape "$WP_KEYWORD")' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: rank_math_focus_keyword aplicado"
fi

# Rank Math: SEO title, falling back to "<title> | Podcast Descomplicar Digital"
if [[ -z "$WP_SEO_TITLE" ]]; then
    WP_TITLE_FALLBACK="$(jq -r '.title // empty' "$WP_JSON")"
    if [[ -n "$WP_TITLE_FALLBACK" ]]; then
        WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
    fi
fi
if [[ -n "$WP_SEO_TITLE" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_title '$(sq_escape "$WP_SEO_TITLE")' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: rank_math_title aplicado"
fi

log_info "EP${EP_PAD}: Conteúdo WP aplicado com sucesso (post ${POST_ID})"
# The post id is the script's only plain stdout line, for use by callers.
echo "${POST_ID}"
+177
View File
@@ -0,0 +1,177 @@
#!/usr/bin/env bash
# batch-prepare.sh — Pipeline completo para lote de episódios
# Uso: ./scripts/batch-prepare.sh [--dry-run]
# Para cada episódio: TTS → pós-produção → agendar no WordPress
# Retomável: salta episódios já prontos
set -uo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Dry-run mode: the stages are only logged, none of the stage scripts is run.
DRY_RUN=false
[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true
# Check GEMINI_API_KEY (required even for a dry run)
if [[ -z "${GEMINI_API_KEY:-}" ]]; then
log_error "GEMINI_API_KEY não definida. Exporta primeiro: export GEMINI_API_KEY=..."
exit 1
fi
# Batch: episodes and their publication dates (calendar order).
# EP_ORDER fixes the processing order; EP_DATES maps episode number -> date.
EP_ORDER=(65 132 137 66 67 134 139 69 78 79 80 81 82 83 84)
declare -A EP_DATES=(
[65]="2026-07-02"
[132]="2026-07-06"
[137]="2026-07-07"
[66]="2026-07-08"
[67]="2026-07-09"
[134]="2026-07-13"
[139]="2026-07-14"
[69]="2026-07-15"
[78]="2026-07-16"
[79]="2026-07-20"
[80]="2026-07-21"
[81]="2026-07-22"
[82]="2026-07-23"
[83]="2026-07-27"
[84]="2026-07-28"
)
# One timestamped log file per run; stage output is appended to it.
BATCH_LOG="${LOG_DIR}/batch-$(date +%Y%m%d-%H%M%S).log"
mkdir -p "${LOG_DIR}"
# Per-episode outcome (ok, skip or an error code), reported in the final summary.
declare -A RESULTS
# Print a timestamped message to stdout and append it to the batch log file.
_batch_log() {
    local stamp
    stamp="$(date '+%Y-%m-%d %H:%M:%S')"
    echo "[${stamp}] $*" | tee -a "${BATCH_LOG}"
}
_batch_log "=== BATCH START — $(date) ==="
_batch_log "Episódios: ${EP_ORDER[*]}"
[[ "$DRY_RUN" == true ]] && _batch_log "MODO DRY-RUN activado"

# Per episode: TTS -> post-production -> schedule in WordPress. A failure in
# any stage records an error code in RESULTS and moves on to the next episode.
for EP_NUM in "${EP_ORDER[@]}"; do
    EP_PAD="$(pad_number "$EP_NUM")"
    SCHED_DATE="${EP_DATES[$EP_NUM]}"

    _batch_log "--- EP${EP_PAD} (${SCHED_DATE}) ---"

    # Already scheduled? (status "ready" in pipeline-state.json)
    ALREADY_STATUS=$(jq -r --argjson n "$EP_NUM" \
        '.episodes[] | select(.num == $n) | .status // ""' \
        "${STATE_FILE}" 2>/dev/null || echo "")
    if [[ "$ALREADY_STATUS" == "ready" ]]; then
        _batch_log "EP${EP_PAD}: já agendado — a saltar"
        RESULTS[$EP_NUM]="skip"
        continue
    fi

    # --- Stage 1: TTS (skipped together with stage 2 when a final MP3 exists) ---
    FINAL_MP3=""
    for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
        [[ -f "$f" ]] && FINAL_MP3="$f" && break
    done

    if [[ -z "$FINAL_MP3" ]]; then
        _batch_log "EP${EP_PAD}: [1/3] A gerar áudio TTS..."
        if [[ "$DRY_RUN" == false ]]; then
            if ! bash "${SCRIPT_DIR}/generate-audio.sh" "$EP_NUM" >> "${BATCH_LOG}" 2>&1; then
                _batch_log "EP${EP_PAD}: ERRO no TTS — a saltar episódio"
                RESULTS[$EP_NUM]="erro_tts"
                continue
            fi
        else
            _batch_log "EP${EP_PAD}: [DRY-RUN] generate-audio.sh ${EP_NUM}"
        fi

        # --- Stage 2: post-production ---
        # Collect the raw parts in natural (version) order. Plain glob order
        # is lexical and would hand part p10 to post-production before p2.
        RAW_FILES=()
        while IFS= read -r f; do
            [[ -f "$f" ]] && RAW_FILES+=("$f")
        done < <(printf '%s\n' "${PROJECT_ROOT}/Episodios/Audios/raw/ep_${EP_PAD}_p"*.wav | sort -V)

        if [[ ${#RAW_FILES[@]} -eq 0 && "$DRY_RUN" == false ]]; then
            _batch_log "EP${EP_PAD}: ERRO — sem ficheiros raw após TTS — a saltar episódio"
            RESULTS[$EP_NUM]="erro_sem_raw"
            continue
        fi

        _batch_log "EP${EP_PAD}: [2/3] Pós-produção (${#RAW_FILES[@]} partes)..."
        if [[ "$DRY_RUN" == false ]]; then
            if ! bash "${SCRIPT_DIR}/post-produce.sh" "$EP_NUM" "${RAW_FILES[@]}" >> "${BATCH_LOG}" 2>&1; then
                _batch_log "EP${EP_PAD}: ERRO na pós-produção — a saltar episódio"
                RESULTS[$EP_NUM]="erro_postprod"
                continue
            fi
        else
            _batch_log "EP${EP_PAD}: [DRY-RUN] post-produce.sh ${EP_NUM} <raw_files>"
        fi

        # Locate the final MP3 that post-production should have written.
        for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
            [[ -f "$f" ]] && FINAL_MP3="$f" && break
        done
        if [[ -z "$FINAL_MP3" && "$DRY_RUN" == false ]]; then
            _batch_log "EP${EP_PAD}: ERRO — MP3 final não encontrado após pós-produção"
            RESULTS[$EP_NUM]="erro_sem_mp3"
            continue
        fi
    else
        _batch_log "EP${EP_PAD}: [1/3] Áudio já existe: $(basename "${FINAL_MP3}") — a saltar TTS+pós-prod"
    fi

    # --- Stage 3: schedule in WordPress ---
    _batch_log "EP${EP_PAD}: [3/3] A agendar no WordPress para ${SCHED_DATE}..."
    if [[ "$DRY_RUN" == false ]]; then
        if ! bash "${SCRIPT_DIR}/schedule-episode.sh" "$EP_NUM" "$SCHED_DATE" >> "${BATCH_LOG}" 2>&1; then
            _batch_log "EP${EP_PAD}: ERRO no agendamento"
            RESULTS[$EP_NUM]="erro_schedule"
            continue
        fi
    else
        _batch_log "EP${EP_PAD}: [DRY-RUN] schedule-episode.sh ${EP_NUM} ${SCHED_DATE}"
    fi

    RESULTS[$EP_NUM]="ok"
    _batch_log "EP${EP_PAD}: ✓ COMPLETO"
done
# Final summary: one line per episode in batch order, then the totals.
_batch_log ""
_batch_log "=== RESUMO ==="

OK_COUNT=0
ERR_COUNT=0
SKIP_COUNT=0

for EP_NUM in "${EP_ORDER[@]}"; do
    EP_PAD="$(pad_number "$EP_NUM")"
    # Episodes without a recorded result are reported as errors ("desconhecido").
    STATUS="${RESULTS[$EP_NUM]:-desconhecido}"
    if [[ "$STATUS" == "ok" ]]; then
        _batch_log "✓ EP${EP_PAD} — completo"
        OK_COUNT=$((OK_COUNT + 1))
    elif [[ "$STATUS" == "skip" ]]; then
        _batch_log "⏭ EP${EP_PAD} — já agendado"
        SKIP_COUNT=$((SKIP_COUNT + 1))
    else
        _batch_log "✗ EP${EP_PAD} — ERRO: ${STATUS}"
        ERR_COUNT=$((ERR_COUNT + 1))
    fi
done

_batch_log ""
_batch_log "Total: ${OK_COUNT} ok | ${SKIP_COUNT} saltados | ${ERR_COUNT} erros"
_batch_log "Log completo: ${BATCH_LOG}"
_batch_log "=== BATCH END — $(date) ==="

# Exit status 1 as soon as any episode failed.
if [[ "$ERR_COUNT" -gt 0 ]]; then
    exit 1
fi
exit 0
+68
View File
@@ -0,0 +1,68 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Batch producer: for each episode in EPISODES (calendar order) run TTS
# generation (generate-audio.sh), collect the raw WAV parts and run
# post-production (post-produce.sh). A failing episode is counted and skipped;
# the batch always continues with the next one.
EPISODES=(136 37 38 39 40 41 42 44 135 46 47 48 49 50 51)
TOTAL=${#EPISODES[@]}
DONE=0
FAILED=0

log_info "=== BATCH PRODUCE: ${TOTAL} episodios ==="

for EP in "${EPISODES[@]}"; do
  EP_PAD="$(pad_number "$EP")"
  DONE=$((DONE + 1))
  log_info "--- [${DONE}/${TOTAL}] EP${EP_PAD} ---"

  # Step 1: Generate audio (split + TTS)
  log_info "EP${EP_PAD}: Generating audio..."
  if bash "${SCRIPT_DIR}/generate-audio.sh" "$EP" 2>&1; then
    log_info "EP${EP_PAD}: Audio generation OK"
  else
    log_error "EP${EP_PAD}: Audio generation FAILED, skipping"
    FAILED=$((FAILED + 1))
    continue
  fi

  # Step 2: Collect raw audio parts.
  # The previous "alternate path" retry globbed the exact same directory and
  # could never find anything new, so it was removed as dead code.
  RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
  RAW_FILES=()
  for p in "${RAW_DIR}/ep_${EP_PAD}_p"*.wav; do
    # An unmatched glob stays literal; the -f test filters it out.
    if [[ -f "$p" ]]; then
      RAW_FILES+=("$p")
    fi
  done
  if [[ ${#RAW_FILES[@]} -eq 0 ]]; then
    log_error "EP${EP_PAD}: No raw audio files found, skipping post-production"
    FAILED=$((FAILED + 1))
    continue
  fi

  # Sort parts numerically: version sort keeps p2 before p10, which a plain
  # lexical sort does not. mapfile avoids word-splitting paths with spaces.
  mapfile -t RAW_FILES_SORTED < <(printf '%s\n' "${RAW_FILES[@]}" | sort -V)

  # Step 3: Post-produce
  log_info "EP${EP_PAD}: Post-producing ${#RAW_FILES_SORTED[@]} parts..."
  if bash "${SCRIPT_DIR}/post-produce.sh" "$EP" "${RAW_FILES_SORTED[@]}" 2>&1; then
    log_info "EP${EP_PAD}: Post-production OK"
  else
    log_error "EP${EP_PAD}: Post-production FAILED"
    FAILED=$((FAILED + 1))
    continue
  fi

  log_info "EP${EP_PAD}: DONE"
done

log_info "=== BATCH COMPLETE: ${DONE} processed, $((DONE - FAILED)) OK, ${FAILED} failed ==="
+134
View File
@@ -0,0 +1,134 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Episodes handled by this batch, in the order they are processed.
EPISODES=(88 89 55 56 133 57 58 59 60 61 62 63 138 140 64)
# Extended regex (used with `grep -E`) marking where the script text is cut
# into a main body and a trailing section.
SPLIT_MARKER="$(jq -r '.tts_split_marker' "${PROJECT_ROOT}/config/audio-settings.json")"
# Word threshold above which the body is split in two parts.
# NOTE(review): generate-audio.sh reads this limit from
# `.tts_max_words_per_block` in audio-settings.json; here it is hard-coded.
MAX_WORDS=1000
# Directory that receives the raw TTS WAV parts.
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
# Progress counters for the batch loop.
TOTAL=${#EPISODES[@]}
DONE=0
FAILED=0
mkdir -p "$RAW_DIR"
# _split_and_generate_in <ep_num> <ep_pad> <script_file> <tmp_dir>
# Splits the script into part_N.txt files inside <tmp_dir>, generates the
# missing raw WAV parts and runs post-production. Returns non-zero on the
# first failing step. Every step is checked explicitly because the entry
# point is called from an `if`, where `set -e` is not honoured.
_split_and_generate_in() {
  local ep_num="$1" ep_pad="$2" script_file="$3" tmp_dir="$4"

  # Cut the text at the first line matching SPLIT_MARKER (body / trailer).
  local marker_line
  marker_line="$(grep -nE "$SPLIT_MARKER" "$script_file" | head -1 | cut -d: -f1 || echo "")"
  if [[ -n "$marker_line" && "$marker_line" -gt 1 ]]; then
    head -n "$((marker_line - 1))" "$script_file" > "${tmp_dir}/corpo.txt" || return 1
    tail -n "+${marker_line}" "$script_file" > "${tmp_dir}/faq.txt" || return 1
  else
    cp "$script_file" "${tmp_dir}/corpo.txt" || return 1
    : > "${tmp_dir}/faq.txt"
  fi

  # When the body is too long, split it near its middle line, preferring a
  # blank line within +/-10 lines of the midpoint.
  local corpo_words split_line=0
  corpo_words="$(wc -w < "${tmp_dir}/corpo.txt")"
  if [[ "$corpo_words" -gt "$MAX_WORDS" ]]; then
    local corpo_lines mid check line offset
    corpo_lines="$(wc -l < "${tmp_dir}/corpo.txt")"
    mid=$((corpo_lines / 2))
    split_line="$mid"
    for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
      check=$((mid + offset))
      if [[ "$check" -gt 0 && "$check" -lt "$corpo_lines" ]]; then
        line="$(sed -n "${check}p" "${tmp_dir}/corpo.txt")"
        if [[ -z "$line" || "$line" =~ ^[[:space:]]*$ ]]; then
          split_line="$check"
          break
        fi
      fi
    done
  fi

  local num_parts
  if [[ "$split_line" -ge 1 ]]; then
    head -n "$split_line" "${tmp_dir}/corpo.txt" > "${tmp_dir}/part_1.txt" || return 1
    tail -n "+$((split_line + 1))" "${tmp_dir}/corpo.txt" > "${tmp_dir}/part_2.txt" || return 1
    num_parts=2
  else
    # Either no split is needed, or the body has a single line (midpoint 0):
    # keep it whole instead of emitting an empty first part.
    cp "${tmp_dir}/corpo.txt" "${tmp_dir}/part_1.txt" || return 1
    num_parts=1
  fi
  if [[ -s "${tmp_dir}/faq.txt" ]]; then
    num_parts=$((num_parts + 1))
    cp "${tmp_dir}/faq.txt" "${tmp_dir}/part_${num_parts}.txt" || return 1
  fi
  log_info "EP${ep_pad}: Split into ${num_parts} parts"

  # Generate TTS for each part (skip if raw already exists)
  local -a raw_files=()
  local i part_file output part_words
  for ((i = 1; i <= num_parts; i++)); do
    part_file="${tmp_dir}/part_${i}.txt"
    output="${RAW_DIR}/ep_${ep_pad}_p${i}.wav"
    part_words="$(wc -w < "$part_file")"
    if [[ -f "$output" ]]; then
      log_info "EP${ep_pad}: Part ${i}/${num_parts} already exists (${part_words} words), skipping"
    else
      log_info "EP${ep_pad}: Generating part ${i}/${num_parts} (${part_words} words)..."
      if ! python3 "${SCRIPT_DIR}/tts-single-part.py" "$part_file" "$output"; then
        log_error "EP${ep_pad}: TTS failed for part ${i}/${num_parts}"
        # Drop any partial output so the next run does not reuse it.
        rm -f "$output"
        return 1
      fi
      log_info "EP${ep_pad}: Part ${i} done"
    fi
    raw_files+=("$output")
  done

  # Post-produce
  log_info "EP${ep_pad}: Post-producing..."
  if ! bash "${SCRIPT_DIR}/post-produce.sh" "$ep_num" "${raw_files[@]}"; then
    log_error "EP${ep_pad}: Post-production failed"
    return 1
  fi
  log_info "EP${ep_pad}: COMPLETE"
}

# split_and_generate <episode_number>
# Produces one episode end to end: locates its script text, skips it when a
# final MP3 already exists, otherwise splits, synthesises and post-produces.
# Returns 0 on success or skip, 1 on any failure. The temp dir is always
# removed, including on failure.
split_and_generate() {
  local ep_num="$1"
  local ep_pad
  ep_pad="$(pad_number "$ep_num")" || return 1

  # Find script file
  local script_file="" f
  for f in "${PROJECT_ROOT}/Episodios/Episodio_${ep_pad}_"*.txt; do
    if [[ -f "$f" ]]; then
      script_file="$f"
      break
    fi
  done
  if [[ -z "$script_file" ]]; then
    log_error "EP${ep_pad}: No script found"
    return 1
  fi

  local total_words
  total_words="$(wc -w < "$script_file")"
  log_info "EP${ep_pad}: ${total_words} words from $(basename "$script_file")"

  # Check if final MP3 already exists (glob instead of parsing `ls`).
  for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${ep_pad}_"*.mp3; do
    if [[ -f "$f" ]]; then
      log_info "EP${ep_pad}: Final MP3 already exists, SKIPPING"
      return 0
    fi
  done

  local tmp_dir status=0
  if ! tmp_dir="$(mktemp -d)"; then
    log_error "EP${ep_pad}: Could not create temporary directory"
    return 1
  fi
  _split_and_generate_in "$ep_num" "$ep_pad" "$script_file" "$tmp_dir" || status=$?
  rm -rf "$tmp_dir"
  return "$status"
}
# Main loop: process every episode, counting failures without aborting.
log_info "=== BATCH TTS: ${TOTAL} episodios ==="

for EP in "${EPISODES[@]}"; do
  DONE=$((DONE + 1))
  EP_LABEL="EP$(pad_number "$EP")"
  log_info "--- [${DONE}/${TOTAL}] ${EP_LABEL} ---"
  if ! split_and_generate "$EP"; then
    log_error "${EP_LABEL}: FAILED"
    FAILED=$((FAILED + 1))
    continue
  fi
  log_info "${EP_LABEL}: OK"
done

log_info "=== BATCH COMPLETE: ${TOTAL} processed, $((TOTAL - FAILED)) OK, ${FAILED} failed ==="
+168
View File
@@ -0,0 +1,168 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Print the command-line help on stdout and exit with status 1.
usage() {
  cat <<EOF
Usage: $0 <episode_number>
 Generates TTS audio via Gemini API for the given episode.
 Requires GEMINI_API_KEY environment variable.
 Splits text into blocks if needed (max ~1000 words per block).
EOF
  exit 1
}
# Argument parsing and environment checks.
if [[ $# -lt 1 ]]; then
  usage
fi
EP_NUM="$1"
EP_PAD="$(pad_number "$EP_NUM")"
ensure_state_file

# The Gemini API key is mandatory for TTS.
[[ -n "${GEMINI_API_KEY:-}" ]] || {
  log_error "GEMINI_API_KEY not set. Export it first."
  exit 1
}

# TTS settings come from the shared audio settings file.
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
TTS_MODEL="$(jq -r '.tts_model' "$SETTINGS_FILE")"
TTS_VOICE="$(jq -r '.tts_voice' "$SETTINGS_FILE")"
MAX_WORDS="$(jq -r '.tts_max_words_per_block' "$SETTINGS_FILE")"
SPLIT_MARKER="$(jq -r '.tts_split_marker' "$SETTINGS_FILE")"

# Use the first Episodio_<NNN>_*.txt file as the episode script.
SCRIPT_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
  [[ -f "$candidate" ]] || continue
  SCRIPT_FILE="$candidate"
  break
done
[[ -n "$SCRIPT_FILE" ]] || {
  log_error "EP${EP_PAD}: No script .txt found"
  exit 1
}

TOTAL_WORDS="$(wc -w < "$SCRIPT_FILE")"
log_info "EP${EP_PAD}: Script has ${TOTAL_WORDS} words, max per block: ${MAX_WORDS}"

# Raw WAV parts are written here.
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
mkdir -p "$RAW_DIR"
# Split the script into part_N.txt files inside a temp dir (removed on exit).
# Short scripts stay whole. Long ones are cut at the first SPLIT_MARKER line
# (body / trailer); a body that is still too long is halved near a paragraph
# break. NUM_PARTS ends up between 1 and 3.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT

if [[ "$TOTAL_WORDS" -le "$MAX_WORDS" ]]; then
  cp "$SCRIPT_FILE" "${TMP_DIR}/part_1.txt"
  NUM_PARTS=1
else
  # Smart split: first at FAQ marker, then subdivide large parts
  MARKER_LINE="$(grep -nE "$SPLIT_MARKER" "$SCRIPT_FILE" | head -1 | cut -d: -f1 || echo "")"
  if [[ -n "$MARKER_LINE" && "$MARKER_LINE" -gt 1 ]]; then
    head -n "$((MARKER_LINE - 1))" "$SCRIPT_FILE" > "${TMP_DIR}/corpo.txt"
    tail -n "+${MARKER_LINE}" "$SCRIPT_FILE" > "${TMP_DIR}/faq.txt"
  else
    cp "$SCRIPT_FILE" "${TMP_DIR}/corpo.txt"
    : > "${TMP_DIR}/faq.txt"
  fi

  # Subdivide corpo if too long. SPLIT_LINE stays 0 when no split applies.
  CORPO_WORDS="$(wc -w < "${TMP_DIR}/corpo.txt")"
  SPLIT_LINE=0
  if [[ "$CORPO_WORDS" -gt "$MAX_WORDS" ]]; then
    CORPO_LINES="$(wc -l < "${TMP_DIR}/corpo.txt")"
    MID=$((CORPO_LINES / 2))
    # Find nearest paragraph break (blank line) around the midpoint.
    SPLIT_LINE="$MID"
    for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
      CHECK=$((MID + offset))
      if [[ "$CHECK" -gt 0 && "$CHECK" -lt "$CORPO_LINES" ]]; then
        LINE="$(sed -n "${CHECK}p" "${TMP_DIR}/corpo.txt")"
        if [[ -z "$LINE" || "$LINE" =~ ^[[:space:]]*$ ]]; then
          SPLIT_LINE="$CHECK"
          break
        fi
      fi
    done
  fi

  if [[ "$SPLIT_LINE" -ge 1 ]]; then
    head -n "$SPLIT_LINE" "${TMP_DIR}/corpo.txt" > "${TMP_DIR}/part_1.txt"
    tail -n "+$((SPLIT_LINE + 1))" "${TMP_DIR}/corpo.txt" > "${TMP_DIR}/part_2.txt"
    NUM_PARTS=2
  else
    # No split needed, or the body has a single line (midpoint 0): keep it
    # whole rather than sending an empty first part to the TTS API.
    cp "${TMP_DIR}/corpo.txt" "${TMP_DIR}/part_1.txt"
    NUM_PARTS=1
  fi

  # The trailer, when present, is always the last part.
  if [[ -s "${TMP_DIR}/faq.txt" ]]; then
    NUM_PARTS=$((NUM_PARTS + 1))
    cp "${TMP_DIR}/faq.txt" "${TMP_DIR}/part_${NUM_PARTS}.txt"
  fi
fi

log_info "EP${EP_PAD}: Split into ${NUM_PARTS} parts"
# TTS style prompt, prepended to every part sent to the model.
STYLE="Lê este texto em português de Portugal (PT-PT), com um tom enérgico, confiante, educativo, inspirador e profissional. Mantém o ritmo natural e envolvente, como se estivesses a conversar diretamente com o ouvinte, transmitindo proximidade e autoridade. Faz pequenas pausas para dar ênfase às ideias-chave e assegura que cada transição entre temas é fluida. Evita soar robótico ou demasiado formal; o objetivo é informar, motivar e criar ligação com quem está a ouvir."

# Generate each part as a mono 16-bit 24 kHz WAV (the header values written
# by the embedded Python below).
# The Python program is a quoted heredoc: the API key and style prompt travel
# through the environment and paths/model/voice through argv. Nothing is
# interpolated into Python source, so quotes in any value cannot break or
# inject code, and the key does not show up in the process argument list.
AUDIO_FILES=()
for ((i = 1; i <= NUM_PARTS; i++)); do
  PART_FILE="${TMP_DIR}/part_${i}.txt"
  PART_WORDS="$(wc -w < "$PART_FILE")"
  OUTPUT="${RAW_DIR}/ep_${EP_PAD}_p${i}.wav"
  log_info "EP${EP_PAD}: Generating part ${i}/${NUM_PARTS} (${PART_WORDS} words)..."

  GEMINI_API_KEY="$GEMINI_API_KEY" TTS_STYLE="$STYLE" \
    python3 - "$PART_FILE" "$OUTPUT" "$TTS_MODEL" "$TTS_VOICE" <<'PYTTS'
import os
import sys
import wave

from google import genai
from google.genai import types

part_path, output_path, model, voice = sys.argv[1:5]

with open(part_path, "r", encoding="utf-8") as f:
    text = f.read()

client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
response = client.models.generate_content(
    model=model,
    contents=os.environ["TTS_STYLE"] + "\n\n" + text,
    config=types.GenerateContentConfig(
        response_modalities=["AUDIO"],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice)
            )
        ),
    ),
)
data = response.candidates[0].content.parts[0].inline_data.data
with wave.open(output_path, "wb") as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(24000)
    wf.writeframes(data)
duration = (len(data) // 2) / 24000
print(f"{duration:.0f}")
PYTTS

  DURATION="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTPUT")"
  log_info "EP${EP_PAD}: Part ${i} done: ${DURATION%.*}s"
  AUDIO_FILES+=("$OUTPUT")
done

# Update state
set_episode_field "$EP_NUM" "status" "audio_done"

# Print output files for post-produce.sh
log_info "EP${EP_PAD}: All ${NUM_PARTS} parts generated. Run post-production:"
echo "bash scripts/post-produce.sh ${EP_NUM} ${AUDIO_FILES[*]}"
+160
View File
@@ -0,0 +1,160 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Print the command-line help on stdout and exit with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <episode_number>" \
    " Generates podcast script and WP description for the given episode." \
    " Reads episode data from config/episode-guide-map.json." \
    " Skips script generation if .txt already exists."
  exit 1
}
# Argument parsing.
if [[ $# -lt 1 ]]; then
  usage
fi
EP_NUM="$1"
EP_PAD="$(pad_number "$EP_NUM")"
ensure_state_file

# Look the episode up in the episode/guide map (keyed by episode number).
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
EP_DATA="$(jq -r --arg n "$EP_NUM" '.[$n] // empty' "$MAP_FILE")"
[[ -n "$EP_DATA" ]] || {
  log_error "Episode $EP_NUM not found in episode-guide-map.json"
  exit 1
}

GUIDE_TITLE="$(jq -r '.guide_title' <<< "$EP_DATA")"
PODCAST_TITLE="$(jq -r '.podcast_title' <<< "$EP_DATA")"
GUIDE_URL="$(jq -r '.guide_url' <<< "$EP_DATA")"

# File-system-safe title: every non-alphanumeric character becomes "_", runs
# of "_" are collapsed and a trailing "_" is dropped.
SAFE_TITLE="$(echo "$PODCAST_TITLE" | sed -e 's/[^a-zA-Z0-9]/_/g' -e 's/__*/_/g' -e 's/_$//')"
SCRIPT_FILE="${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_${SAFE_TITLE}.txt"
WP_FILE="${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_${SAFE_TITLE}_wp.json"

# Reuse an existing script for this episode number, whatever its title part.
EXISTING_SCRIPT=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
  [[ -f "$candidate" ]] || continue
  EXISTING_SCRIPT="$candidate"
  SCRIPT_FILE="$candidate"
  break
done

# Static prompt material shared by both generation steps.
SERVICES="$(cat "${PROJECT_ROOT}/config/service-links.json")"
SCRIPT_PROMPT="$(cat "${PROJECT_ROOT}/prompts/generate-script.md")"
WP_PROMPT="$(cat "${PROJECT_ROOT}/prompts/generate-wp-description.md")"
# Step 1: Generate script (or skip if exists).
# Output goes to a ".partial" file and is renamed only when the generator
# succeeded with non-empty output. Otherwise a failed run would leave an
# empty/truncated .txt that later runs treat as an existing script.
if [[ -n "$EXISTING_SCRIPT" ]]; then
  log_info "EP${EP_PAD}: Script already exists at ${EXISTING_SCRIPT}, skipping generation"
else
  log_info "EP${EP_PAD}: Generating podcast script for '${PODCAST_TITLE}'"
  FULL_PROMPT="${SCRIPT_PROMPT}
---
## Dados do episodio
- Numero: ${EP_NUM}
- Titulo: ${PODCAST_TITLE}
- Guia de referencia: ${GUIDE_TITLE}
- URL do guia: ${GUIDE_URL}
## Links de servicos Descomplicar (usar quando contextual)
${SERVICES}
## Instrucao
Gera o guiao completo seguindo a estrutura obrigatoria acima. Output apenas o texto limpo."

  # The ".partial" suffix does not match the Episodio_*.txt globs used to
  # detect existing scripts.
  SCRIPT_PARTIAL="${SCRIPT_FILE}.partial"
  GENERATION_OK=true
  printf '%s\n' "$FULL_PROMPT" | claude --print > "$SCRIPT_PARTIAL" || GENERATION_OK=false
  if [[ "$GENERATION_OK" != true || ! -s "$SCRIPT_PARTIAL" ]]; then
    rm -f "$SCRIPT_PARTIAL"
    log_error "EP${EP_PAD}: Script generation failed (generator error or empty output)"
    exit 1
  fi
  mv -f "$SCRIPT_PARTIAL" "$SCRIPT_FILE"
  log_info "EP${EP_PAD}: Script saved to ${SCRIPT_FILE}"
fi
# Step 2: Generate WP description.
# The model output must be valid JSON. When it is not, one automatic repair
# is attempted (escaping bare double quotes inside "content_html"). A failed
# repair is logged and the script continues, leaving the file for manual
# review.

# _wp_json_is_valid <file>: succeeds when <file> parses as JSON. The path is
# passed through argv so quotes in file names cannot break the Python code.
_wp_json_is_valid() {
  python3 -c 'import json, sys; json.load(open(sys.argv[1], encoding="utf-8"))' "$1" 2>/dev/null
}

log_info "EP${EP_PAD}: Generating WordPress description"
SCRIPT_CONTENT="$(cat "$SCRIPT_FILE")"
WP_FULL_PROMPT="${WP_PROMPT}
---
## Dados do episodio
- Numero: ${EP_NUM}
- Titulo: ${PODCAST_TITLE}
- URL do guia relacionado: ${GUIDE_URL}
## Links de servicos Descomplicar (USAR APENAS ESTES — nunca inventar)
${SERVICES}
## Guiao do episodio (base para a descricao)
${SCRIPT_CONTENT}
## Instrucao
Gera o JSON com a descricao WordPress completa. Output APENAS JSON valido."

# Write to a side file first so a failed generator run does not clobber a
# previously generated description.
WP_PARTIAL="${WP_FILE}.partial"
if ! printf '%s\n' "$WP_FULL_PROMPT" | claude --print > "$WP_PARTIAL"; then
  rm -f "$WP_PARTIAL"
  log_error "EP${EP_PAD}: WP description generation failed"
  exit 1
fi
mv -f "$WP_PARTIAL" "$WP_FILE"

# Validate and auto-repair JSON output
if _wp_json_is_valid "$WP_FILE"; then
  log_info "EP${EP_PAD}: WP description saved to ${WP_FILE}"
else
  log_warn "EP${EP_PAD}: WP JSON inválido — a tentar reparação automática..."
  # `|| true`: a failed repair must not abort the script under `set -e`;
  # the re-validation below reports the outcome.
  python3 - "$WP_FILE" << 'PYFIX' || true
import sys, re, json
filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()
# Locate the content_html value and escape its unescaped double quotes
marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    print("SKIP: content_html não encontrado")
    sys.exit(1)
content_start = start + len(marker)
end_pattern = re.search(r'",\s*\n\s*"hashtags"', raw[content_start:])
if not end_pattern:
    end_pattern = re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:])
if not end_pattern:
    print("SKIP: fim do content_html não encontrado")
    sys.exit(1)
content_end = content_start + end_pattern.start()
raw_content = raw[content_start:content_end]
fixed_content = re.sub(r'(?<!\\)"', '\\"', raw_content)
fixed_raw = raw[:content_start] + fixed_content + raw[content_end:]
try:
    json.loads(fixed_raw)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_raw)
    print("JSON reparado com sucesso")
except Exception as e:
    print(f"Reparação falhou: {e}")
    sys.exit(1)
PYFIX
  if _wp_json_is_valid "$WP_FILE"; then
    log_info "EP${EP_PAD}: JSON reparado automaticamente"
  else
    log_error "EP${EP_PAD}: JSON inválido após reparação — verificação manual necessária"
  fi
fi
# Record the result in the pipeline state, registering the episode first
# when the state lookup reports it as "not_found".
STATUS="$(get_episode_status "$EP_NUM")"
case "$STATUS" in
  not_found)
    add_episode "$EP_NUM" "$PODCAST_TITLE" "$GUIDE_URL"
    ;;
esac

SCRIPT_BASENAME="$(basename "$SCRIPT_FILE")"
WP_BASENAME="$(basename "$WP_FILE")"
set_episode_field "$EP_NUM" "status" "script_done"
set_episode_field "$EP_NUM" "script_path" "$SCRIPT_BASENAME"
set_episode_field "$EP_NUM" "wp_data_path" "$WP_BASENAME"

log_info "EP${EP_PAD}: Content generation complete"
+48
View File
@@ -0,0 +1,48 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Print the command-line help on stdout and exit with status 1.
usage() {
  cat <<EOF
Usage: $0 [start_episode] [count]
 Generates canva-bulk.csv for cover image generation.
 Default: next 7 episodes from pipeline state.
EOF
  exit 1
}
# Build canva-bulk.csv (columns: number,title,keyword) for a range of
# episodes. Arguments: [start_episode] [count]; defaults are `.next_episode`
# from the pipeline state and 7.

# csv_quote <value>: print <value> as a double-quoted CSV field, doubling any
# embedded double quote so such titles/keywords do not break the row.
csv_quote() {
  local value="$1"
  local dq='"'
  printf '"%s"' "${value//$dq/$dq$dq}"
}

# usage() was defined but unreachable; expose it through -h/--help.
case "${1:-}" in
  -h|--help) usage ;;
esac

ensure_state_file
START="${1:-$(jq -r '.next_episode' "$STATE_FILE")}"
COUNT="${2:-7}"
END=$((START + COUNT - 1))
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
CSV_FILE="${PROJECT_ROOT}/canva-bulk.csv"

echo "number,title,keyword" > "$CSV_FILE"

for ((ep = START; ep <= END; ep++)); do
  EP_DATA="$(jq -r --arg n "$ep" '.[$n] // empty' "$MAP_FILE")"
  if [[ -z "$EP_DATA" ]]; then
    log_warn "Episode $ep not found in map, skipping"
    continue
  fi
  TITLE="$(echo "$EP_DATA" | jq -r '.podcast_title')"

  # Try to get keyword from WP JSON if it exists; fall back to the title.
  EP_PAD="$(pad_number "$ep")"
  KEYWORD=""
  for wp_file in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
    if [[ -f "$wp_file" ]]; then
      # An unreadable or invalid JSON file yields an empty keyword.
      KEYWORD="$(jq -r '.keyword // ""' "$wp_file" 2>/dev/null || echo "")"
      break
    fi
  done
  if [[ -z "$KEYWORD" ]]; then
    KEYWORD="$TITLE"
  fi

  printf '%s,%s,%s\n' "$ep" "$(csv_quote "$TITLE")" "$(csv_quote "$KEYWORD")" >> "$CSV_FILE"
done

log_info "Generated Canva CSV: ${CSV_FILE} (episodes ${START}-${END})"
echo "CSV saved to: ${CSV_FILE}"
+71
View File
@@ -0,0 +1,71 @@
#!/usr/bin/env bash
set -euo pipefail
# Shared configuration for every pipeline script that sources this file.
# PROJECT_ROOT is an absolute, machine-specific path.
PROJECT_ROOT="/media/ealmeida/Dados/Hub/05-Projectos/Podcast-Descomplicar-Digital"
# JSON file holding the pipeline state (see ensure_state_file for its shape).
STATE_FILE="${PROJECT_ROOT}/pipeline-state.json"
# Directory for the daily pipeline log files written by log().
LOG_DIR="${PROJECT_ROOT}/logs"
# Auto-load .env if present
# `set -a` marks every variable assigned while sourcing for export, so child
# processes see the values from .env; `set +a` restores the default.
if [[ -f "${PROJECT_ROOT}/.env" ]]; then
set -a
source "${PROJECT_ROOT}/.env"
set +a
fi
# log <level> <message...>
# Writes "[timestamp] [level] message" to stdout and appends the same line to
# the daily log file under LOG_DIR (created on demand).
log() {
  local level="$1"
  shift
  local stamp logfile
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  mkdir -p "${LOG_DIR}"
  logfile="${LOG_DIR}/pipeline-$(date '+%Y-%m-%d').log"
  printf '[%s] [%s] %s\n' "$stamp" "$level" "$*" | tee -a "$logfile"
}

# Level-specific wrappers around log().
log_info() {
  log "INFO" "$@"
}

log_warn() {
  log "WARN" "$@"
}

log_error() {
  log "ERROR" "$@"
}
# Create pipeline-state.json with default values when it does not exist yet.
# Does nothing when the file is already present.
ensure_state_file() {
  if [[ -f "${STATE_FILE}" ]]; then
    return 0
  fi
  jq '.' <<< '{"last_updated":"","next_episode":20,"next_publish_date":"","publish_time":"07:00","publish_days":["mon","tue","wed","thu","fri"],"episodes":[]}' > "${STATE_FILE}"
  log_info "Created new pipeline-state.json"
}
# get_episode_status <episode_number>
# Prints the `status` of the episode whose `.number` equals the argument, or
# "not_found" when the episode is absent, its status is null, or the state
# file cannot be read.
#
# The earlier filter applied `// "not_found"` per matching episode, so a
# missing episode produced no output at all and callers comparing against
# "not_found" never matched. Wrapping the lookup in first(...) makes the
# alternative operator also cover the "no match" case.
get_episode_status() {
  local ep_num="$1"
  jq -r --argjson n "$ep_num" \
    'first(.episodes[] | select(.number == $n) | .status) // "not_found"' \
    "${STATE_FILE}" 2>/dev/null || echo "not_found"
}
# set_episode_field <episode_number> <field> <value>
# Sets <field> (stored as a JSON string) on the episode whose `.number`
# matches and refreshes `.last_updated`. Returns non-zero when the state file
# could not be rewritten.
#
# The temp file is created next to the state file so the final `mv` is a
# same-filesystem rename, and it is removed when jq or mv fails instead of
# being left behind.
set_episode_field() {
  local ep_num="$1" field="$2" value="$3"
  local tmp
  tmp="$(mktemp "${STATE_FILE}.XXXXXX")" || return 1
  if jq --argjson n "$ep_num" --arg f "$field" --arg v "$value" \
    '(.episodes[] | select(.number == $n))[$f] = $v | .last_updated = (now | todate)' \
    "${STATE_FILE}" > "$tmp" && mv "$tmp" "${STATE_FILE}"; then
    return 0
  fi
  rm -f "$tmp"
  return 1
}
# add_episode <episode_number> <title> <guide_url>
# Appends a new episode record with status "pending" (all path/ID fields
# null) to the state file and refreshes `.last_updated`. Returns non-zero
# when the state file could not be rewritten.
#
# The temp file is created next to the state file so the final `mv` is a
# same-filesystem rename, and it is removed when jq or mv fails.
add_episode() {
  local ep_num="$1" title="$2" guide_url="$3"
  local tmp
  tmp="$(mktemp "${STATE_FILE}.XXXXXX")" || return 1
  if jq --argjson n "$ep_num" --arg t "$title" --arg g "$guide_url" \
    '.episodes += [{"number":$n,"title":$t,"source_guide_url":$g,"status":"pending","script_path":null,"wp_data_path":null,"audio_raw_path":null,"audio_final_path":null,"cover_path":null,"wp_post_id":null,"scheduled_date":null}] | .last_updated = (now | todate)' \
    "${STATE_FILE}" > "$tmp" && mv "$tmp" "${STATE_FILE}"; then
    return 0
  fi
  rm -f "$tmp"
  return 1
}
# next_weekday <date>
# Prints (YYYY-MM-DD) the next weekday after <date>: Friday, Saturday and
# Sunday all map to the following Monday; any other day maps to the next day.
next_weekday() {
  local base_date="$1"
  local dow offset
  # ISO day of week: 1 = Monday ... 7 = Sunday.
  dow="$(date -d "${base_date}" '+%u')"
  case "$dow" in
    5|6|7) offset="$((8 - dow)) days" ;;
    *) offset="1 day" ;;
  esac
  date -d "${base_date} + ${offset}" '+%Y-%m-%d'
}
# pad_number <number>
# Prints <number> zero-padded to at least three digits (7 -> 007).
#
# The argument is forced to base 10 first. printf treats a leading zero as an
# octal prefix, so an already padded value such as "037" used to come out as
# "031", and "008"/"009" were rejected as invalid octal numbers.
pad_number() {
  printf '%03d' "$((10#${1}))"
}
+154
View File
@@ -0,0 +1,154 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Print the command-line help on stdout and exit with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <episode_number> <raw_audio_path> [raw_audio_path_2 ...]" \
    " Applies intro, outro, and loudness normalization to raw TTS audio." \
    " Accepts multiple audio parts that will be concatenated in order."
  exit 1
}
# Argument parsing: episode number followed by one or more raw audio parts.
if [[ $# -lt 2 ]]; then
  usage
fi
EP_NUM="$1"
shift
RAW_PARTS=("$@")
EP_PAD="$(pad_number "$EP_NUM")"
ensure_state_file

# Every raw part must exist before any processing starts.
for raw_part in "${RAW_PARTS[@]}"; do
  [[ -f "$raw_part" ]] && continue
  log_error "EP${EP_PAD}: Raw audio file not found: ${raw_part}"
  exit 1
done

# Load audio settings
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
# audio_setting <jq-filter>: print one raw value from the settings file.
audio_setting() {
  jq -r "$1" "$SETTINGS_FILE"
}
INTRO_FILE="${PROJECT_ROOT}/$(audio_setting '.intro_file')"
OUTRO_FILE="${PROJECT_ROOT}/$(audio_setting '.outro_file')"
FADE_IN="$(audio_setting '.fade_in_duration')"
BG_DUR="$(audio_setting '.background_duration')"
BG_VOL="$(audio_setting '.background_volume_db')"
FADE_OUT="$(audio_setting '.fade_out_duration')"
LUFS="$(audio_setting '.loudness_target_lufs')"
BITRATE="$(audio_setting '.export_bitrate')"
SAMPLE_RATE="$(audio_setting '.export_sample_rate')"
MIN_DUR="$(audio_setting '.min_duration_minutes')"
MAX_DUR="$(audio_setting '.max_duration_minutes')"

# Intro and outro assets are mandatory.
for asset in "$INTRO_FILE" "$OUTRO_FILE"; do
  [[ -f "$asset" ]] && continue
  log_error "EP${EP_PAD}: Required audio file not found: ${asset}"
  exit 1
done

# Create output directory
OUTPUT_DIR="${PROJECT_ROOT}/Episodios/Audios/final"
mkdir -p "$OUTPUT_DIR"

# Output name: ep_<NNN>_<slug>.mp3, where the slug is the podcast title with
# non-alphanumerics turned into single hyphens, lower-cased.
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
PODCAST_TITLE="$(jq -r --arg n "$EP_NUM" '.[$n].podcast_title // "episodio"' "$MAP_FILE")"
SAFE_TITLE="$(echo "$PODCAST_TITLE" | sed -e 's/[^a-zA-Z0-9]/-/g' -e 's/--*/-/g' -e 's/-$//' | tr '[:upper:]' '[:lower:]')"
OUTPUT_FILE="${OUTPUT_DIR}/ep_${EP_PAD}_${SAFE_TITLE}.mp3"

# Scratch directory for intermediate WAV files, removed on exit.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT
log_info "EP${EP_PAD}: Starting post-production"

# Produce RAW_COMBINED: the single raw part as-is, or all parts converted to a
# common format (stereo, SAMPLE_RATE) and concatenated in argument order.
# NOTE(review): each part gets the high-shelf cut here and the combined file
# gets the same filter again in the "prepare raw audio" step, so multi-part
# episodes are filtered twice while single-part ones are filtered once.
# Confirm this is intended.
if [[ "${#RAW_PARTS[@]}" -le 1 ]]; then
  RAW_COMBINED="${RAW_PARTS[0]}"
else
  log_info "EP${EP_PAD}: Concatenating ${#RAW_PARTS[@]} audio parts"
  CONCAT_LIST="${TMP_DIR}/concat_list.txt"
  for part in "${RAW_PARTS[@]}"; do
    # Base name without directory and without the last extension.
    PART_BASE="${part##*/}"
    PART_BASE="${PART_BASE%.*}"
    NORM_PART="${TMP_DIR}/${PART_BASE}_norm.wav"
    ffmpeg -y -v quiet -i "$part" -af "highshelf=f=4000:g=-6" -ar "$SAMPLE_RATE" -ac 2 "$NORM_PART"
    echo "file '${NORM_PART}'" >> "$CONCAT_LIST"
  done
  RAW_COMBINED="${TMP_DIR}/raw_combined.wav"
  ffmpeg -y -v quiet -f concat -safe 0 -i "$CONCAT_LIST" -c copy "$RAW_COMBINED"
fi
# Get duration of raw audio
# ffprobe prints seconds with a fractional part; RAW_DUR_INT keeps only the
# whole seconds for use in filter expressions below.
RAW_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$RAW_COMBINED")"
RAW_DUR_INT="${RAW_DUR%.*}"
log_info "EP${EP_PAD}: Raw audio duration: ${RAW_DUR_INT}s"
# Create intro background: the intro music at BG_VOL dB, limited to the first
# BG_DUR seconds, fading in over FADE_IN seconds and out over the last 3.
# BG_DUR is used in shell arithmetic, so it must be an integer.
ffmpeg -y -v quiet \
-i "$INTRO_FILE" \
-af "afade=t=in:st=0:d=${FADE_IN},volume=${BG_VOL}dB,afade=t=out:st=$((BG_DUR - 3)):d=3" \
-t "$BG_DUR" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/intro_bg.wav"
# Prepare raw audio (ensure stereo, correct sample rate, de-ess)
# The "de-ess" is a -6 dB high-shelf cut starting at 4 kHz.
ffmpeg -y -v quiet \
-i "$RAW_COMBINED" \
-af "highshelf=f=4000:g=-6" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/tts_stereo.wav"
# Mix intro background with beginning of TTS
# The background is padded (apad) to the voice duration and mixed under it;
# `duration=first` makes the output as long as the voice track.
ffmpeg -y -v quiet \
-i "${TMP_DIR}/tts_stereo.wav" \
-i "${TMP_DIR}/intro_bg.wav" \
-filter_complex "[1]apad=whole_dur=${RAW_DUR_INT}[bg];[0][bg]amix=inputs=2:duration=first:dropout_transition=3[mixed]" \
-map "[mixed]" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/tts_with_intro.wav"
# Get outro duration
OUTRO_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTRO_FILE")"
OUTRO_DUR_INT="${OUTRO_DUR%.*}"
# Create ending: outro audio + intro music underneath at BG_VOL dB, faded out
# over the last FADE_OUT seconds and trimmed to the outro length.
# FADE_OUT is used in shell arithmetic, so it must be an integer.
ffmpeg -y -v quiet \
-i "$OUTRO_FILE" \
-i "$INTRO_FILE" \
-filter_complex "[1]volume=${BG_VOL}dB,afade=t=out:st=$((OUTRO_DUR_INT - FADE_OUT)):d=${FADE_OUT},atrim=0:${OUTRO_DUR_INT}[music];[0][music]amix=inputs=2:duration=first[out]" \
-map "[out]" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/outro_mixed.wav"
# Concatenate TTS (with intro) + outro
ffmpeg -y -v quiet \
-i "${TMP_DIR}/tts_with_intro.wav" \
-i "${TMP_DIR}/outro_mixed.wav" \
-filter_complex "[0][1]concat=n=2:v=0:a=1[out]" \
-map "[out]" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/full_episode.wav"
# Normalize loudness and export as MP3.
# The encode targets a hidden side file in the output directory and is
# renamed into place only on success. A failed encode therefore never leaves
# a truncated ep_<NNN>_*.mp3, which other pipeline scripts would treat as a
# finished episode. The hidden name does not match that glob.
PARTIAL_FILE="${OUTPUT_DIR}/.ep_${EP_PAD}_partial.mp3"
if ! ffmpeg -y -v quiet \
  -i "${TMP_DIR}/full_episode.wav" \
  -af "loudnorm=I=${LUFS}:TP=-1.5:LRA=11" \
  -ar "$SAMPLE_RATE" -ac 2 \
  -b:a "$BITRATE" \
  "$PARTIAL_FILE"; then
  rm -f "$PARTIAL_FILE"
  log_error "EP${EP_PAD}: Final MP3 encode failed"
  exit 1
fi
mv -f "$PARTIAL_FILE" "$OUTPUT_FILE"

# Validate duration (whole minutes, truncated). Shell arithmetic replaces the
# former `bc` call, removing an external dependency.
FINAL_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTPUT_FILE")"
FINAL_SEC="${FINAL_DUR%.*}"
FINAL_MIN=$(( ${FINAL_SEC:-0} / 60 ))
log_info "EP${EP_PAD}: Final duration: ${FINAL_MIN} minutes (${FINAL_DUR%.*}s)"
if [[ "$FINAL_MIN" -lt "$MIN_DUR" ]] || [[ "$FINAL_MIN" -gt "$MAX_DUR" ]]; then
  log_warn "EP${EP_PAD}: Duration ${FINAL_MIN}min is outside expected range (${MIN_DUR}-${MAX_DUR}min)"
fi

# Update state
set_episode_field "$EP_NUM" "status" "produced"
set_episode_field "$EP_NUM" "audio_final_path" "$(basename "$OUTPUT_FILE")"

log_info "EP${EP_PAD}: Post-production complete -> ${OUTPUT_FILE}"
+140
View File
@@ -0,0 +1,140 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Print the command-line help on stdout and exit with status 1.
usage() {
  cat <<EOF
Usage: $0 <episode_number> <scheduled_date>
 Publishes episode to WordPress via wp-cli over SSH.
 scheduled_date format: YYYY-MM-DD
 Requires: final audio, WP JSON, cover image (optional).
EOF
  exit 1
}
# Argument parsing and discovery of the files needed to publish an episode.
if [[ $# -lt 2 ]]; then
  usage
fi
EP_NUM="$1"
SCHED_DATE="$2"
EP_PAD="$(pad_number "$EP_NUM")"

# The date is embedded verbatim in the generated wp-cli commands, so only the
# documented YYYY-MM-DD form is accepted.
if [[ ! "$SCHED_DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
  log_error "EP${EP_PAD}: Invalid scheduled_date '${SCHED_DATE}' (expected YYYY-MM-DD)"
  exit 1
fi

# The state file must exist before it is read: previously publish_time was
# read first, which aborted the script on a fresh checkout.
ensure_state_file
PUBLISH_TIME="$(jq -r '.publish_time' "$STATE_FILE")"

# Find required files
AUDIO_FILE=""
for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
  if [[ -f "$f" ]]; then
    AUDIO_FILE="$f"
    break
  fi
done

WP_JSON=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
  if [[ -f "$f" ]]; then
    WP_JSON="$f"
    break
  fi
done

# First .jpg/.png cover for this episode, searched recursively.
COVER_FILE=""
while IFS= read -r -d '' f; do
  COVER_FILE="$f"
  break
done < <(find "${PROJECT_ROOT}/Episodios/Capas_PodCast" -name "ep_${EP_PAD}_*" \( -name "*.jpg" -o -name "*.png" \) -print0 2>/dev/null)

# Validate required files
if [[ -z "$AUDIO_FILE" || ! -f "$AUDIO_FILE" ]]; then
  log_error "EP${EP_PAD}: Audio file not found in Audios/final/"
  exit 1
fi
if [[ -z "$WP_JSON" || ! -f "$WP_JSON" ]]; then
  log_error "EP${EP_PAD}: WP JSON file not found"
  exit 1
fi
if [[ -z "$COVER_FILE" ]]; then
  log_warn "EP${EP_PAD}: Cover image not found, publishing without featured image"
fi
# Post fields from the generated WP JSON.
TITLE="$(jq -r '.title' "$WP_JSON")"
CONTENT="$(jq -r '.content_html' "$WP_JSON")"
META_DESC="$(jq -r '.meta_description' "$WP_JSON")"
TAGS="$(jq -r '.wp_tags | join(",")' "$WP_JSON")"

# Audio metadata: duration as M:SS, plus human-readable and byte file sizes.
DURATION="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$AUDIO_FILE")"
DUR_TOTAL="${DURATION%.*}"
DUR_MIN=$((DUR_TOTAL / 60))
DUR_SEC=$((DUR_TOTAL % 60))
printf -v DURATION_FMT '%d:%02d' "$DUR_MIN" "$DUR_SEC"
FILESIZE="$(du -h "$AUDIO_FILE" | cut -f1)"
FILESIZE_RAW="$(stat -c%s "$AUDIO_FILE")"

log_info "EP${EP_PAD}: Publishing '${TITLE}' scheduled for ${SCHED_DATE} ${PUBLISH_TIME}"
# Generate wp-cli commands for SSH execution.
# This script does not contact the server: it writes an executable command
# file under logs/ that is meant to be run on the server after the audio has
# been uploaded.
CMDS_FILE="${PROJECT_ROOT}/logs/publish_${EP_PAD}_commands.sh"
mkdir -p "${PROJECT_ROOT}/logs"
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"
YEAR="$(date -d "$SCHED_DATE" '+%Y')"
MONTH="$(date -d "$SCHED_DATE" '+%m')"
UPLOAD_PATH="wp-content/uploads/podcast/${YEAR}/${MONTH}"

# shell_squote_escape <text>: print <text> with every single quote rewritten
# as '\'' so it can be embedded inside a single-quoted shell word.
shell_squote_escape() {
  printf '%s\n' "$1" | sed "s/'/'\\\\''/g"
}

# Everything placed inside single quotes in the generated file is escaped.
# TAGS used to be inserted raw, so a tag containing an apostrophe produced a
# broken (or injectable) command line.
ESCAPED_CONTENT="$(shell_squote_escape "$CONTENT")"
ESCAPED_META="$(shell_squote_escape "$META_DESC")"
ESCAPED_TITLE="$(shell_squote_escape "$TITLE")"
ESCAPED_TAGS="$(shell_squote_escape "$TAGS")"

# Unquoted heredoc: ${...} expands now; \${...} and \$( ) are left for the
# generated script to evaluate on the server.
cat > "$CMDS_FILE" << CMDEOF
#!/usr/bin/env bash
# Auto-generated publish commands for EP${EP_PAD}
# Run via SSH MCP on server (user: ealmeida, path: /home/ealmeida/public_html)
set -euo pipefail
WP_PATH="/home/ealmeida/public_html"
UPLOAD_DIR="\${WP_PATH}/${UPLOAD_PATH}"
# 1. Create upload directory
mkdir -p "\${UPLOAD_DIR}"
# 2. Audio file must be uploaded to server first (via sftp MCP)
# Source: ${AUDIO_FILE}
# Target: \${UPLOAD_DIR}/${AUDIO_BASENAME}
# 3. Create podcast post
POST_ID=\$(wp post create \\
--post_type=podcast \\
--post_title='${ESCAPED_TITLE}' \\
--post_status=future \\
--post_date='${SCHED_DATE} ${PUBLISH_TIME}:00' \\
--tags_input='${ESCAPED_TAGS}' \\
--porcelain \\
--allow-root \\
--path="\${WP_PATH}")
echo "Created post: \${POST_ID}"
# 4. Add content (separate to avoid shell escaping issues)
wp post update \${POST_ID} --post_content='${ESCAPED_CONTENT}' --allow-root --path="\${WP_PATH}"
# 5. Set SSP meta fields
wp post meta update \${POST_ID} episode_type audio --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} audio_file "https://descomplicar.pt/${UPLOAD_PATH}/${AUDIO_BASENAME}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} duration "${DURATION_FMT}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} filesize "${FILESIZE}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} filesize_raw "${FILESIZE_RAW}" --allow-root --path="\${WP_PATH}"
# 6. Set Rank Math meta description
wp post meta update \${POST_ID} rank_math_description '${ESCAPED_META}' --allow-root --path="\${WP_PATH}"
# 7. Fix permissions
chown -R ealmeida:ealmeida "\${UPLOAD_DIR}"
echo "EP${EP_PAD} published as post \${POST_ID}, scheduled for ${SCHED_DATE} ${PUBLISH_TIME}"
CMDEOF
chmod +x "$CMDS_FILE"

# Update state
# NOTE(review): the status becomes "published" once the command file is
# written, before anything has run on the server.
set_episode_field "$EP_NUM" "status" "published"
set_episode_field "$EP_NUM" "scheduled_date" "$SCHED_DATE"

log_info "EP${EP_PAD}: Publish commands saved to ${CMDS_FILE}"
log_info "EP${EP_PAD}: Upload audio via SFTP, then run commands via SSH"
+292
View File
@@ -0,0 +1,292 @@
#!/usr/bin/env bash
set -euo pipefail
# schedule-episode.sh — Agenda episódio completo no WordPress
# Envia MP3 + capa via SCP, importa media, cria post com todos os metas
#
# Uso: ./scripts/schedule-episode.sh <ep_num> <YYYY-MM-DD> [--dry-run]
#
# Requisitos locais:
# - MP3 em Episodios/Audios/final/ep_NNN_*.mp3
# - Capa em banco-media: capas-geradas/podcast/podcast-epNNN-*.png
# - ffprobe (para duração)
#
# Requisitos servidor:
# - wp-cli com --allow-root
# - SSH porta 9443, chave ~/.ssh/id_ed25519
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# SSH connection settings used by ssh_cmd and scp_file.
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
# Options are kept in one string and deliberately word-split where used.
# NOTE(review): StrictHostKeyChecking=no together with
# UserKnownHostsFile=/dev/null disables host key verification for a root
# login; consider pinning the server key in known_hosts instead.
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
# WordPress install path on the server and the owner applied to uploads.
WP_PATH="/home/ealmeida/public_html"
WP_OWNER="ealmeida:ealmeida"
# Public site URL used to build the audio file URL.
SITE_URL="https://descomplicar.pt"
# Term assigned to each post in the "series" taxonomy.
SERIES_SLUG="podcast-descomplicar-digital"
# Media bank (cover images)
CAPAS_DIR="/media/ealmeida/Dados/Hub/06-Operacoes/Conteúdos/banco-media/capas-geradas/podcast"
# Set to true by the optional third argument --dry-run.
DRY_RUN=false
# Print the command-line help on stdout and exit with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <ep_num> <YYYY-MM-DD> [--dry-run]" \
    " Agenda episodio completo no WordPress (audio + capa + metas + SEO)"
  exit 1
}
# ssh_cmd <command...>
# Runs a command on the server over SSH with the configured key, port and
# options. SSH_AUTH_SOCK is emptied so no agent identity is offered.
ssh_cmd() {
  local target="${SSH_USER}@${SSH_HOST}"
  # SSH_OPTS is intentionally unquoted so it splits into separate options.
  SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" ${SSH_OPTS} "$target" "$@"
}

# scp_file <local_path> <remote_path>
# Copies one local file to the server using the same connection settings.
scp_file() {
  local source_path="$1"
  local destination="${SSH_USER}@${SSH_HOST}:$2"
  # SSH_OPTS is intentionally unquoted so it splits into separate options.
  SSH_AUTH_SOCK= scp -P "${SSH_PORT}" -i "${SSH_KEY}" ${SSH_OPTS} "$source_path" "$destination"
}
# Argument parsing: <ep_num> <YYYY-MM-DD> [--dry-run].
if [[ $# -lt 2 ]]; then
  usage
fi
EP_NUM="$1"
SCHED_DATE="$2"
if [[ "${3:-}" == "--dry-run" ]]; then
  DRY_RUN=true
fi
EP_PAD="$(pad_number "$EP_NUM")"

# === 1. Locate local files ===
# _first_file <path...>: print the first argument that is a regular file
# (nothing when none is; an unmatched glob stays literal and is rejected).
_first_file() {
  local candidate
  for candidate in "$@"; do
    if [[ -f "$candidate" ]]; then
      printf '%s' "$candidate"
      return 0
    fi
  done
  return 0
}

AUDIO_FILE="$(_first_file "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3)"
COVER_FILE="$(_first_file "${CAPAS_DIR}/podcast-ep${EP_PAD}-"*.png)"
GUIDE_FILE="$(_first_file "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt)"

# Audio and cover are mandatory; the script text is optional.
if [[ -z "$AUDIO_FILE" ]]; then
  log_error "EP${EP_PAD}: MP3 nao encontrado em Episodios/Audios/final/"
  exit 1
fi
if [[ -z "$COVER_FILE" ]]; then
  log_error "EP${EP_PAD}: Capa PNG nao encontrada em ${CAPAS_DIR}/"
  exit 1
fi
if [[ -z "$GUIDE_FILE" ]]; then
  log_warn "EP${EP_PAD}: Guiao .txt nao encontrado (conteudo WP ficara vazio)"
fi
# === 2. Extract audio metadata ===
# Duration as M:SS, plus human-readable and byte file sizes.
DURATION_RAW=$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$AUDIO_FILE")
DUR_SEC_TOTAL=${DURATION_RAW%.*}
DUR_MIN=$((DUR_SEC_TOTAL / 60))
DUR_SEC=$((DUR_SEC_TOTAL % 60))
printf -v DURATION_FMT '%d:%02d' "$DUR_MIN" "$DUR_SEC"
FILESIZE_H="$(du -h "$AUDIO_FILE" | cut -f1)"
FILESIZE_RAW="$(stat -c%s "$AUDIO_FILE")"
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"

# Title: taken from the script file name (prefix removed, "_" -> space),
# falling back to the audio file name (prefix removed, "-" -> space).
if [[ -n "$GUIDE_FILE" ]]; then
  TITLE="$(basename "$GUIDE_FILE" .txt | sed 's/^Episodio_[0-9]*_//' | tr '_' ' ')"
else
  TITLE=""
fi
if [[ -z "$TITLE" ]]; then
  TITLE="$(basename "$AUDIO_FILE" .mp3 | sed 's/^ep_[0-9]*_//' | tr '-' ' ')"
fi

# Remote upload directory and public URL, grouped by the schedule date's
# year and month.
YEAR="$(date -d "$SCHED_DATE" '+%Y')"
MONTH="$(date -d "$SCHED_DATE" '+%m')"
UPLOAD_SUBPATH="wp-content/uploads/podcast/${YEAR}/${MONTH}"
AUDIO_REMOTE_DIR="${WP_PATH}/${UPLOAD_SUBPATH}"
AUDIO_URL="${SITE_URL}/${UPLOAD_SUBPATH}/${AUDIO_BASENAME}"

log_info "EP${EP_PAD}: '${TITLE}' | ${DURATION_FMT} | ${FILESIZE_H} | ${SCHED_DATE} 07:00"

# Dry run: report what would be used and stop before touching the server.
if [[ "$DRY_RUN" == true ]]; then
  log_info "[DRY-RUN] Audio: ${AUDIO_FILE}"
  log_info "[DRY-RUN] Capa: ${COVER_FILE}"
  log_info "[DRY-RUN] URL audio: ${AUDIO_URL}"
  log_info "[DRY-RUN] Titulo: ${TITLE}"
  exit 0
fi
# === 3. Upload the MP3 to the server ===
log_info "EP${EP_PAD}: Enviar MP3..."
ssh_cmd "mkdir -p '${AUDIO_REMOTE_DIR}'"
scp_file "$AUDIO_FILE" "${AUDIO_REMOTE_DIR}/"
ssh_cmd "chown ${WP_OWNER} '${AUDIO_REMOTE_DIR}/${AUDIO_BASENAME}'"
log_info "EP${EP_PAD}: MP3 enviado -> ${AUDIO_REMOTE_DIR}/${AUDIO_BASENAME}"

# === 4. Upload the cover and import it into the WP media library ===
log_info "EP${EP_PAD}: Enviar capa..."
COVER_BASENAME="$(basename "$COVER_FILE")"
ssh_cmd "mkdir -p /tmp/podcast-upload"
scp_file "$COVER_FILE" "/tmp/podcast-upload/${COVER_BASENAME}"
ssh_cmd "chown ${WP_OWNER} '/tmp/podcast-upload/${COVER_BASENAME}'"

# `|| ATTACH_ID=""`: under `set -e` a failing import used to abort right
# here, skipping both the remote cleanup and the error message below.
ATTACH_ID=$(ssh_cmd "cd '${WP_PATH}' && wp media import '/tmp/podcast-upload/${COVER_BASENAME}' --title='${COVER_BASENAME%.png}' --porcelain --allow-root 2>/dev/null") || ATTACH_ID=""

# Best-effort cleanup of the staged cover; a failure only warns.
ssh_cmd "rm -f '/tmp/podcast-upload/${COVER_BASENAME}'" \
  || log_warn "EP${EP_PAD}: Nao foi possivel remover a capa temporaria no servidor"

# The ID is later interpolated into remote commands, so it must be numeric.
if [[ ! "$ATTACH_ID" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: Falha ao importar capa no WP"
  exit 1
fi
log_info "EP${EP_PAD}: Capa importada (attach_id: ${ATTACH_ID})"
# === 5. Create the scheduled podcast post ===
log_info "EP${EP_PAD}: Criar post..."

# Escape single quotes so the title can sit inside a single-quoted word of
# the remote command line.
ESCAPED_TITLE="$(printf '%s\n' "$TITLE" | sed "s/'/'\\\\''/g")"

# `|| POST_ID=""`: under `set -e` a failing remote command used to abort the
# script before the error message below could be logged.
POST_ID=$(ssh_cmd "cd '${WP_PATH}' && wp post create \
--post_type=podcast \
--post_title='${ESCAPED_TITLE}' \
--post_status=future \
--post_date='${SCHED_DATE} 07:00:00' \
--porcelain \
--allow-root 2>/dev/null") || POST_ID=""

# The ID is interpolated unquoted into later remote commands, so anything
# other than a plain number is treated as a failure.
if [[ ! "$POST_ID" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: Falha ao criar post"
  exit 1
fi
log_info "EP${EP_PAD}: Post criado (ID: ${POST_ID})"
# === 6. Assign the series term and the featured image ===
# Both wp-cli calls run in one SSH session; `&&` stops at the first failure
# and, under `set -e`, a non-zero result aborts the script.
ssh_cmd "cd '${WP_PATH}' && \
wp post term set ${POST_ID} series '${SERIES_SLUG}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} _thumbnail_id ${ATTACH_ID} --allow-root 2>/dev/null"
# === 7. SSP (Seriously Simple Podcasting) meta fields ===
# Sets episode type, audio URL, formatted duration, human-readable and raw
# file size, and the recording date (schedule date at 07:00:00).
ssh_cmd "cd '${WP_PATH}' && \
wp post meta update ${POST_ID} episode_type audio --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} audio_file '${AUDIO_URL}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} duration '${DURATION_FMT}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} filesize '${FILESIZE_H}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} filesize_raw '${FILESIZE_RAW}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} date_recorded '${SCHED_DATE} 07:00:00' --allow-root 2>/dev/null"
# === 8. Apply WP content + Rank Math + tags (when a _wp.json exists) ===
# Locate the episode's WP JSON and, when it does not parse, try one automatic
# repair (escaping bare double quotes inside "content_html").
WP_JSON=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
  if [[ -f "$f" ]]; then
    WP_JSON="$f"
    break
  fi
done

if [[ -n "$WP_JSON" ]]; then
  # The path is passed through argv so quotes in the file name cannot break
  # the Python one-liner.
  if ! python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$WP_JSON" 2>/dev/null; then
    log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
    # The repair runs as an `if` condition: a failure used to abort the whole
    # script under `set -e` after the post had already been created, and
    # "JSON reparado" was logged without checking the result.
    if python3 - "$WP_JSON" << 'PYFIX'
import sys, re, json
filepath = sys.argv[1]
with open(filepath, 'r') as f:
    raw = f.read()
marker = '"content_html": "'
start = raw.find(marker)
if start == -1: sys.exit(1)
content_start = start + len(marker)
end_pattern = re.search(r'",\s*\n\s*"hashtags"', raw[content_start:]) or re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:])
if not end_pattern: sys.exit(1)
content_end = content_start + end_pattern.start()
fixed = raw[:content_start] + re.sub(r'(?<!\\)"', '\\"', raw[content_start:content_end]) + raw[content_end:]
json.loads(fixed)
with open(filepath, 'w') as f: f.write(fixed)
PYFIX
    then
      log_info "EP${EP_PAD}: JSON reparado"
    else
      log_warn "EP${EP_PAD}: Reparação automática do JSON falhou"
    fi
  fi
fi
if [[ -n "$WP_JSON" ]] && python3 -c "import json; json.load(open('$WP_JSON'))" 2>/dev/null; then
log_info "EP${EP_PAD}: Aplicar conteudo WP de $(basename "$WP_JSON")"
WP_CONTENT="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('content_html',''))" 2>/dev/null)"
WP_META="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('meta_description',''))" 2>/dev/null)"
WP_KEYWORD="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('keyword',''))" 2>/dev/null)"
WP_TAGS="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(','.join(d.get('wp_tags',[])))" 2>/dev/null)"
WP_HASHTAGS="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(' '.join(d.get('hashtags',[])))" 2>/dev/null)"
# Excerpt = primeira linha do meta + hashtags
WP_EXCERPT=""
if [[ -n "$WP_META" && -n "$WP_HASHTAGS" ]]; then
WP_EXCERPT="${WP_META}
${WP_HASHTAGS}"
fi
# Aplicar conteudo HTML
if [[ -n "$WP_CONTENT" ]]; then
ESCAPED_CONTENT="$(echo "$WP_CONTENT" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_content='${ESCAPED_CONTENT}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: post_content aplicado"
fi
# Aplicar excerpt
if [[ -n "$WP_EXCERPT" ]]; then
ESCAPED_EXCERPT="$(echo "$WP_EXCERPT" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_excerpt='${ESCAPED_EXCERPT}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: post_excerpt aplicado"
fi
# Aplicar tags
if [[ -n "$WP_TAGS" ]]; then
ssh_cmd "cd '${WP_PATH}' && wp post term set ${POST_ID} post_tag ${WP_TAGS} --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: tags aplicadas"
fi
# Rank Math: meta description + focus keyword
if [[ -n "$WP_META" ]]; then
ESCAPED_META="$(echo "$WP_META" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_description '${ESCAPED_META}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: rank_math_description aplicado"
fi
if [[ -n "$WP_KEYWORD" ]]; then
ESCAPED_KW="$(echo "$WP_KEYWORD" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_focus_keyword '${ESCAPED_KW}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: rank_math_focus_keyword aplicado"
fi
# Rank Math: SEO title (preferir seo_title do JSON, fallback para title + sufixo)
WP_SEO_TITLE="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('seo_title',''))" 2>/dev/null)"
if [[ -z "$WP_SEO_TITLE" ]]; then
WP_TITLE_FALLBACK="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('title',''))" 2>/dev/null)"
[[ -n "$WP_TITLE_FALLBACK" ]] && WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
fi
if [[ -n "$WP_SEO_TITLE" ]]; then
ESCAPED_SEO_TITLE="$(echo "$WP_SEO_TITLE" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_title '${ESCAPED_SEO_TITLE}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: rank_math_title aplicado"
fi
# Slug optimizado (preferir slug do JSON)
WP_SLUG="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('slug',''))" 2>/dev/null)"
if [[ -n "$WP_SLUG" ]]; then
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_name='${WP_SLUG}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: slug actualizado para ${WP_SLUG}"
fi
else
log_info "EP${EP_PAD}: PENDENTE — WP JSON nao encontrado, gerar via generate-content.sh"
fi
# === 9. Corrigir permissoes uploads ===
ssh_cmd "chown -R ${WP_OWNER} '${AUDIO_REMOTE_DIR}/' '${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}/' 2>/dev/null" || true
log_info "EP${EP_PAD}: Agendado para ${SCHED_DATE} 07:00 (post ${POST_ID})"
# Actualizar pipeline-state.json
EP_TITLE="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('title',''))" 2>/dev/null || echo "")"
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"
jq --argjson n "$EP_NUM" --arg t "$EP_TITLE" --arg a "Episodios/Audios/final/${AUDIO_BASENAME}" --arg s "$SCHED_DATE" \
'if [.episodes[] | select(.num == $n)] | length > 0
then (.episodes[] | select(.num == $n)) |= . + {status: "ready", title: $t, audio: $a, scheduled: $s}
else .episodes += [{num: ($n | tonumber), title: $t, audio: $a, scheduled: $s, status: "ready"}]
end | .last_updated = (now | todate)' \
"${STATE_FILE}" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "${STATE_FILE}"
log_info "EP${EP_PAD}: pipeline-state.json actualizado"
echo "${POST_ID}"
+82
View File
@@ -0,0 +1,82 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Print the command-line help on stdout and terminate with exit status 1.
usage() {
    cat <<EOF
Usage: $0 <input_file> <output_dir>
 Splits a podcast script into blocks for TTS generation.
 Splits at the FAQ transition marker or at word limit.
 Creates part_1.txt, part_2.txt, etc. in output_dir.
EOF
    exit 1
}
# Main flow: split <input_file> into part_N.txt files inside <output_dir>.
# The number of parts created ("1" or "2") is printed on stdout.

# Both positional arguments are required (usage() prints help and exits 1).
[[ $# -lt 2 ]] && usage

INPUT_FILE="$1"
OUTPUT_DIR="$2"

if [[ ! -f "$INPUT_FILE" ]]; then
    log_error "Input file not found: ${INPUT_FILE}"
    exit 1
fi

mkdir -p "$OUTPUT_DIR"

# Block size limit and split marker (used as a grep -E pattern) are read
# from the project's audio settings.
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
MAX_WORDS="$(jq -r '.tts_max_words_per_block' "$SETTINGS_FILE")"
SPLIT_MARKER="$(jq -r '.tts_split_marker' "$SETTINGS_FILE")"

# jq -r prints the literal string "null" for a missing key; reject anything
# that is not a plain integer before it reaches the numeric comparison below.
if [[ ! "$MAX_WORDS" =~ ^[0-9]+$ ]]; then
    log_error "Invalid tts_max_words_per_block in ${SETTINGS_FILE}: ${MAX_WORDS}"
    exit 1
fi

TOTAL_WORDS="$(wc -w < "$INPUT_FILE")"
log_info "Total words: ${TOTAL_WORDS}, max per block: ${MAX_WORDS}"

if [[ "$TOTAL_WORDS" -le "$MAX_WORDS" ]]; then
    # No split needed
    cp "$INPUT_FILE" "${OUTPUT_DIR}/part_1.txt"
    log_info "No split needed (${TOTAL_WORDS} words). Created part_1.txt"
    echo "1"
    exit 0
fi

# Try to split at the FAQ marker.
# With `set -e` + `pipefail`, a grep that finds nothing (exit 1) would abort
# the whole script from inside this assignment, so the fallback branch below
# could never run. `|| true` turns "no match" into an empty MARKER_LINE.
MARKER_LINE=""
if [[ -n "$SPLIT_MARKER" && "$SPLIT_MARKER" != "null" ]]; then
    MARKER_LINE="$(grep -nE -- "$SPLIT_MARKER" "$INPUT_FILE" | head -1 | cut -d: -f1 || true)"
fi

if [[ -n "$MARKER_LINE" && "$MARKER_LINE" -gt 1 ]]; then
    # Split at the marker line (FAQ section starts here)
    head -n "$((MARKER_LINE - 1))" "$INPUT_FILE" > "${OUTPUT_DIR}/part_1.txt"
    tail -n "+${MARKER_LINE}" "$INPUT_FILE" > "${OUTPUT_DIR}/part_2.txt"
    WORDS_1="$(wc -w < "${OUTPUT_DIR}/part_1.txt")"
    WORDS_2="$(wc -w < "${OUTPUT_DIR}/part_2.txt")"
    log_info "Split at FAQ marker (line ${MARKER_LINE}): part_1=${WORDS_1} words, part_2=${WORDS_2} words"
    echo "2"
else
    # No usable marker — split at approximate midpoint by paragraph
    TOTAL_LINES="$(wc -l < "$INPUT_FILE")"

    # A file with fewer than two lines cannot be cut on a line boundary;
    # splitting at line 0 would leave part_1.txt empty. Emit a single block.
    if [[ "$TOTAL_LINES" -lt 2 ]]; then
        cp "$INPUT_FILE" "${OUTPUT_DIR}/part_1.txt"
        log_info "Cannot split on a line boundary (${TOTAL_LINES} line(s), ${TOTAL_WORDS} words). Created part_1.txt"
        echo "1"
        exit 0
    fi

    MID_LINE=$((TOTAL_LINES / 2))

    # Find nearest empty line (paragraph break) near midpoint, probing
    # outwards from the middle.
    SPLIT_LINE=""
    for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
        CHECK=$((MID_LINE + offset))
        if [[ "$CHECK" -gt 0 && "$CHECK" -lt "$TOTAL_LINES" ]]; then
            LINE_CONTENT="$(sed -n "${CHECK}p" "$INPUT_FILE")"
            if [[ -z "$LINE_CONTENT" || "$LINE_CONTENT" =~ ^[[:space:]]*$ ]]; then
                SPLIT_LINE="$CHECK"
                break
            fi
        fi
    done

    # No blank line nearby: cut at the midpoint line itself.
    if [[ -z "$SPLIT_LINE" ]]; then
        SPLIT_LINE="$MID_LINE"
    fi

    head -n "$SPLIT_LINE" "$INPUT_FILE" > "${OUTPUT_DIR}/part_1.txt"
    tail -n "+$((SPLIT_LINE + 1))" "$INPUT_FILE" > "${OUTPUT_DIR}/part_2.txt"
    WORDS_1="$(wc -w < "${OUTPUT_DIR}/part_1.txt")"
    WORDS_2="$(wc -w < "${OUTPUT_DIR}/part_2.txt")"
    log_info "Split at paragraph break (line ${SPLIT_LINE}): part_1=${WORDS_1} words, part_2=${WORDS_2} words"
    echo "2"
fi
+115
View File
@@ -0,0 +1,115 @@
#!/usr/bin/env bash
set -euo pipefail
# transfer-to-server.sh — Transfere ficheiros do desktop para o CWP server via SCP
# Utiliza a chave SSH em ~/.ssh/id_ed25519, porta 9443
# Permissoes finais: ealmeida:ealmeida
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
WP_PATH="/home/ealmeida/public_html"
# Print the command-line help (one line per printf argument) and exit 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <type> <local_file> [remote_subdir]" \
        "" \
        "Types:" \
        " cover <file.png> -> wp-content/uploads/YYYY/MM/" \
        " audio <file.mp3> [YYYY/MM] -> wp-content/uploads/podcast/YYYY/MM/" \
        " batch <dir_of_files> <type> -> envia todos os ficheiros do directorio" \
        "" \
        "Examples:" \
        " $0 cover /path/to/ep001.png" \
        " $0 audio /path/to/ep020.mp3 2026/04" \
        " $0 batch /path/to/capas/ cover"
    exit 1
}
# Copy one local file to the server with scp.
#   $1  local source path
#   $2  destination path on the server
# SSH_AUTH_SOCK is emptied for this one command, and the key file, port and
# extra options come from the SSH_* settings defined at the top of the script.
scp_file() {
    local local_path="$1"
    local remote_path="$2"
    local target="${SSH_USER}@${SSH_HOST}:${remote_path}"

    # SSH_OPTS is deliberately unquoted: it holds several "-o" options that
    # must be split into separate words.
    SSH_AUTH_SOCK= scp \
        -P "${SSH_PORT}" \
        -i "${SSH_KEY}" \
        ${SSH_OPTS} \
        "$local_path" "$target"
}
# Run a command on the server over ssh; every argument is forwarded to ssh
# after the login target. Uses the same key, port and options as scp_file.
ssh_cmd() {
    local login="${SSH_USER}@${SSH_HOST}"

    # SSH_OPTS is deliberately unquoted so that each "-o" option becomes its
    # own word.
    SSH_AUTH_SOCK= ssh \
        -p "${SSH_PORT}" \
        -i "${SSH_KEY}" \
        ${SSH_OPTS} \
        "$login" "$@"
}
# Main dispatch: <type> selects what is uploaded and where it lands under
# ${WP_PATH}/wp-content/uploads on the server.

# At least <type> and one more argument are required.
[[ $# -lt 2 ]] && usage

TYPE="$1"
shift

# Wrap $1 in single quotes for use inside a remote shell command, escaping
# embedded single quotes, so names like "it's.png" cannot break (or inject
# into) the command line executed on the server.
remote_quote() {
    printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
}

# Upload one file and give the uploaded file to ealmeida:ealmeida.
#   $1  local file (must exist)
#   $2  remote directory (created when missing)
#   $3  prefix of the success log line
upload_single() {
    local local_file="$1"
    local remote_dir="$2"
    local log_prefix="$3"
    local base

    # Explicit if/exit: an "&&" chain would skip the exit whenever the
    # logger itself returned non-zero.
    if [[ ! -f "$local_file" ]]; then
        log_error "Ficheiro nao encontrado: $local_file"
        exit 1
    fi

    base="$(basename "$local_file")"
    ssh_cmd "mkdir -p $(remote_quote "$remote_dir")"
    scp_file "$local_file" "${remote_dir}/"
    ssh_cmd "chown ealmeida:ealmeida $(remote_quote "${remote_dir}/${base}")"
    log_info "${log_prefix}: ${base} -> ${remote_dir}/"
}

case "$TYPE" in
    cover)
        # Covers go to the current year/month uploads folder.
        YEAR="$(date '+%Y')"
        MONTH="$(date '+%m')"
        upload_single "$1" "${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}" "Cover enviada"
        ;;

    audio)
        # Optional second argument overrides the YYYY/MM sub-directory.
        if [[ $# -ge 2 ]]; then
            SUBDIR="$2"
        else
            YEAR="$(date '+%Y')"
            MONTH="$(date '+%m')"
            SUBDIR="${YEAR}/${MONTH}"
        fi
        upload_single "$1" "${WP_PATH}/wp-content/uploads/podcast/${SUBDIR}" "Audio enviado"
        ;;

    batch)
        LOCAL_DIR="$1"
        BATCH_TYPE="${2:-cover}"
        if [[ ! -d "$LOCAL_DIR" ]]; then
            log_error "Directorio nao encontrado: $LOCAL_DIR"
            exit 1
        fi

        YEAR="$(date '+%Y')"
        MONTH="$(date '+%m')"
        case "$BATCH_TYPE" in
            cover) REMOTE_DIR="${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}" ;;
            audio) REMOTE_DIR="${WP_PATH}/wp-content/uploads/podcast/${YEAR}/${MONTH}" ;;
            *)
                log_error "Tipo batch invalido: $BATCH_TYPE"
                exit 1
                ;;
        esac

        ssh_cmd "mkdir -p $(remote_quote "$REMOTE_DIR")"

        # Every png/jpg/mp3/wav in the directory is sent, whatever the batch
        # type; an unmatched pattern stays literal and is skipped by -f.
        COUNT=0
        for f in "${LOCAL_DIR}"/*.{png,jpg,mp3,wav}; do
            [[ ! -f "$f" ]] && continue
            scp_file "$f" "${REMOTE_DIR}/"
            COUNT=$((COUNT + 1))
        done

        ssh_cmd "chown -R ealmeida:ealmeida $(remote_quote "${REMOTE_DIR}/")"
        log_info "Batch ${BATCH_TYPE}: ${COUNT} ficheiros enviados para ${REMOTE_DIR}/"
        ;;

    *)
        usage
        ;;
esac
+62
View File
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Generate TTS for a single text file via Gemini API."""
import sys, wave, os
def main() -> None:
    """Generate speech for one text file with the Gemini TTS API.

    Command line: ``tts-single-part.py <input.txt> <output.wav>``.

    The input file is read as UTF-8, prefixed with a fixed PT-PT style
    instruction and sent to the TTS model; the returned audio bytes are
    written to the output path as a mono, 16-bit, 24 kHz WAV file.

    Exits with status 1 when the arguments are missing, ``GEMINI_API_KEY``
    is not set, the input text is empty, or the response carries no audio.
    """
    if len(sys.argv) < 3:
        print("Usage: tts-single-part.py <input.txt> <output.wav>")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("ERROR: GEMINI_API_KEY not set")
        sys.exit(1)

    # Explicit UTF-8: the script text is Portuguese, and the locale default
    # encoding would mangle accented characters on non-UTF-8 systems.
    with open(input_file, "r", encoding="utf-8") as f:
        text = f.read()

    # Validate the input before importing the SDK or calling the API, so an
    # empty part fails fast instead of producing an API error.
    if not text.strip():
        print(f"ERROR: input file is empty: {input_file}")
        sys.exit(1)

    # Imported lazily so the argument/environment checks above work even
    # when the SDK is not installed.
    from google import genai
    from google.genai import types

    style = (
        "Lê este texto em português de Portugal (PT-PT), com um tom enérgico, "
        "confiante, educativo, inspirador e profissional. Mantém o ritmo natural "
        "e envolvente, como se estivesses a conversar diretamente com o ouvinte, "
        "transmitindo proximidade e autoridade. Faz pequenas pausas para dar ênfase "
        "às ideias-chave e assegura que cada transição entre temas é fluida. Evita "
        "soar robótico ou demasiado formal; o objetivo é informar, motivar e criar "
        "ligação com quem está a ouvir."
    )

    words = len(text.split())
    print(f"Generating TTS for {words} words...")

    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model="gemini-2.5-pro-preview-tts",
        contents=style + "\n\n" + text,
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Sulafat")
                )
            ),
        ),
    )

    # A response without candidates/parts/inline data would otherwise
    # surface as an opaque IndexError/AttributeError.
    try:
        data = response.candidates[0].content.parts[0].inline_data.data
    except (AttributeError, IndexError, TypeError):
        data = None
    if not data:
        print("ERROR: Gemini response contained no audio data")
        sys.exit(1)

    # WAV header parameters the audio bytes are written with.
    channels = 1
    sample_width = 2  # bytes per sample
    frame_rate = 24000

    with wave.open(output_file, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(frame_rate)
        wf.writeframes(data)

    duration = (len(data) // (sample_width * channels)) / frame_rate
    print(f"OK: {duration:.0f}s -> {output_file}")
if __name__ == "__main__":
main()
+570
View File
@@ -0,0 +1,570 @@
#!/bin/bash
# =================================================================
# COMANDOS DE INSTALACAO WiP - Websites Inteligentes e Poderosos
# Versao: 3.0
# Data: 24 Fevereiro 2026
# Autor: Descomplicar - Emanuel Almeida
# =================================================================
#
# IMPORTANTE: Este script corre no servidor CWP via SSH MCP
# SEMPRE usar --allow-root (user shell e /usr/sbin/nologin no CWP)
# Path base: /home/USER/dominio.pt (user CWP != nome dominio)
#
# Mudancas v3.0:
# - Removido MainWP (descontinuado)
# - Tema hello-elementor (era astra)
# - fluentform no core (era wpforms-lite)
# - Adicionado complianz-gdpr (GDPR, core)
# - Adicionado clean-admin mu-plugin
# - --allow-root obrigatorio em todos os comandos
# - Removidos: wpvivid, bit-integrations, ai-engine, branda
# =================================================================
# Cores para output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Coloured console messages. The text is $1; backslash escapes in the colour
# codes (and in the message) are expanded, as with `echo -e`.

# Timestamped informational message in green.
log() {
    local stamp
    stamp="$(date +'%Y-%m-%d %H:%M:%S')"
    printf '%b\n' "${GREEN}[${stamp}] $1${NC}"
}

# Warning in yellow, prefixed with [WARN].
warn() {
    printf '%b\n' "${YELLOW}[WARN] $1${NC}"
}

# Error in red, prefixed with [ERROR].
err() {
    printf '%b\n' "${RED}[ERROR] $1${NC}"
}
# Helper: executa wp com flags obrigatorias
# Run WP-CLI against one site.
#   $1   WordPress path (passed as --path)
#   $2.. WP-CLI sub-command and its arguments
# --allow-root and --path are appended after the caller's arguments.
wp_run() {
    local site_root=$1
    shift
    local -a cli_args=("$@" --allow-root "--path=${site_root}")
    wp "${cli_args[@]}"
}
# =================================================================
# SETUP WORDPRESS BASE
# =================================================================
# Download, configure and install a base WordPress site in PT-PT.
#   $1  domain (also used for the site URL, title and database name)
#   $2  WordPress path
#   $3  admin user name
#   $4  admin e-mail
# Reads DB_USER / DB_PASS from the environment for wp-config.
# The generated admin password is shown once after a successful install.
setup_wordpress_base() {
    local domain=$1
    local wp_path=$2
    local admin_user=$3
    local admin_email=$4
    local admin_pass

    log "Configurando WordPress base: $domain (path: $wp_path)"

    # Download WordPress in PT-PT
    wp_run "$wp_path" core download --locale=pt_PT --skip-content

    # Create wp-config; the database name is the domain with dots replaced
    # by underscores plus a "_db" suffix.
    wp_run "$wp_path" config create \
        --dbname="${domain//./_}_db" \
        --dbuser="$DB_USER" \
        --dbpass="$DB_PASS" \
        --dbhost="localhost"

    # Install WordPress. The password is generated into a variable first:
    # generating it inline in the argument list threw it away, leaving an
    # admin account nobody could log in to without a password reset.
    if wp_run "$wp_path" core is-installed 2>/dev/null; then
        warn "WordPress ja instalado em $wp_path — credenciais admin existentes mantidas."
    else
        admin_pass="$(openssl rand -base64 12)"
        if wp_run "$wp_path" core install \
            --url="https://$domain" \
            --title="$domain" \
            --admin_user="$admin_user" \
            --admin_password="$admin_pass" \
            --admin_email="$admin_email" \
            --skip-email; then
            warn "Credenciais admin — user: $admin_user | password: $admin_pass (guardar agora, nao volta a ser mostrada)"
        else
            err "Falha em 'wp core install' para $domain"
        fi
    fi

    # Base settings for Portugal
    wp_run "$wp_path" option update timezone_string 'Europe/Lisbon'
    wp_run "$wp_path" option update date_format 'd/m/Y'
    wp_run "$wp_path" option update time_format 'H:i'
    wp_run "$wp_path" option update start_of_week 1
    wp_run "$wp_path" option update blogdescription ''
    wp_run "$wp_path" option update default_comment_status 'closed'
    wp_run "$wp_path" option update comment_moderation 1

    # Permalinks
    wp_run "$wp_path" rewrite structure '/%postname%/' --hard
    wp_run "$wp_path" rewrite flush

    # Remove default plugins and content; failures are ignored on purpose
    # (the items may not exist).
    wp_run "$wp_path" plugin delete hello akismet 2>/dev/null || true
    wp_run "$wp_path" post delete 1 2 --force 2>/dev/null || true
    wp_run "$wp_path" comment delete 1 --force 2>/dev/null || true

    # Theme hello-elementor
    wp_run "$wp_path" theme install hello-elementor --activate

    log "WordPress base configurado para $domain"
}
# =================================================================
# INSTALAR CORE WiP (TODOS OS PLANOS)
# =================================================================
# Install and activate the core plugin set shared by every WiP plan.
#   $1  WordPress path
# Plugins are installed in four groups; the total reported at the end is
# computed from the lists, so it cannot drift from what is installed (the
# previous hard-coded "15" did not match the 14 plugins listed).
install_core_wip() {
    local wp_path=$1
    local -a security=(wordfence wp-security-audit-log complianz-gdpr)
    local -a performance=(wp-fastest-cache webp-express insert-headers-and-footers updraftplus)
    # elementor (free) by default; the theme is activated elsewhere
    local -a design=(elementor elementskit-lite loco-translate seo-by-rank-math)
    local -a marketing=(fluent-crm fluent-smtp fluentform)
    local total=$(( ${#security[@]} + ${#performance[@]} + ${#design[@]} + ${#marketing[@]} ))

    log "Instalando plugins core WiP..."

    # Core security
    wp_run "$wp_path" plugin install "${security[@]}" --activate

    # Core performance
    wp_run "$wp_path" plugin install "${performance[@]}" --activate

    # Core design
    wp_run "$wp_path" plugin install "${design[@]}" --activate

    # Core marketing
    wp_run "$wp_path" plugin install "${marketing[@]}" --activate

    log "Core WiP instalado (${total} plugins + tema)"
}
# =================================================================
# MU-PLUGIN: DESCOMPLICAR CLEAN ADMIN
# Remove banners promo do admin (Elementor, ElementsKit, etc.)
# =================================================================
# Copy the descomplicar-clean-admin mu-plugin into a site.
#   $1  WordPress path
# Returns 1 (after two warnings) when the source file is not present under
# ~/.claude-work. The copied file is chown'ed to the third "/"-separated
# field of the WordPress path, used as both user and group.
install_clean_admin() {
    local site_root=$1
    local source_file="${HOME}/.claude-work/descomplicar-clean-admin.php"
    local target_dir="${site_root}/wp-content/mu-plugins"
    local target_file="${target_dir}/descomplicar-clean-admin.php"
    local owner

    log "A instalar mu-plugin descomplicar-clean-admin..."

    if [[ ! -f "$source_file" ]]; then
        warn "Ficheiro nao encontrado: $source_file"
        warn "Fazer download do repositorio antes de continuar."
        return 1
    fi

    mkdir -p "$target_dir"
    cp "$source_file" "$target_file"

    # Fix ownership of the copied file
    owner=$(echo "$site_root" | cut -d/ -f3)
    chown "${owner}:${owner}" "$target_file"

    log "mu-plugin clean-admin instalado em $target_dir"
}
# =================================================================
# CONFIGURAR WPFC (WP Fastest Cache)
# The options are stored as one JSON string, written with "wp option update"
# =================================================================
# Write the WP Fastest Cache settings for a site.
#   $1  WordPress path
# The whole configuration is one JSON string stored in the
# WpFastestCacheOptions option via `wp option update`. The table-prefix
# lookup that used to run first was never used and has been dropped.
configure_wpfc() {
    local wp_path=$1

    log "Configurando WP Fastest Cache..."

    local wpfc_config='{"is_mobile_theme":"","wpFastestCacheStatus":"on","wpFastestCacheSsl":"on","wpFastestCacheMobile":"on","wpFastestCacheLoggedInUsers":"on","wpFastestCacheNewPost":"on","wpFastestCacheUpdatePost":"on","wpFastestCacheByPass":"","wpFastestCacheRenderBlocking":"on","wpFastestCacheSmallImage":"","wpFastestCacheCombineCss":"on","wpFastestCacheCombineJs":"on","wpFastestCacheMinifyCss":"on","wpFastestCacheMinifyCssExclude":"","wpFastestCacheMinifyJs":"on","wpFastestCacheMinifyJsExclude":"","wpFastestCacheCDN":"","wpFastestCacheCDNexclude":"","wpFastestCacheLanguage":"","wpFastestCacheWordPress":""}'

    wp_run "$wp_path" option update WpFastestCacheOptions "$wpfc_config"

    log "WPFC configurado"
}
# =================================================================
# INSTALAR TRADUCOES PT-PT
# =================================================================
# Copy PT-PT translation catalogs (.po/.mo) into a site's Loco Translate
# plugin-languages directory.
#   $1  WordPress path
# Returns 1 (after two warnings) when the translation library directory does
# not exist. Files are copied flat into the target directory, so catalogs
# with the same file name overwrite each other.
install_translations() {
    local wp_path=$1
    local translations_src="/media/ealmeida/Dados/Dev/WordPress/Traducao-Plugins-PT-PT"

    log "A instalar traducoes PT-PT..."

    if [ ! -d "$translations_src" ]; then
        warn "Biblioteca de traducoes nao encontrada: $translations_src"
        warn "Instalar traducoes manualmente via Loco Translate."
        return 1
    fi

    local loco_dir="${wp_path}/wp-content/languages/loco/plugins"
    mkdir -p "$loco_dir"

    # Collect catalogs at any depth with find. The previous "**" glob only
    # recurses when `shopt -s globstar` is enabled, which this script never
    # does, so it silently matched files exactly one directory deep.
    local count=0
    local catalog
    while IFS= read -r -d '' catalog; do
        # Count only files that were actually copied.
        cp "$catalog" "$loco_dir/" && count=$((count + 1))
    done < <(find "$translations_src" -type f \( -name '*.po' -o -name '*.mo' \) -print0)

    local cwp_user
    cwp_user=$(echo "$wp_path" | cut -d/ -f3)
    chown -R "${cwp_user}:${cwp_user}" "$loco_dir"

    log "Traducoes PT-PT instaladas ($count ficheiros)"
}
# =================================================================
# OFERTA STARTER (~22 plugins)
# =================================================================
# Install the Starter plan: base WordPress, core plugins, clean-admin
# mu-plugin, Starter-only plugins, cache configuration and translations.
#   $1  domain
#   $2  WordPress path
#   $3  admin user   (default: admin)
#   $4  admin e-mail (default: admin@descomplicar.pt)
# Ends by chown'ing the whole site to the third "/"-separated field of the
# WordPress path and printing the site summary.
install_starter() {
    local site_domain=$1
    local site_root=$2
    local wp_admin=${3:-"admin"}
    local wp_admin_mail=${4:-"admin@descomplicar.pt"}
    local owner

    # NOTE(review): the v3.0 changelog in the file header lists
    # bit-integrations, ai-engine and branda as removed, yet they are still
    # installed here — confirm which one is intended.
    local -a starter_only=(
        bdthemes-element-pack-lite
        happy-elementor-addons
        envato-elements
        bit-integrations
        bit-social
        ai-engine
        branda-white-labeling
    )

    log "=== INSTALACAO STARTER: $site_domain ==="

    setup_wordpress_base "$site_domain" "$site_root" "$wp_admin" "$wp_admin_mail"
    install_core_wip "$site_root"
    install_clean_admin "$site_root"

    # Plugins specific to the Starter plan
    log "Instalando plugins unicos Starter..."
    wp_run "$site_root" plugin install "${starter_only[@]}" --activate

    configure_wpfc "$site_root"
    install_translations "$site_root"

    # Final ownership fix for the whole site tree
    owner=$(echo "$site_root" | cut -d/ -f3)
    chown -R "${owner}:${owner}" "$site_root"

    log "STARTER instalado (~22 plugins)"
    site_info "$site_root"
}
# =================================================================
# OFERTA CORPORATE (~28 plugins)
# =================================================================
# Install the Corporate plan: everything in Starter, then the Corporate
# additions. Premium plugins are only announced with warnings; they have to
# be installed by hand from a ZIP.
#   $1  domain
#   $2  WordPress path
#   $3  admin user   (default: admin)
#   $4  admin e-mail (default: admin@descomplicar.pt)
install_corporate() {
    local site_domain=$1
    local site_root=$2
    local wp_admin=${3:-"admin"}
    local wp_admin_mail=${4:-"admin@descomplicar.pt"}
    local lite_pack="bdthemes-element-pack-lite"
    local owner
    local -a corporate_extras=(
        google-site-kit
        wp-event-solution
        premium-addons-for-elementor
    )

    log "=== INSTALACAO CORPORATE: $site_domain ==="

    install_starter "$site_domain" "$site_root" "$wp_admin" "$wp_admin_mail"

    log "Upgrade para CORPORATE..."

    # Elementor Free -> Pro: not available on wp.org, manual ZIP install
    warn "ATENCAO: Elementor Pro requer instalacao manual via ZIP."
    warn "Desactivar elementor free e instalar elementor-pro via admin ou WP-CLI upload."

    # bdthemes lite -> premium: remove the lite edition installed by Starter
    wp_run "$site_root" plugin deactivate "$lite_pack"
    wp_run "$site_root" plugin delete "$lite_pack"
    warn "ATENCAO: bdthemes-element-pack (premium) requer instalacao manual via ZIP."

    # Corporate additions
    wp_run "$site_root" plugin install "${corporate_extras[@]}" --activate

    # happyfiles-pro is premium (ZIP)
    warn "ATENCAO: happyfiles-pro requer instalacao manual via ZIP."

    # Ownership fix
    owner=$(echo "$site_root" | cut -d/ -f3)
    chown -R "${owner}:${owner}" "$site_root"

    log "CORPORATE instalado (~28 plugins)"
}
# =================================================================
# OFERTA CARE (~40 plugins)
# =================================================================
# Install the Care plan: everything in Corporate, then support/chat plugins,
# a light WooCommerce stack and Portuguese WooCommerce options. Premium
# plugins are only announced with warnings (manual ZIP install).
#   $1  domain
#   $2  WordPress path
#   $3  admin user   (default: admin)
#   $4  admin e-mail (default: admin@descomplicar.pt)
install_care() {
    local site_domain=$1
    local site_root=$2
    local wp_admin=${3:-"admin"}
    local wp_admin_mail=${4:-"admin@descomplicar.pt"}
    local owner
    local -a care_plugins=(fluent-support click-to-chat-for-whatsapp)
    local -a shop_plugins=(
        woocommerce
        multibanco-ifthen-software-gateway-for-woocommerce
        contribuinte-checkout
        moloni
    )

    log "=== INSTALACAO CARE: $site_domain ==="

    install_corporate "$site_domain" "$site_root" "$wp_admin" "$wp_admin_mail"

    log "Adicionando funcionalidades CARE..."

    # KiviCare is premium: manual ZIP install
    warn "ATENCAO: KiviCare requer instalacao manual via ZIP (5 plugins)."
    warn "Plugins: kivicare-clinic-management-system, kivicare-pro,"
    warn "kivicare-telemed-addon, kivicare-google-meet, kivicare-webhook-addon"

    # Care extras
    wp_run "$site_root" plugin install "${care_plugins[@]}" --activate
    warn "Instalar via ZIP: care-notificacao-whatsms-main, sinc-care, sinccare-fatura"

    # Light e-commerce stack
    wp_run "$site_root" plugin install "${shop_plugins[@]}" --activate

    # WooCommerce options for Portugal
    wp_run "$site_root" option update woocommerce_store_address 'Portugal'
    wp_run "$site_root" option update woocommerce_currency 'EUR'
    wp_run "$site_root" option update woocommerce_default_country 'PT'
    wp_run "$site_root" option update woocommerce_calc_taxes 'yes'

    # Ownership fix
    owner=$(echo "$site_root" | cut -d/ -f3)
    chown -R "${owner}:${owner}" "$site_root"

    log "CARE instalado (~40 plugins)"
}
# =================================================================
# OFERTA ECOMMERCE (~38 plugins)
# =================================================================
# Install the E-commerce plan: everything in Corporate, then the WooCommerce
# stack, Portuguese shop options, the WooCommerce pages and three PT tax
# rates.
#   $1  domain
#   $2  WordPress path
#   $3  admin user   (default: admin)
#   $4  admin e-mail (default: admin@descomplicar.pt)
install_ecommerce() {
    local domain=$1
    local wp_path=$2
    local admin_user=${3:-"admin"}
    local admin_email=${4:-"admin@descomplicar.pt"}

    log "=== INSTALACAO ECOMMERCE: $domain ==="

    install_corporate "$domain" "$wp_path" "$admin_user" "$admin_email"

    log "Adicionando funcionalidades E-COMMERCE..."

    # WooCommerce stack
    wp_run "$wp_path" plugin install \
        woocommerce \
        multibanco-ifthen-software-gateway-for-woocommerce \
        wholesalex \
        woo-save-abandoned-carts \
        money-manager \
        betterdocs \
        fluent-support \
        click-to-chat-for-whatsapp \
        --activate

    # wpfunnels (lite) is available on wp.org
    wp_run "$wp_path" plugin install wpfunnels --activate
    warn "wpfunnels-pro e woocommerce-dashboard-stats: instalar via ZIP"

    # WooCommerce options for Portugal
    wp_run "$wp_path" option update woocommerce_store_address 'Portugal'
    wp_run "$wp_path" option update woocommerce_currency 'EUR'
    wp_run "$wp_path" option update woocommerce_default_country 'PT'
    wp_run "$wp_path" option update woocommerce_calc_taxes 'yes'
    wp_run "$wp_path" option update woocommerce_enable_coupons 'yes'
    wp_run "$wp_path" option update woocommerce_manage_stock 'yes'

    # `wp wc ...` commands go through the WooCommerce REST layer and must be
    # run as a user allowed to manage the shop; without --user they are
    # rejected. The site admin passed to this installer is used.

    # Create the WooCommerce pages
    wp_run "$wp_path" wc tool run install_pages --user="$admin_user"

    # Portuguese VAT rates.
    # NOTE(review): the 6% rate is filed under the "zero-rate" class and the
    # 13% rate under "reduced-rate" — presumably to reuse WooCommerce's
    # built-in classes; confirm this mapping is intended.
    wp_run "$wp_path" wc tax create --country=PT --rate=23 --name="IVA Normal" --class=standard --user="$admin_user"
    wp_run "$wp_path" wc tax create --country=PT --rate=13 --name="IVA Intermedio" --class=reduced-rate --user="$admin_user"
    wp_run "$wp_path" wc tax create --country=PT --rate=6 --name="IVA Reduzido" --class=zero-rate --user="$admin_user"

    # Ownership fix
    local cwp_user
    cwp_user=$(echo "$wp_path" | cut -d/ -f3)
    chown -R "${cwp_user}:${cwp_user}" "$wp_path"

    log "ECOMMERCE instalado (~38 plugins)"
}
# =================================================================
# OFERTA CHALLENGE (~40+ plugins)
# =================================================================
# Install the Challenge plan: everything in E-commerce, then migration and
# consent plugins and a sector-specific step.
#   $1  domain
#   $2  WordPress path
#   $3  sector       (default: general)
#   $4  admin user   (default: admin)
#   $5  admin e-mail (default: admin@descomplicar.pt)
# Only the education sector installs an extra plugin (learnpress); the other
# sectors just log or warn.
install_challenge() {
    local site_domain=$1
    local site_root=$2
    local business_sector=${3:-"general"}
    local wp_admin=${4:-"admin"}
    local wp_admin_mail=${5:-"admin@descomplicar.pt"}
    local owner
    local -a challenge_plugins=(
        product-import-export-for-woo
        wpconsent-cookies-banner-privacy-suite
    )

    log "=== INSTALACAO CHALLENGE: $site_domain (Sector: $business_sector) ==="

    install_ecommerce "$site_domain" "$site_root" "$wp_admin" "$wp_admin_mail"

    log "Adicionando funcionalidades CHALLENGE..."

    # Migration & compliance
    wp_run "$site_root" plugin install "${challenge_plugins[@]}" --activate
    warn "fg-prestashop-to-woocommerce-premium: instalar via ZIP (plugin premium)"

    # Sector-specific step
    case "$business_sector" in
        automotive | auto)
            # Automotive plugins are installed per client
            log "Configurando para sector AUTOMOVEL..."
            ;;
        healthcare | saude)
            log "Configurando para sector SAUDE..."
            warn "Considerar instalar KiviCare (ver plano Care)"
            ;;
        education | educacao)
            log "Configurando para sector EDUCACAO..."
            wp_run "$site_root" plugin install learnpress --activate
            ;;
        *)
            log "Challenge generico — ajustar conforme cliente"
            ;;
    esac

    # Ownership fix
    owner=$(echo "$site_root" | cut -d/ -f3)
    chown -R "${owner}:${owner}" "$site_root"

    log "CHALLENGE instalado (~40+ plugins, sector: $business_sector)"
}
# =================================================================
# INFO SITE
# =================================================================
# Print a coloured summary of a site: path, WordPress version, active theme,
# number of active plugins, followed by the active-plugin table.
#   $1  WordPress path
# WP-CLI errors for the three summary values are hidden (stderr discarded).
site_info() {
    local site_root=$1
    local core_version active_theme active_count

    core_version=$(wp_run "$site_root" core version 2>/dev/null)
    active_theme=$(wp_run "$site_root" theme list --status=active --field=name 2>/dev/null)
    active_count=$(wp_run "$site_root" plugin list --status=active --format=count 2>/dev/null)

    echo -e "${BLUE}=== INFORMACOES DO SITE ===${NC}"
    echo -e "${BLUE}Path: $site_root${NC}"
    echo -e "${BLUE}WordPress: ${core_version}${NC}"
    echo -e "${BLUE}Tema activo: ${active_theme}${NC}"
    echo -e "${BLUE}Plugins activos: ${active_count}${NC}"
    echo -e "${BLUE}==============================${NC}"
    echo -e "${YELLOW}Plugins activos:${NC}"

    wp_run "$site_root" plugin list --status=active --format=table
}
# =================================================================
# MENU PRINCIPAL
# =================================================================
# Print the installer banner (in blue) followed by the numbered menu.
show_menu() {
    local rule="=================================================="

    # Banner: the colour is switched on, then reset, each on its own line.
    printf '%b\n' "${BLUE}"
    printf '%s\n' \
        "$rule" \
        " WiP - Websites Inteligentes e Poderosos" \
        " Instalador v3.0 (Fev 2026)" \
        "$rule"
    printf '%b\n' "${NC}"

    # Menu entries, ending with one blank line.
    cat <<'MENU'
1) Starter - Corporativo basico (~22 plugins)
2) Corporate - Empresarial avancado (~28 plugins)
3) Care - Sector saude (~40 plugins)
4) E-commerce - Loja online completa (~38 plugins)
5) Challenge - Especializado premium (~40+ plugins)
6) Info site - Informacoes site existente
7) Sair

MENU
}
# =================================================================
# MAIN
# =================================================================
# Prompt for the parameters shared by every install option and store them in
# the globals domain, wp_path, admin_user, admin_email (and sector).
#   $1  prompt for the domain
#   $2  prompt for the WordPress path
#   $3  "sector" to also ask for the sector (Challenge plan), empty otherwise
# Returns 1 when stdin reaches EOF, 2 when domain or path is empty.
_read_site_params() {
    local domain_prompt=$1
    local path_prompt=$2
    local sector_mode=${3:-}

    # -r keeps backslashes typed by the operator intact.
    read -r -p "$domain_prompt" domain || return 1
    read -r -p "$path_prompt" wp_path || return 1
    if [ "$sector_mode" = "sector" ]; then
        read -r -p "Sector [automotive/healthcare/education/general]: " sector || return 1
        sector=${sector:-general}
    fi
    read -r -p "Admin user [admin]: " admin_user || return 1
    read -r -p "Admin email: " admin_email || return 1
    admin_user=${admin_user:-admin}

    # An empty domain or path would reach `chown -R` and the database name
    # unchecked, so refuse it here.
    if [ -z "$domain" ] || [ -z "$wp_path" ]; then
        err "Dominio e path sao obrigatorios."
        return 2
    fi
}

# Interactive entry point: checks prerequisites (WP-CLI on PATH, DB_USER and
# DB_PASS set), then loops over the menu until the operator chooses to exit.
# Exits 1 on a failed prerequisite or when stdin is closed, 0 on "Sair".
main() {
    if ! command -v wp &> /dev/null; then
        err "WP-CLI nao encontrado. Verificar instalacao."
        exit 1
    fi

    if [ -z "$DB_USER" ] || [ -z "$DB_PASS" ]; then
        err "Variaveis DB_USER e DB_PASS nao definidas."
        echo "Exportar antes de correr: export DB_USER=user DB_PASS=pass"
        exit 1
    fi

    local choice installer domain_prompt path_prompt sector_mode status

    while true; do
        show_menu

        # Without this check a closed stdin made `read` fail forever and the
        # menu looped endlessly on "Opcao invalida.".
        if ! read -r -p "Escolha [1-7]: " choice; then
            echo ""
            err "Entrada terminada (EOF)."
            exit 1
        fi

        case "$choice" in
            1 | 2 | 3 | 4 | 5)
                domain_prompt="Dominio: "
                path_prompt="Path completo: "
                sector_mode=""
                case "$choice" in
                    1)
                        installer=install_starter
                        domain_prompt="Dominio (ex: cliente.pt): "
                        path_prompt="Path completo (ex: /home/ealmeida/cliente.pt): "
                        ;;
                    2) installer=install_corporate ;;
                    3) installer=install_care ;;
                    4) installer=install_ecommerce ;;
                    5)
                        installer=install_challenge
                        sector_mode="sector"
                        ;;
                esac

                _read_site_params "$domain_prompt" "$path_prompt" "$sector_mode"
                status=$?
                if [ "$status" -eq 1 ]; then
                    echo ""
                    err "Entrada terminada (EOF)."
                    exit 1
                elif [ "$status" -eq 0 ]; then
                    if [ "$sector_mode" = "sector" ]; then
                        # Challenge takes the sector as its third argument.
                        "$installer" "$domain" "$wp_path" "$sector" "$admin_user" "$admin_email"
                    else
                        "$installer" "$domain" "$wp_path" "$admin_user" "$admin_email"
                    fi
                fi
                ;;
            6)
                read -r -p "Path completo do site: " wp_path || exit 1
                site_info "$wp_path"
                ;;
            7)
                log "Saindo."
                exit 0
                ;;
            *)
                err "Opcao invalida."
                ;;
        esac

        echo ""
        read -r -p "Pressione Enter para continuar..." || exit 1
        clear
    done
}

# Run the menu only when executed directly, not when the file is sourced.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi