feat: scripts de projectos vindos do Hub (podcast, alojadamaria, clip, ocr, etc.)
Movidos do vault Hub para centralizar scripts. Hub mantém symlinks. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Executable
+141
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Captura screenshots de alojadamaria.com para auditoria visual SEO/UX
|
||||
"""
|
||||
from playwright.sync_api import sync_playwright
|
||||
import json
|
||||
import time
|
||||
import os
|
||||
|
||||
# Root of the site under audit; every page URL below is built by appending to it.
BASE_URL = "https://alojadamaria.com/"
# Directory that receives the PNG captures and the metadata JSON.
OUTPUT_DIR = "/media/ealmeida/Dados/Hub/03-Propostas/ALojaDaMaria/screenshots/alojadamaria"

# Make sure the output directory exists before anything is written to it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Browser viewport sizes, keyed by device label.
VIEWPORTS = dict(
    desktop=dict(width=1440, height=900),
    mobile=dict(width=375, height=812),
)

# Pages to capture, keyed by the label that ends up in the output file names.
PAGES = dict(
    homepage=BASE_URL,
    categoria=f"{BASE_URL}product-category/novidades/",
    contacto=f"{BASE_URL}contactos/",
)
|
||||
|
||||
# Selectors for the close/dismiss controls of common overlays
# (cookie consent, newsletter pop-ups).
_POPUP_CLOSE_SELECTORS = (
    "button[class*='close']",
    "button[class*='dismiss']",
    "[class*='cookie'] button",
    "[id*='cookie'] button",
    "[class*='popup-close']",
    ".pum-close",
    "button[aria-label*='Close']",
    "button[aria-label*='close']",
)

_NAV_SELECTOR = "nav, [class*='nav'], [class*='menu']"
_CTA_SELECTOR = (
    "a[class*='btn'], a[class*='button'], button[class*='btn'], "
    ".add-to-cart, [class*='cta']"
)
_POPUP_SELECTOR = (
    ".pum-overlay, [class*='popup'][style*='display: block'], "
    "[class*='modal'][style*='display: block']"
)
_LOGO_SELECTOR = "img[class*='logo'], a[class*='logo'] img, header img, .site-logo img"


def _fechar_popups(page):
    """Click the first visible match of each known close selector (best effort)."""
    for selector in _POPUP_CLOSE_SELECTORS:
        try:
            el = page.query_selector(selector)
            if el and el.is_visible():
                el.click()
                time.sleep(0.5)  # give the overlay time to disappear
        except Exception as exc:
            # Closing overlays is optional: report the problem and carry on
            # so a stubborn pop-up never aborts the capture.
            print(f" Aviso: pop-up não fechado ({selector}): {exc}")


def _algum_visivel(page, selector):
    """Return True when at least one element matching *selector* is visible."""
    return any(el.is_visible() for el in page.query_selector_all(selector))


def _recolher_metadados(page, url):
    """Collect title, visible H1 texts, CTA sample and logo box for *page*."""
    h1_texts = [
        el.inner_text().strip()
        for el in page.query_selector_all("h1")
        if el.is_visible()
    ]

    # Sample the text of the visible elements among the first five CTA matches.
    ctas = []
    for el in page.query_selector_all(_CTA_SELECTOR)[:5]:
        try:
            if el.is_visible():
                ctas.append(el.inner_text().strip()[:50])
        except Exception as exc:
            print(f" Aviso: CTA ignorado: {exc}")

    logo_info = None
    logo = page.query_selector(_LOGO_SELECTOR)
    if logo:
        try:
            logo_info = logo.bounding_box()
        except Exception as exc:
            print(f" Aviso: logo sem dimensões: {exc}")

    return {
        "url": url,
        "title": page.title(),
        "h1": h1_texts,
        # Visibility is checked explicitly: a matching node that exists in the
        # DOM but is hidden must not count as a visible nav or an open pop-up.
        "nav_visible": _algum_visivel(page, _NAV_SELECTOR),
        "ctas_sample": ctas,
        "popup_detected": _algum_visivel(page, _POPUP_SELECTOR),
        "logo_bounding_box": logo_info,
    }


def capturar(page, url, nome, viewport):
    """Capture above-the-fold and full-page screenshots plus page metadata.

    Args:
        page: Playwright page to drive.
        url: Address to load.
        nome: File stem; ``{nome}_atf.png`` and ``{nome}_full.png`` are written
            to ``OUTPUT_DIR``.
        viewport: Dict with ``width`` and ``height`` used to clip the
            above-the-fold capture.

    Returns:
        A dict of metadata on success, or ``{"url": ..., "error": ...}`` when
        anything in the capture fails (the error is printed, not raised).
    """
    print(f" -> A capturar: {nome} ({viewport['width']}x{viewport['height']})")
    try:
        page.goto(url, wait_until="networkidle", timeout=30000)
        time.sleep(2)

        _fechar_popups(page)

        # Above the fold (viewport only).
        page.screenshot(
            path=f"{OUTPUT_DIR}/{nome}_atf.png",
            full_page=False,
            clip={"x": 0, "y": 0, "width": viewport["width"], "height": viewport["height"]},
        )

        # Full page.
        page.screenshot(
            path=f"{OUTPUT_DIR}/{nome}_full.png",
            full_page=True,
        )

        return _recolher_metadados(page, url)
    except Exception as e:
        print(f" ERRO: {e}")
        # Keep the URL so a failed entry in the report is still traceable.
        return {"url": url, "error": str(e)}
|
||||
|
||||
|
||||
def main():
    """Capture every page in ``PAGES`` at every size in ``VIEWPORTS``.

    Results are keyed ``"<device>_<page>"``, written to
    ``OUTPUT_DIR/metadados.json`` and also returned.
    """
    resultados = {}

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            for device_name, viewport in VIEWPORTS.items():
                print(f"\n[{device_name.upper()}] {viewport['width']}x{viewport['height']}")
                context = browser.new_context(
                    viewport=viewport,
                    user_agent="Mozilla/5.0 (compatible; AuditBot/1.0)",
                    locale="pt-PT",
                )
                try:
                    page = context.new_page()
                    for page_name, url in PAGES.items():
                        chave = f"{device_name}_{page_name}"
                        print(f" Página: {page_name}")
                        resultados[chave] = capturar(page, url, chave, viewport)
                finally:
                    # Release the context even if a capture step raises.
                    context.close()
        finally:
            browser.close()

    with open(f"{OUTPUT_DIR}/metadados.json", "w", encoding="utf-8") as f:
        json.dump(resultados, f, ensure_ascii=False, indent=2)

    print("\nCaptura concluída. Ficheiros em:", OUTPUT_DIR)
    return resultados


if __name__ == "__main__":
    main()
|
||||
Executable
+122
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Captura detalhes adicionais: hero CTA, produto, footer, barra anúncio
|
||||
"""
|
||||
from playwright.sync_api import sync_playwright
|
||||
import time
|
||||
|
||||
# Site under audit and the directory that receives the captures.
BASE_URL = "https://alojadamaria.com/"
OUTPUT_DIR = "/media/ealmeida/Dados/Hub/03-Propostas/ALojaDaMaria/screenshots/alojadamaria"


def crop(page, path, clip):
    """Take a non-full-page screenshot of *page* limited to *clip*, save it to *path* and report it."""
    opcoes = {"path": path, "clip": clip, "full_page": False}
    page.screenshot(**opcoes)
    print(f" Guardado: {path}")
|
||||
|
||||
# Each step: (scroll script or None, output file stem, clip height, progress message).
_DESKTOP_SHOTS = (
    (None, "desktop_hero_zoom", 600, " Hero desktop guardado"),
    (None, "desktop_header", 80, " Header desktop guardado"),
    (None, "desktop_announcebar", 35, " Barra anúncio guardada"),
    ("window.scrollTo(0, 700)", "desktop_produtos", 900, " Produtos desktop guardados"),
    ("window.scrollTo(0, document.body.scrollHeight)", "desktop_footer", 900,
     " Footer desktop guardado"),
)
_MOBILE_SHOTS = (
    (None, "mobile_header", 120, " Header mobile guardado"),
    (None, "mobile_hero", 500, " Hero mobile guardado"),
    ("window.scrollTo(0, 500)", "mobile_produtos", 812, " Produtos mobile guardados"),
    ("window.scrollTo(0, document.body.scrollHeight)", "mobile_footer", 812,
     " Footer mobile guardado"),
)


def _capturar_seccoes(page, largura, passos):
    """Run each step in *passos*: optionally scroll, then save a clipped screenshot."""
    for script, nome, altura, mensagem in passos:
        if script:
            page.evaluate(script)
            time.sleep(1)  # let the page settle after scrolling
        page.screenshot(
            path=f"{OUTPUT_DIR}/{nome}.png",
            clip={"x": 0, "y": 0, "width": largura, "height": altura},
        )
        print(mensagem)


def _encontrar_url_produto(page):
    """Return the first link on *page* that points at a product (not a category), or None."""
    for link in page.query_selector_all("a[href*='product']"):
        href = link.get_attribute("href")
        if href and "product-category" not in href and "alojadamaria.com/product" in href:
            return href
    return None


def _capturar_produto(page, prefixo):
    """Save viewport and full-page screenshots of the product page loaded in *page*."""
    page.screenshot(path=f"{OUTPUT_DIR}/{prefixo}_produto_detalhe_atf.png", full_page=False)
    page.screenshot(path=f"{OUTPUT_DIR}/{prefixo}_produto_detalhe_full.png", full_page=True)


def main():
    """Capture homepage detail crops (desktop and mobile) and one product page.

    Contexts and the browser are closed in ``finally`` blocks so a failed
    navigation or screenshot does not leave them open.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            # --- Desktop 1440px ---
            ctx = browser.new_context(viewport={"width": 1440, "height": 900}, locale="pt-PT")
            try:
                page = ctx.new_page()
                page.goto(BASE_URL, wait_until="networkidle", timeout=40000)
                time.sleep(2)
                _capturar_seccoes(page, 1440, _DESKTOP_SHOTS)
            finally:
                ctx.close()

            # --- Mobile 375px ---
            ctx_m = browser.new_context(viewport={"width": 375, "height": 812}, locale="pt-PT")
            try:
                page_m = ctx_m.new_page()
                page_m.goto(BASE_URL, wait_until="networkidle", timeout=40000)
                time.sleep(2)
                _capturar_seccoes(page_m, 375, _MOBILE_SHOTS)

                # Try to find a real product URL among the homepage links.
                product_url = _encontrar_url_produto(page_m)
                if product_url:
                    print(f"\n URL produto encontrado: {product_url}")
                    page_m.goto(product_url, wait_until="networkidle", timeout=30000)
                    time.sleep(2)
                    _capturar_produto(page_m, "mobile")
                    print(" Produto detalhe mobile guardado")

                    # Same product page at desktop size, in a fresh context.
                    ctx_d2 = browser.new_context(
                        viewport={"width": 1440, "height": 900}, locale="pt-PT"
                    )
                    try:
                        page_d2 = ctx_d2.new_page()
                        page_d2.goto(product_url, wait_until="networkidle", timeout=30000)
                        time.sleep(2)
                        _capturar_produto(page_d2, "desktop")
                        print(" Produto detalhe desktop guardado")
                    finally:
                        ctx_d2.close()
            finally:
                ctx_m.close()
        finally:
            browser.close()

    print("\nCapturas extra concluídas.")


if __name__ == "__main__":
    main()
|
||||
Executable
+246
@@ -0,0 +1,246 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script de captura e análise visual SEO para descomplicar.pt
|
||||
Analisa: capturas desktop/mobile, above-the-fold, imagens, CTAs
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Page analysed by this script.
URL = "https://descomplicar.pt"
# Where screenshots and the analysis JSON are written.
# NOTE(review): this path sits under the ALojaDaMaria proposal folder although
# the target site is descomplicar.pt; confirm that is intended.
SCREENSHOTS_DIR = "/media/ealmeida/Dados/Hub/03-Propostas/ALojaDaMaria/screenshots"

# Viewport sizes keyed by device class.
VIEWPORTS = {
    device: {"width": width, "height": height}
    for device, width, height in (
        ("desktop", 1920, 1080),
        ("laptop", 1366, 768),
        ("tablet", 768, 1024),
        ("mobile", 375, 812),
    )
}
|
||||
|
||||
|
||||
def capture(url, output_path, viewport_width=1920, viewport_height=1080):
    """Save a viewport-sized (not full-page) screenshot of *url* to *output_path*.

    Args:
        url: Page to load.
        output_path: Destination image path.
        viewport_width: Browser viewport width in pixels.
        viewport_height: Browser viewport height in pixels.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(
                viewport={"width": viewport_width, "height": viewport_height}
            )
            page.goto(url, wait_until="networkidle", timeout=30000)
            page.screenshot(path=output_path, full_page=False)
        finally:
            # Close the browser even when navigation or the screenshot fails.
            browser.close()
|
||||
|
||||
|
||||
# Above-the-fold checks for the desktop view: H1s, CTAs, value proposition
# and trust signals whose bounding box lies fully inside the viewport.
_JS_DESKTOP_ATF = """() => {
    const vw = window.innerWidth;
    const vh = window.innerHeight;

    // H1
    const h1s = Array.from(document.querySelectorAll('h1'));
    const h1Visible = h1s.filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0;
    });

    // CTAs (botões e links com texto de acção)
    const ctaKeywords = /contacto|falar|orçamento|começar|saber mais|ver mais|agendar|demo|serviços|get started|contact/i;
    const allBtns = Array.from(document.querySelectorAll('a, button'));
    const ctasAtf = allBtns.filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0 && ctaKeywords.test(el.textContent);
    }).map(el => ({text: el.textContent.trim().substring(0,60), tag: el.tagName, top: Math.round(el.getBoundingClientRect().top)}));

    // Value proposition (primeiro parágrafo/subtítulo visível)
    const textEls = Array.from(document.querySelectorAll('h2, h3, p, .subtitle, .hero-text, [class*="hero"] p, [class*="tagline"]'));
    const vpEl = textEls.find(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && el.textContent.trim().length > 30;
    });

    // Sinais de confiança (logos, testimonials, reviews)
    const trustSelectors = '[class*="client"], [class*="partner"], [class*="logo"], [class*="review"], [class*="testim"], [class*="trust"], .stars, [class*="rating"]';
    const trustEls = Array.from(document.querySelectorAll(trustSelectors));
    const trustAtf = trustEls.filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0;
    }).length;

    return {
        viewport: {width: vw, height: vh},
        h1Count: h1s.length,
        h1Texts: h1s.map(el => ({text: el.textContent.trim().substring(0,100), visible: h1Visible.includes(el)})),
        h1AboveFold: h1Visible.length,
        ctasAboveFold: ctasAtf,
        valueProposition: vpEl ? vpEl.textContent.trim().substring(0,200) : null,
        trustSignalsAboveFold: trustAtf,
    };
}"""

# Per-image attributes: alt text, lazy loading, declared and natural sizes, format.
_JS_IMAGES = """() => {
    return Array.from(document.querySelectorAll('img')).map(img => ({
        src: img.src.substring(0, 120),
        alt: img.alt,
        hasAlt: img.alt.trim().length > 0,
        loading: img.loading,
        width: img.width,
        height: img.height,
        hasWidthAttr: img.hasAttribute('width'),
        hasHeightAttr: img.hasAttribute('height'),
        isWebP: img.src.includes('.webp'),
        isAvif: img.src.includes('.avif'),
        naturalWidth: img.naturalWidth,
        naturalHeight: img.naturalHeight,
        rect: (() => { const r = img.getBoundingClientRect(); return {top: Math.round(r.top), visible: r.width > 0}; })()
    }));
}"""

# Title, description, Open Graph tags, canonical link, language and heading counts.
_JS_META_SEO = """() => {
    const getMeta = (name) => {
        const el = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`);
        return el ? el.getAttribute('content') : null;
    };
    return {
        title: document.title,
        metaDescription: getMeta('description'),
        ogTitle: getMeta('og:title'),
        ogDescription: getMeta('og:description'),
        ogImage: getMeta('og:image'),
        canonical: (() => { const l = document.querySelector('link[rel="canonical"]'); return l ? l.href : null; })(),
        lang: document.documentElement.lang,
        h2Count: document.querySelectorAll('h2').length,
        h3Count: document.querySelectorAll('h3').length,
    };
}"""

# Resource counts taken from the Performance API resource entries.
_JS_PERF = """() => {
    const entries = performance.getEntriesByType('resource');
    const imgs = entries.filter(e => e.initiatorType === 'img');
    const scripts = entries.filter(e => e.initiatorType === 'script');
    const styles = entries.filter(e => e.initiatorType === 'link' || e.initiatorType === 'css');
    return {
        totalResources: entries.length,
        imgCount: imgs.length,
        scriptCount: scripts.length,
        styleCount: styles.length,
    };
}"""

# Mobile checks: horizontal overflow, navigation, tap-target sizes, font size,
# and the H1s / CTAs that fit inside the first viewport.
_JS_MOBILE_CHECKS = """() => {
    const vw = window.innerWidth;
    const vh = window.innerHeight;
    const docWidth = document.documentElement.scrollWidth;

    // Verificar overflow horizontal
    const hasHorizontalScroll = docWidth > vw;

    // Navegação móvel
    const nav = document.querySelector('nav, [class*="nav"], [class*="menu"], header');
    const navVisible = nav ? nav.getBoundingClientRect().width > 0 : false;
    const hamburger = document.querySelector('[class*="hamburger"], [class*="toggle"], [class*="burger"], .menu-icon, [aria-label*="menu"], [aria-label*="Menu"]');

    // Tap-target size: flag anything narrower or shorter than 44px
    const allTapTargets = Array.from(document.querySelectorAll('a, button, input, select, textarea'));
    const smallTargets = allTapTargets.filter(el => {
        const r = el.getBoundingClientRect();
        return r.width > 0 && r.height > 0 && (r.width < 44 || r.height < 44);
    }).slice(0, 10).map(el => ({
        tag: el.tagName,
        text: el.textContent.trim().substring(0, 40),
        w: Math.round(el.getBoundingClientRect().width),
        h: Math.round(el.getBoundingClientRect().height)
    }));

    // Tamanho de fonte base
    const bodyFontSize = parseFloat(window.getComputedStyle(document.body).fontSize);

    // H1 visível no mobile
    const h1s = Array.from(document.querySelectorAll('h1'));
    const h1MobileVisible = h1s.filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0;
    });

    // CTAs mobile
    const ctaKeywords = /contacto|falar|orçamento|começar|saber mais|ver mais|agendar|demo|serviços/i;
    const ctasMobile = Array.from(document.querySelectorAll('a, button')).filter(el => {
        const r = el.getBoundingClientRect();
        return r.top >= 0 && r.bottom <= vh && r.width > 0 && ctaKeywords.test(el.textContent);
    }).map(el => ({text: el.textContent.trim().substring(0,50), w: Math.round(el.getBoundingClientRect().width), h: Math.round(el.getBoundingClientRect().height)}));

    return {
        viewport: {width: vw, height: vh},
        documentWidth: docWidth,
        hasHorizontalScroll,
        navVisible,
        hasHamburger: !!hamburger,
        hamburgerClass: hamburger ? hamburger.className.substring(0,60) : null,
        smallTapTargets: smallTargets,
        smallTapTargetCount: smallTargets.length,
        bodyFontSize,
        h1AboveFoldMobile: h1MobileVisible.length,
        h1TextMobile: h1MobileVisible[0] ? h1MobileVisible[0].textContent.trim().substring(0,100) : null,
        ctasMobileAtf: ctasMobile,
    };
}"""

_MOBILE_USER_AGENT = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1"
)


def analyse_page(url):
    """Screenshot *url* at desktop, mobile and laptop sizes and collect SEO data.

    Screenshots are written to ``SCREENSHOTS_DIR``. The in-page checks run as
    JavaScript through ``page.evaluate``.

    Args:
        url: Page to analyse.

    Returns:
        Dict with the keys ``desktop_atf``, ``images``, ``meta_seo``, ``perf``
        (all from the desktop view) and ``mobile``.
    """
    results = {}

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            # --- Desktop 1920x1080 ---
            page = browser.new_page(viewport=VIEWPORTS["desktop"])
            page.goto(url, wait_until="networkidle", timeout=30000)
            page.screenshot(path=f"{SCREENSHOTS_DIR}/desktop_1920.png", full_page=False)
            page.screenshot(path=f"{SCREENSHOTS_DIR}/desktop_1920_full.png", full_page=True)

            results["desktop_atf"] = page.evaluate(_JS_DESKTOP_ATF)
            results["images"] = page.evaluate(_JS_IMAGES)
            results["meta_seo"] = page.evaluate(_JS_META_SEO)
            results["perf"] = page.evaluate(_JS_PERF)

            # --- Mobile 375x812 ---
            mobile_page = browser.new_page(
                viewport=VIEWPORTS["mobile"],
                user_agent=_MOBILE_USER_AGENT,
            )
            mobile_page.goto(url, wait_until="networkidle", timeout=30000)
            mobile_page.screenshot(path=f"{SCREENSHOTS_DIR}/mobile_375.png", full_page=False)
            mobile_page.screenshot(path=f"{SCREENSHOTS_DIR}/mobile_375_full.png", full_page=True)
            results["mobile"] = mobile_page.evaluate(_JS_MOBILE_CHECKS)

            # --- Laptop 1366x768 (screenshot only) ---
            laptop_page = browser.new_page(viewport=VIEWPORTS["laptop"])
            laptop_page.goto(url, wait_until="networkidle", timeout=30000)
            laptop_page.screenshot(path=f"{SCREENSHOTS_DIR}/laptop_1366.png", full_page=False)
        finally:
            # Close the browser (and with it every page) even when a step fails.
            browser.close()

    return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the analysis, persist the result as JSON, then echo it to stdout.
    print("A capturar screenshots e analisar descomplicar.pt...")
    data = analyse_page(URL)
    resumo = json.dumps(data, ensure_ascii=False, indent=2)

    output_file = f"{SCREENSHOTS_DIR}/analysis_data.json"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(resumo)

    for linha in (
        f"Análise concluída. Dados guardados em: {output_file}",
        f"Screenshots em: {SCREENSHOTS_DIR}/",
        "\n--- RESUMO ---",
        resumo,
    ):
        print(linha)
|
||||
Executable
+11
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Mark a Perfex task as in progress (status 4) and hand it to the Claude CLI.
# Usage: <script> '<task-json>'   (JSON with task_id/id and task_name/name)
set -u

PERFEX_URL="${PERFEX_URL:-https://desk.descomplicar.pt}"
# SECURITY: the API token used to be hard-coded on this line and was committed
# to the repository. Treat that token as leaked: rotate it and supply the new
# one through the environment instead.
PERFEX_API_KEY="${PERFEX_API_KEY:?PERFEX_API_KEY must be set in the environment}"
CLAUDE_LOG_DIR="${CLAUDE_LOG_DIR:-/home/ealmeida/.logs/claude-agent}"

TASK_JSON="${1:?usage: $0 '<task-json>'}"
TASK_ID=$(printf '%s' "$TASK_JSON" | jq -r '.task_id // .id')
TASK_NAME=$(printf '%s' "$TASK_JSON" | jq -r '.task_name // .name')

# The ID ends up in a URL, a file name and a prompt, so accept only a plain token.
if [[ ! "$TASK_ID" =~ ^[A-Za-z0-9_-]+$ || "$TASK_ID" == "null" ]]; then
  echo "Invalid or missing task id in task JSON" >&2
  exit 1
fi

mkdir -p "$CLAUDE_LOG_DIR"
LOG_FILE="$CLAUDE_LOG_DIR/task-$TASK_ID-$(date +%Y%m%d).log"

echo "[$(date)] INÍCIO — Tarefa #$TASK_ID: $TASK_NAME" >> "$LOG_FILE"
curl -s -X PUT -H "authtoken: $PERFEX_API_KEY" -d "status=4" "$PERFEX_URL/api/v1/tasks/$TASK_ID" >> "$LOG_FILE" 2>&1
claude -p "És o AIkTop. Resolve a tarefa #$TASK_ID. No final, usa MCP para marcar status 5." --allowedTools Read,Edit,Write,Bash,Command --max-turns 20 --yes >> "$LOG_FILE" 2>&1
|
||||
Executable
+129
@@ -0,0 +1,129 @@
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
import os
|
||||
|
||||
# Helper to get issues from MCP tool output, skipping any text before the JSON.
def parse_mcp_output(mcp_output_string):
    """Extract the issue payload from raw MCP tool output.

    Text before the JSON document (for example a truncation notice) is
    skipped. Parsing starts at whichever of ``{`` or ``[`` comes first, so a
    top-level JSON array is accepted. If the text starts with a bracket that is
    not JSON (for example ``[info] {...}``), parsing is retried from the first
    ``{``.

    Args:
        mcp_output_string: Raw text returned by the tool.

    Returns:
        ``(data, None)`` on success, where ``data`` is the parsed list or the
        value under the ``"Result"`` key; ``(None, message)`` on failure.
    """
    brace = mcp_output_string.find("{")
    bracket = mcp_output_string.find("[")
    if brace == -1 and bracket == -1:
        return None, "Error: No JSON content found in tool output."

    if bracket != -1 and (brace == -1 or bracket < brace):
        # A leading "[" may be a JSON array or just bracketed prefix text;
        # fall back to the first "{" if it does not parse.
        starts = [bracket] if brace == -1 else [bracket, brace]
    else:
        starts = [brace]

    error = None
    for start in starts:
        clean_content = mcp_output_string[start:]
        try:
            parsed_content = json.loads(clean_content)
        except json.JSONDecodeError as e:
            error = f"Error: Could not parse JSON content after cleaning: {e} - {clean_content[:200]}..."
            continue

        if isinstance(parsed_content, list):
            return parsed_content, None
        if "Result" in parsed_content:
            return parsed_content["Result"], None
        return None, f"Error: Unexpected JSON structure after cleaning: {clean_content[:200]}..."

    return None, error
|
||||
|
||||
# Loads issues that an external step has already saved to a file.
def fetch_all_issues_and_save(owner, repo, state, file_path, page_size=100):
    """Load previously saved issues from *file_path*.

    Despite its name, this function does not fetch anything: MCP tools cannot
    be called from inside this script, so an external step is expected to have
    written the combined output of all pages to *file_path* beforehand.

    Args:
        owner: Repository owner (only used in the debug message).
        repo: Repository name (only used in the debug message).
        state: Issue state filter; currently unused.
        file_path: File holding the full tool output.
        page_size: Intended page size; currently unused.

    Returns:
        The parsed issue data, or ``None`` when the file is missing or cannot
        be parsed (the reason is printed).
    """
    # Placeholder for the real pagination loop; only the first page is announced.
    page = 1
    print(f"DEBUG: Placeholder for fetching page {page} from {owner}/{repo}")

    if not os.path.exists(file_path):
        print(f"Error: Issue data file not found at {file_path}. Please ensure it is created with full data.")
        return None

    # Explicit UTF-8: issue titles contain non-ASCII text.
    with open(file_path, "r", encoding="utf-8") as f:
        full_issues_content = f.read()

    issues_data, error = parse_mcp_output(full_issues_content)
    if error:
        print(error)
        return None

    return issues_data
|
||||
|
||||
def find_first_unhandled_original_issue(issues_data):
    """Return the oldest open issue that has not been delegated.

    An issue is skipped when it is closed, when its title starts with one of
    the delegation prefixes, or when it has comments and either its body
    mentions a delegation or its title mentions an n8n workflow.

    Args:
        issues_data: List of issue dicts with ``number``, ``title``, ``body``,
            ``state``, ``comments`` and ``created_at``. The list is not
            modified.

    Returns:
        Dict with ``number``, ``title`` and ``body`` of the first match, or
        ``None`` when there is none (or *issues_data* is empty).
    """
    if not issues_data:
        return None

    from datetime import timezone  # local import: only this function needs it

    delegation_prefixes = (
        "[Dir. Automação]",
        "[Dir. Desenvolvimento]",
        "[Dir. Infraestrutura]",
        "[COO]",
        "[Improvement Evaluator]",
    )

    def created_at(issue):
        # Accept "Z" as well as explicit offsets and fractional seconds;
        # treat timestamps without an offset as UTC so all values compare.
        raw = issue["created_at"]
        if raw.endswith("Z"):
            raw = raw[:-1] + "+00:00"
        stamp = datetime.fromisoformat(raw)
        if stamp.tzinfo is None:
            stamp = stamp.replace(tzinfo=timezone.utc)
        return stamp

    # sorted() leaves the caller's list in its original order.
    for issue in sorted(issues_data, key=created_at):
        if issue["state"] == "closed":
            continue

        if issue["title"].startswith(delegation_prefixes):
            continue

        # Simplified delegation check based on the issue itself; an accurate
        # check might need to fetch the comments.
        body = issue["body"] or ""  # the body may be missing (None)
        if issue["comments"] > 0 and (
            "Tarefa delegada ao Dir." in body or "n8n Workflow" in issue["title"]
        ):
            continue

        # Open, non-delegated, original issue.
        return {
            "number": issue["number"],
            "title": issue["title"],
            "body": issue["body"],
        }

    return None
|
||||
|
||||
|
||||
# Main execution flow: read the saved issue dump and report the oldest
# open issue that has not been delegated.
temp_file_path = "open_issues.json"
owner = "ealmeida"
repo = "mcp-paperclip"
state = "open"

# Fetching has to happen outside this script (MCP tools cannot be called from
# here), so only the pre-existing dump is processed.
if not os.path.exists(temp_file_path):
    print(f"Error: Issue data file not found at {temp_file_path}. Please create it manually with full data.")
    raise SystemExit(1)

with open(temp_file_path, "r", encoding="utf-8") as f:
    full_issues_content = f.read()

issues_data, error = parse_mcp_output(full_issues_content)
if error:
    print(error)
    raise SystemExit(1)

unhandled_issue = find_first_unhandled_original_issue(issues_data)

if unhandled_issue:
    # Single quotes inside the f-strings: reusing double quotes is a syntax
    # error on Python versions before 3.12.
    print("Oldest unhandled original issue found:")
    print(f"Issue Number: {unhandled_issue['number']}")
    print(f"Issue Title: {unhandled_issue['title']}")
    print(f"Issue Body: {unhandled_issue['body']}")
else:
    print("No unhandled original issues found in the provided data.")
|
||||
Executable
+4
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
# Report, for every agent that has an instructionsFilePath configured,
# whether that file exists on this machine.
echo "$(date): Validating instructionsFilePath..."

# -A (unaligned) and -t (tuples only) make psql print one bare "a|b|c" line
# per row, so no header, separator or row-count lines reach the loop.
PGPASSWORD="${PGPASSWORD:-paperclip}" psql -h localhost -p 54329 -U paperclip -d paperclip -At -F '|' \
  -c "SELECT name, COALESCE(adapter_config->>'instructionsFilePath', 'none') as path, status FROM agents WHERE adapter_config ? 'instructionsFilePath' ORDER BY name;" |
while IFS='|' read -r name path status; do
  if [[ "$path" != 'none' ]] && [ -f "$path" ]; then
    echo "OK: $name ($status) -> $path"
  else
    echo "MISSING: $name ($status) -> $path"
  fi
done

echo "---"
|
||||
Executable
+151
@@ -0,0 +1,151 @@
|
||||
"""Lightweight connection handling for MCP servers."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from contextlib import AsyncExitStack
|
||||
from typing import Any
|
||||
|
||||
from mcp import ClientSession, StdioServerParameters
|
||||
from mcp.client.sse import sse_client
|
||||
from mcp.client.stdio import stdio_client
|
||||
from mcp.client.streamable_http import streamablehttp_client
|
||||
|
||||
|
||||
class MCPConnection(ABC):
    """Base class for MCP server connections.

    Used as an async context manager: entering opens the transport returned by
    ``_create_context`` and initializes a ``ClientSession`` on top of it;
    leaving closes both.
    """

    def __init__(self):
        # Both stay None until __aenter__ has completed successfully.
        self.session = None
        self._stack = None

    @abstractmethod
    def _create_context(self):
        """Create the connection context based on connection type."""

    async def __aenter__(self):
        """Initialize MCP server connection.

        Raises:
            ValueError: If the transport context yields neither 2 nor 3 items.
        """
        stack = AsyncExitStack()
        await stack.__aenter__()

        try:
            result = await stack.enter_async_context(self._create_context())

            # Transports yield (read, write) or (read, write, extra).
            if len(result) == 2:
                read, write = result
            elif len(result) == 3:
                read, write, _ = result
            else:
                raise ValueError(f"Unexpected context result: {result}")

            session = await stack.enter_async_context(ClientSession(read, write))
            await session.initialize()
        except BaseException:
            await stack.__aexit__(None, None, None)
            raise

        # Publish state only after everything succeeded, so a failed connect
        # never leaves a closed stack or session on the instance.
        self._stack = stack
        self.session = session
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Clean up MCP server connection resources."""
        # Reset state first so it is cleared even if teardown raises.
        stack, self._stack, self.session = self._stack, None, None
        if stack is not None:
            await stack.__aexit__(exc_type, exc_val, exc_tb)

    def _require_session(self):
        """Return the active session or raise if the connection is not open."""
        if self.session is None:
            raise RuntimeError(
                "MCP connection is not open; use it inside 'async with'"
            )
        return self.session

    async def list_tools(self) -> list[dict[str, Any]]:
        """Retrieve available tools from the MCP server.

        Returns:
            One dict per tool with ``name``, ``description`` and ``input_schema``.

        Raises:
            RuntimeError: If the connection has not been opened.
        """
        response = await self._require_session().list_tools()
        return [
            {
                "name": tool.name,
                "description": tool.description,
                "input_schema": tool.inputSchema,
            }
            for tool in response.tools
        ]

    async def call_tool(self, tool_name: str, arguments: dict[str, Any]) -> Any:
        """Call a tool on the MCP server and return the ``content`` of its result.

        Raises:
            RuntimeError: If the connection has not been opened.
        """
        result = await self._require_session().call_tool(tool_name, arguments=arguments)
        return result.content
|
||||
|
||||
|
||||
class MCPConnectionStdio(MCPConnection):
    """MCP connection over ``stdio_client``, configured with a command,
    its arguments and an optional environment."""

    def __init__(self, command: str, args: list[str] = None, env: dict[str, str] = None):
        super().__init__()
        self.env = env
        # A missing or empty argument list becomes a fresh empty list.
        self.args = args if args else []
        self.command = command

    def _create_context(self):
        server_params = StdioServerParameters(
            command=self.command,
            args=self.args,
            env=self.env,
        )
        return stdio_client(server_params)
|
||||
|
||||
|
||||
class MCPConnectionSSE(MCPConnection):
    """MCP connection over ``sse_client`` (Server-Sent Events) for a URL and
    optional HTTP headers."""

    def __init__(self, url: str, headers: dict[str, str] = None):
        super().__init__()
        # Missing or empty headers become a fresh empty dict.
        self.headers = headers if headers else {}
        self.url = url

    def _create_context(self):
        options = {"url": self.url, "headers": self.headers}
        return sse_client(**options)
|
||||
|
||||
|
||||
class MCPConnectionHTTP(MCPConnection):
    """MCP connection over ``streamablehttp_client`` (Streamable HTTP) for a
    URL and optional HTTP headers."""

    def __init__(self, url: str, headers: dict[str, str] = None):
        super().__init__()
        # Missing or empty headers become a fresh empty dict.
        self.headers = headers if headers else {}
        self.url = url

    def _create_context(self):
        options = {"url": self.url, "headers": self.headers}
        return streamablehttp_client(**options)
|
||||
|
||||
|
||||
def create_connection(
    transport: str,
    command: str = None,
    args: list[str] = None,
    env: dict[str, str] = None,
    url: str = None,
    headers: dict[str, str] = None,
) -> MCPConnection:
    """Build the MCP connection object that matches *transport*.

    The transport name is matched case-insensitively.

    Args:
        transport: "stdio", "sse", or "http" (also "streamable_http" /
            "streamable-http").
        command: Command to run (stdio only; required there).
        args: Command arguments (stdio only).
        env: Environment variables (stdio only).
        url: Server URL (sse and http only; required there).
        headers: HTTP headers (sse and http only).

    Returns:
        MCPConnection instance for the chosen transport.

    Raises:
        ValueError: If the transport is unknown, or the command/URL it
            requires is missing.
    """
    kind = transport.lower()

    if kind == "stdio":
        if command:
            return MCPConnectionStdio(command=command, args=args, env=env)
        raise ValueError("Command is required for stdio transport")

    if kind == "sse":
        if url:
            return MCPConnectionSSE(url=url, headers=headers)
        raise ValueError("URL is required for sse transport")

    if kind in ("http", "streamable_http", "streamable-http"):
        if url:
            return MCPConnectionHTTP(url=url, headers=headers)
        raise ValueError("URL is required for http transport")

    raise ValueError(f"Unsupported transport type: {kind}. Use 'stdio', 'sse', or 'http'")
|
||||
Executable
+373
@@ -0,0 +1,373 @@
|
||||
"""MCP Server Evaluation Harness
|
||||
|
||||
This script evaluates MCP servers by running test questions against them using Claude.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from anthropic import Anthropic
|
||||
|
||||
from connections import create_connection
|
||||
|
||||
EVALUATION_PROMPT = """You are an AI assistant with access to tools.
|
||||
|
||||
When given a task, you MUST:
|
||||
1. Use the available tools to complete the task
|
||||
2. Provide summary of each step in your approach, wrapped in <summary> tags
|
||||
3. Provide feedback on the tools provided, wrapped in <feedback> tags
|
||||
4. Provide your final response, wrapped in <response> tags
|
||||
|
||||
Summary Requirements:
|
||||
- In your <summary> tags, you must explain:
|
||||
- The steps you took to complete the task
|
||||
- Which tools you used, in what order, and why
|
||||
- The inputs you provided to each tool
|
||||
- The outputs you received from each tool
|
||||
- A summary for how you arrived at the response
|
||||
|
||||
Feedback Requirements:
|
||||
- In your <feedback> tags, provide constructive feedback on the tools:
|
||||
- Comment on tool names: Are they clear and descriptive?
|
||||
- Comment on input parameters: Are they well-documented? Are required vs optional parameters clear?
|
||||
- Comment on descriptions: Do they accurately describe what the tool does?
|
||||
- Comment on any errors encountered during tool usage: Did the tool fail to execute? Did the tool return too many tokens?
|
||||
- Identify specific areas for improvement and explain WHY they would help
|
||||
- Be specific and actionable in your suggestions
|
||||
|
||||
Response Requirements:
|
||||
- Your response should be concise and directly address what was asked
|
||||
- Always wrap your final response in <response> tags
|
||||
- If you cannot solve the task return <response>NOT_FOUND</response>
|
||||
- For numeric responses, provide just the number
|
||||
- For IDs, provide just the ID
|
||||
- For names or text, provide the exact text requested
|
||||
- Your response should go last"""
|
||||
|
||||
|
||||
def parse_evaluation_file(file_path: Path) -> list[dict[str, Any]]:
    """Parse an XML evaluation file into question/answer pairs.

    Every ``qa_pair`` element below the root that has both a ``question`` and
    an ``answer`` child yields one dict; pairs missing either child are skipped.

    Args:
        file_path: Path of the XML file to read.

    Returns:
        A list of ``{"question": str, "answer": str}`` dicts with surrounding
        whitespace stripped. An empty list is returned (after printing the
        error) when the file cannot be read or is not well-formed XML.
    """
    try:
        root = ET.parse(file_path).getroot()
    except (ET.ParseError, OSError) as e:
        # Only I/O and XML syntax problems are expected here; anything else is a
        # programming error and is allowed to propagate instead of being hidden.
        print(f"Error parsing evaluation file {file_path}: {e}")
        return []

    evaluations = []
    for qa_pair in root.findall(".//qa_pair"):
        question_elem = qa_pair.find("question")
        answer_elem = qa_pair.find("answer")
        if question_elem is None or answer_elem is None:
            continue
        evaluations.append({
            # ``.text`` is None for an empty element, hence the ``or ""``.
            "question": (question_elem.text or "").strip(),
            "answer": (answer_elem.text or "").strip(),
        })
    return evaluations
|
||||
|
||||
|
||||
def extract_xml_content(text: str, tag: str) -> str | None:
|
||||
"""Extract content from XML tags."""
|
||||
pattern = rf"<{tag}>(.*?)</{tag}>"
|
||||
matches = re.findall(pattern, text, re.DOTALL)
|
||||
return matches[-1].strip() if matches else None
|
||||
|
||||
|
||||
async def agent_loop(
    client: Anthropic,
    model: str,
    question: str,
    tools: list[dict[str, Any]],
    connection: Any,
) -> tuple[str, dict[str, Any]]:
    """Run the agent loop with MCP tools until the model stops requesting them.

    Each turn sends the conversation to ``client.messages.create``. While the
    reply's ``stop_reason`` is ``"tool_use"``, every ``tool_use`` block in that
    reply is executed through ``connection.call_tool`` and all results are sent
    back together in a single user message.

    Args:
        client: Client whose ``messages.create`` is called for every turn.
        model: Model name forwarded to ``messages.create``.
        question: Initial user message.
        tools: Tool definitions forwarded to ``messages.create``.
        connection: Object exposing an awaitable ``call_tool(name, input)``.

    Returns:
        ``(response_text, tool_metrics)``. ``response_text`` is the text of the
        first block of the final reply that has a ``text`` attribute, or
        ``None`` if there is none. ``tool_metrics`` maps each tool name to
        ``{"count": int, "durations": list[float]}`` (seconds per call).
    """
    messages = [{"role": "user", "content": question}]
    tool_metrics = {}

    async def request_reply():
        # messages.create is invoked through asyncio.to_thread, i.e. in a worker thread.
        reply = await asyncio.to_thread(
            client.messages.create,
            model=model,
            max_tokens=4096,
            system=EVALUATION_PROMPT,
            messages=messages,
            tools=tools,
        )
        messages.append({"role": "assistant", "content": reply.content})
        return reply

    response = await request_reply()

    while response.stop_reason == "tool_use":
        tool_results = []
        # A single reply may contain several tool_use blocks; answer all of
        # them, not just the first, so none is left without a tool_result.
        for block in response.content:
            if block.type != "tool_use":
                continue

            tool_start_ts = time.time()
            try:
                tool_result = await connection.call_tool(block.name, block.input)
                if isinstance(tool_result, (dict, list)):
                    tool_response = json.dumps(tool_result)
                else:
                    tool_response = str(tool_result)
            except Exception as e:
                # Deliberate catch-all: the failure text is handed to the model
                # as the tool result instead of aborting the evaluation.
                tool_response = f"Error executing tool {block.name}: {str(e)}\n"
                tool_response += traceback.format_exc()
            tool_duration = time.time() - tool_start_ts

            stats = tool_metrics.setdefault(block.name, {"count": 0, "durations": []})
            stats["count"] += 1
            stats["durations"].append(tool_duration)

            tool_results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": tool_response,
            })

        if not tool_results:
            # stop_reason said "tool_use" but no such block exists; nothing to answer.
            break

        messages.append({"role": "user", "content": tool_results})
        response = await request_reply()

    response_text = next(
        (block.text for block in response.content if hasattr(block, "text")),
        None,
    )
    return response_text, tool_metrics
|
||||
|
||||
|
||||
async def evaluate_single_task(
    client: Anthropic,
    model: str,
    qa_pair: dict[str, Any],
    tools: list[dict[str, Any]],
    connection: Any,
    task_index: int,
) -> dict[str, Any]:
    """Run one question through the agent loop and score the answer.

    Args:
        client: Forwarded to ``agent_loop``.
        model: Forwarded to ``agent_loop``.
        qa_pair: Dict with the ``"question"`` to ask and the expected ``"answer"``.
        tools: Forwarded to ``agent_loop``.
        connection: Forwarded to ``agent_loop``.
        task_index: Zero-based task position, used only in the progress message.

    Returns:
        A result dict with the question, expected and actual answers, a 0/1
        score (1 only on exact string equality), the wall-clock duration in
        seconds, the per-tool metrics and total call count, and the extracted
        ``<summary>`` and ``<feedback>`` sections.
    """
    started_at = time.time()
    question = qa_pair["question"]
    expected = qa_pair["answer"]

    print(f"Task {task_index + 1}: Running task with question: {question}")
    response, tool_metrics = await agent_loop(client, model, question, tools, connection)

    actual = extract_xml_content(response, "response")
    summary = extract_xml_content(response, "summary")
    feedback = extract_xml_content(response, "feedback")

    elapsed = time.time() - started_at

    # A missing or empty extracted answer always scores 0.
    if actual:
        score = int(actual == expected)
    else:
        score = 0

    call_total = sum(len(metrics["durations"]) for metrics in tool_metrics.values())

    return {
        "question": question,
        "expected": expected,
        "actual": actual,
        "score": score,
        "total_duration": elapsed,
        "tool_calls": tool_metrics,
        "num_tool_calls": call_total,
        "summary": summary,
        "feedback": feedback,
    }
|
||||
|
||||
|
||||
REPORT_HEADER = """
|
||||
# Evaluation Report
|
||||
|
||||
## Summary
|
||||
|
||||
- **Accuracy**: {correct}/{total} ({accuracy:.1f}%)
|
||||
- **Average Task Duration**: {average_duration_s:.2f}s
|
||||
- **Average Tool Calls per Task**: {average_tool_calls:.2f}
|
||||
- **Total Tool Calls**: {total_tool_calls}
|
||||
|
||||
---
|
||||
"""
|
||||
|
||||
TASK_TEMPLATE = """
|
||||
### Task {task_num}
|
||||
|
||||
**Question**: {question}
|
||||
**Ground Truth Answer**: `{expected_answer}`
|
||||
**Actual Answer**: `{actual_answer}`
|
||||
**Correct**: {correct_indicator}
|
||||
**Duration**: {total_duration:.2f}s
|
||||
**Tool Calls**: {tool_calls}
|
||||
|
||||
**Summary**
|
||||
{summary}
|
||||
|
||||
**Feedback**
|
||||
{feedback}
|
||||
|
||||
---
|
||||
"""
|
||||
|
||||
|
||||
async def run_evaluation(
    eval_path: Path,
    connection: Any,
    model: str = "claude-3-7-sonnet-20250219",
) -> str:
    """Evaluate every task in an evaluation file and render a report.

    Tasks are run one after another, in file order.

    Args:
        eval_path: XML file handed to ``parse_evaluation_file``.
        connection: Connection used to list the tools and to execute tool calls.
        model: Model name used for every task.

    Returns:
        The report text: ``REPORT_HEADER`` filled with the aggregate figures,
        followed by one ``TASK_TEMPLATE`` section per task.
    """
    print("🚀 Starting Evaluation")

    client = Anthropic()

    tools = await connection.list_tools()
    print(f"📋 Loaded {len(tools)} tools from MCP server")

    qa_pairs = parse_evaluation_file(eval_path)
    print(f"📋 Loaded {len(qa_pairs)} evaluation tasks")

    results = []
    for index, pair in enumerate(qa_pairs):
        print(f"Processing task {index + 1}/{len(qa_pairs)}")
        results.append(await evaluate_single_task(client, model, pair, tools, connection, index))

    task_count = len(results)
    correct = sum(r["score"] for r in results)
    total_tool_calls = sum(r["num_tool_calls"] for r in results)
    if results:
        accuracy = (correct / task_count) * 100
        average_duration_s = sum(r["total_duration"] for r in results) / task_count
        average_tool_calls = sum(r["num_tool_calls"] for r in results) / task_count
    else:
        # No tasks: report zeros rather than dividing by zero.
        accuracy = 0
        average_duration_s = 0
        average_tool_calls = 0

    sections = [
        REPORT_HEADER.format(
            correct=correct,
            total=task_count,
            accuracy=accuracy,
            average_duration_s=average_duration_s,
            average_tool_calls=average_tool_calls,
            total_tool_calls=total_tool_calls,
        )
    ]
    for number, (pair, outcome) in enumerate(zip(qa_pairs, results), start=1):
        sections.append(
            TASK_TEMPLATE.format(
                task_num=number,
                question=pair["question"],
                expected_answer=pair["answer"],
                actual_answer=outcome["actual"] or "N/A",
                correct_indicator="✅" if outcome["score"] else "❌",
                total_duration=outcome["total_duration"],
                tool_calls=json.dumps(outcome["tool_calls"], indent=2),
                summary=outcome["summary"] or "N/A",
                feedback=outcome["feedback"] or "N/A",
            )
        )

    return "".join(sections)
|
||||
|
||||
|
||||
def parse_headers(header_list: list[str]) -> dict[str, str]:
    """Turn ``'Key: Value'`` strings into a header dictionary.

    Each entry is split at its first colon and both parts are stripped.
    Entries without a colon are skipped with a printed warning. A ``None`` or
    empty input gives an empty dict.
    """
    parsed = {}
    for entry in header_list or []:
        name, colon, value = entry.partition(":")
        if not colon:
            print(f"Warning: Ignoring malformed header: {entry}")
            continue
        parsed[name.strip()] = value.strip()
    return parsed
|
||||
|
||||
|
||||
def parse_env_vars(env_list: list[str]) -> dict[str, str]:
    """Turn ``'KEY=VALUE'`` strings into an environment dictionary.

    Each entry is split at its first ``=`` and both parts are stripped.
    Entries without ``=`` are skipped with a printed warning. A ``None`` or
    empty input gives an empty dict.
    """
    parsed = {}
    for entry in env_list or []:
        name, equals, value = entry.partition("=")
        if not equals:
            print(f"Warning: Ignoring malformed environment variable: {entry}")
            continue
        parsed[name.strip()] = value.strip()
    return parsed
|
||||
|
||||
|
||||
async def main():
    """Command-line entry point: parse arguments, connect, evaluate, emit the report.

    Exits with status 1 when the evaluation file does not exist or when
    ``create_connection`` rejects the transport arguments. The report is
    written to ``--output`` when given, otherwise printed to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate MCP servers using test questions",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Evaluate a local stdio MCP server
python evaluation.py -t stdio -c python -a my_server.py eval.xml

# Evaluate an SSE MCP server
python evaluation.py -t sse -u https://example.com/mcp -H "Authorization: Bearer token" eval.xml

# Evaluate an HTTP MCP server with custom model
python evaluation.py -t http -u https://example.com/mcp -m claude-3-5-sonnet-20241022 eval.xml
""",
    )

    parser.add_argument("eval_file", type=Path, help="Path to evaluation XML file")
    parser.add_argument("-t", "--transport", choices=["stdio", "sse", "http"], default="stdio", help="Transport type (default: stdio)")
    parser.add_argument("-m", "--model", default="claude-3-7-sonnet-20250219", help="Claude model to use (default: claude-3-7-sonnet-20250219)")

    stdio_group = parser.add_argument_group("stdio options")
    stdio_group.add_argument("-c", "--command", help="Command to run MCP server (stdio only)")
    stdio_group.add_argument("-a", "--args", nargs="+", help="Arguments for the command (stdio only)")
    stdio_group.add_argument("-e", "--env", nargs="+", help="Environment variables in KEY=VALUE format (stdio only)")

    remote_group = parser.add_argument_group("sse/http options")
    remote_group.add_argument("-u", "--url", help="MCP server URL (sse/http only)")
    remote_group.add_argument("-H", "--header", nargs="+", dest="headers", help="HTTP headers in 'Key: Value' format (sse/http only)")

    parser.add_argument("-o", "--output", type=Path, help="Output file for evaluation report (default: stdout)")

    args = parser.parse_args()

    if not args.eval_file.exists():
        print(f"Error: Evaluation file not found: {args.eval_file}")
        sys.exit(1)

    headers = parse_headers(args.headers) if args.headers else None
    env_vars = parse_env_vars(args.env) if args.env else None

    try:
        connection = create_connection(
            transport=args.transport,
            command=args.command,
            args=args.args,
            env=env_vars,
            url=args.url,
            headers=headers,
        )
    except ValueError as e:
        print(f"Error: {e}")
        sys.exit(1)

    print(f"🔗 Connecting to MCP server via {args.transport}...")

    async with connection:
        print("✅ Connected successfully")
        report = await run_evaluation(args.eval_file, connection, args.model)

        if args.output:
            # The report contains emoji; write UTF-8 explicitly so the result
            # does not depend on (or fail under) the locale's default encoding.
            args.output.write_text(report, encoding="utf-8")
            print(f"\n✅ Report saved to {args.output}")
        else:
            print("\n" + report)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Executable
+145
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env bash
# propagate-structure.sh — Applies the standard structure (PROC-DEV-STANDARD) to another project
#
# Usage:
#   ./propagate-structure.sh /path/to/project [--dry-run]
#
# Creates (when missing): MEMORY.md, STATUS.md, AGENTS.md, CHANGELOG.md, desk.project,
#   docs/PLANS/, docs/SPECS/, docs/audit/, docs/audit/findings/
#
# Never overwrites existing files — it only fills gaps.

set -euo pipefail

TARGET="${1:-}"
DRY="${2:-}"

# The target must be an existing directory.
if [[ -z "$TARGET" || ! -d "$TARGET" ]]; then
  echo "Uso: $0 /caminho/projecto [--dry-run]" >&2
  exit 1
fi

# Reject any second argument other than --dry-run. Without this check a typo
# such as "--dryrun" is silently ignored and the script performs real writes.
if [[ -n "$DRY" && "$DRY" != "--dry-run" ]]; then
  echo "Uso: $0 /caminho/projecto [--dry-run]" >&2
  exit 1
fi

PROJECT_NAME="$(basename "$TARGET")"
TODAY="$(date +%Y-%m-%d)"
# Counters reported in the final summary.
CREATED=0
SKIPPED=0
|
||||
|
||||
# log TAG MESSAGE — print one status line made of a leading space, the tag and the message.
log() {
  printf ' %s %s\n' "$1" "$2"
}
|
||||
|
||||
# run COMMAND_STRING — execute the command string, or only log it when DRY is --dry-run.
run() {
  if [[ "$DRY" != "--dry-run" ]]; then
    # NOTE(review): eval re-parses the string as shell code, so callers must
    # quote any embedded paths themselves.
    eval "$1"
    return
  fi
  log "[DRY]" "$1"
}
|
||||
|
||||
# ensure_dir PATH — create the directory (and its parents) unless it already exists.
# Increments SKIPPED when the directory is present, CREATED otherwise. In
# dry-run mode nothing is created; the mkdir command is only logged.
ensure_dir() {
  local dir="$1"

  if [[ -d "$dir" ]]; then
    log "[SKIP]" "dir $dir"
    SKIPPED=$((SKIPPED+1))
    return
  fi

  if [[ "$DRY" == "--dry-run" ]]; then
    log "[DRY]" "mkdir -p '$dir'"
  else
    # Call mkdir directly instead of building a string for eval, so paths
    # containing quotes or other shell metacharacters are handled safely.
    mkdir -p -- "$dir"
  fi
  log "[NEW]" "dir $dir"
  CREATED=$((CREATED+1))
}
|
||||
|
||||
# ensure_file PATH CONTENT — write CONTENT to PATH unless the file already exists.
# Increments SKIPPED for an existing file, CREATED otherwise (dry-run included,
# where the file is only announced, not written).
ensure_file() {
  local path="$1" content="$2"

  if [[ -f "$path" ]]; then
    log "[SKIP]" "file $(basename "$path")"
    SKIPPED=$((SKIPPED+1))
    return
  fi

  case "$DRY" in
    --dry-run)
      log "[DRY]" "would create $path"
      ;;
    *)
      # printf '%s' writes the content verbatim, with no added trailing newline.
      printf '%s' "$content" > "$path"
      log "[NEW]" "file $(basename "$path")"
      ;;
  esac
  CREATED=$((CREATED+1))
}
|
||||
|
||||
# ── Main flow: banner, directories, standard files, summary, optional validation ──
echo "=== Propagação de estrutura PROC-DEV-STANDARD ==="
echo "Projecto: $TARGET"
if [[ "$DRY" == "--dry-run" ]]; then
  echo "Modo: DRY-RUN (nada será escrito)"
fi
echo

# Directories
ensure_dir "$TARGET/docs/PLANS"
ensure_dir "$TARGET/docs/SPECS"
ensure_dir "$TARGET/docs/audit/findings"

# Standard files
ensure_file "$TARGET/MEMORY.md" \
"# MEMORY.md — $PROJECT_NAME

## $TODAY
- Estrutura padrão aplicada via propagate-structure.sh
"

ensure_file "$TARGET/STATUS.md" \
"# STATUS.md — $PROJECT_NAME

**Versão:** 0.1 | **Data:** $TODAY | **Agente:** —

## Estado actual
- Em curso: —
- Bloqueios: —
- Próximos passos: —
"

ensure_file "$TARGET/AGENTS.md" \
"# AGENTS.md — $PROJECT_NAME

Directrizes específicas deste projecto para agentes IA.

## Referências obrigatórias

- \`ECOSYSTEM.md\` — Hub/06-Operacoes/Documentacao/ECOSYSTEM.md
- \`PROC-DEV-STANDARD\` — D7-SIS-006
- \`PROC-AUDIT-STANDARD\` — D7-SIS-007

## Protocolo de sessão

Início: ler MEMORY.md + STATUS.md. Fim: actualizar ambos.

## Convenções específicas

- (a definir)
"

ensure_file "$TARGET/CHANGELOG.md" \
"# CHANGELOG — $PROJECT_NAME

## 0.1.0 — $TODAY
### Adicionado
- Estrutura padrão (PROC-DEV-STANDARD)
"

# desk.project: remember whether it was missing before ensure_file runs, so the
# follow-up notice is shown only when the file was really created. Calling
# ensure_file unconditionally also counts an existing file as skipped, like
# every other standard file.
if [[ -f "$TARGET/desk.project" ]]; then
  DESK_WAS_MISSING=0
else
  DESK_WAS_MISSING=1
fi
ensure_file "$TARGET/desk.project" \
'{
"task_id": null,
"project_id": null,
"customer_id": null,
"gitea_repo": null
}
'
if [[ "$DESK_WAS_MISSING" -eq 1 && "$DRY" != "--dry-run" ]]; then
  echo " [AVISO] desk.project criado vazio — preencher task_id Desk CRM"
fi

echo
echo "=== Resultado ==="
echo " Criados: $CREATED"
echo " Existentes: $SKIPPED"
if [[ "$DRY" == "--dry-run" ]]; then
  echo " (dry-run — nada foi escrito)"
fi
echo

# Final validation: only on a real run, and only when the sibling validator is
# executable. Its exit status is ignored (|| true) so it never fails this script.
if [[ "$DRY" != "--dry-run" && -x "$(dirname "$0")/validate-structure.sh" ]]; then
  echo "=== Validação ==="
  bash "$(dirname "$0")/validate-structure.sh" "$TARGET" || true
fi
|
||||
Executable
+107
@@ -0,0 +1,107 @@
|
||||
#!/bin/bash
#
# validate-structure.sh — Validates a project's structure against the standard
#   ESTRUTURA.md of Modelo-Organizacao-Proc-Dev
#
# Usage: ./scripts/validate-structure.sh [project_path]
#   When no path is given, the current directory is validated
#
# Exit codes:
#   0 — Compliant
#   1 — Not compliant (missing structure)
#   2 — Execution error (project path is not a directory, or jq is unavailable)

set -euo pipefail

ROOT="${1:-.}"
ERRORS=0
WARNINGS=0

# Result helpers: print one tagged line and update the matching counter.
ok()   { echo " [OK] $1"; }
fail() { echo " [FALHA] $1"; ERRORS=$((ERRORS + 1)); }
warn() { echo " [AVISO] $1"; WARNINGS=$((WARNINGS + 1)); }

# require_dir REL — REL (relative to ROOT) must be an existing directory.
require_dir() {
  if [ -d "$ROOT/$1" ]; then
    ok "$1/ existe"
  else
    fail "$1/ não existe"
  fi
}

# A missing project directory is an execution error, not a conformance result.
if [ ! -d "$ROOT" ]; then
  echo "Erro: directoria não encontrada: $ROOT" >&2
  exit 2
fi

echo "=== Validação Estrutural (ESTRUTURA.md) ==="
echo "Projecto: $(cd "$ROOT" && pwd)"
echo ""

# 1-3. Required documentation directories
require_dir "docs"
require_dir "docs/PLANS"
require_dir "docs/SPECS"

# 4. AGENTS.md exists and has minimal content (at least 5 lines)
if [ -f "$ROOT/AGENTS.md" ]; then
  # Not named LINES: that is a shell-special variable bash may overwrite.
  AGENTS_LINES=$(wc -l < "$ROOT/AGENTS.md")
  if [ "$AGENTS_LINES" -ge 5 ]; then
    ok "AGENTS.md existe ($AGENTS_LINES linhas)"
  else
    warn "AGENTS.md existe mas tem apenas $AGENTS_LINES linhas"
  fi
else
  fail "AGENTS.md não existe"
fi

# 5. MEMORY.md exists
if [ -f "$ROOT/MEMORY.md" ]; then
  ok "MEMORY.md existe"
else
  fail "MEMORY.md não existe"
fi

# 6. desk.project exists and is valid JSON
if [ -f "$ROOT/desk.project" ]; then
  # Without jq the JSON cannot be judged; report that as an execution error
  # instead of misreporting the file as invalid.
  if ! command -v jq > /dev/null 2>&1; then
    echo "Erro: jq não está instalado" >&2
    exit 2
  fi
  if jq -e . "$ROOT/desk.project" > /dev/null 2>&1; then
    TASK_ID=$(jq -r '.task_id // "vazio"' "$ROOT/desk.project")
    ok "desk.project existe (task_id: $TASK_ID)"
  else
    fail "desk.project existe mas não é JSON válido"
  fi
else
  fail "desk.project não existe"
fi

# 7. Anti-pattern: docs/PROC and docs/RUNBOOK should not exist in a project
if [ -d "$ROOT/docs/PROC" ]; then
  warn "docs/PROC existe — não devia. Usar 06-Operacoes/Procedimentos/ no Hub"
fi
if [ -d "$ROOT/docs/RUNBOOK" ]; then
  warn "docs/RUNBOOK existe — não devia. Usar 06-Operacoes/Runbooks/ no Hub"
fi

echo ""
echo "=== Resultado ==="
if [ "$ERRORS" -gt 0 ]; then
  echo " FALHAS: $ERRORS"
  echo " AVISOS: $WARNINGS"
  echo " VEREDICTO: NÃO CONFORME"
  exit 1
else
  echo " FALHAS: 0"
  echo " AVISOS: $WARNINGS"
  echo " VEREDICTO: CONFORME"
  exit 0
fi
|
||||
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
OCR Pipeline para Documentos Contabilísticos
|
||||
PDF → imagem → RapidOCR → DeepSeek → JSON estruturado
|
||||
|
||||
Uso: python3 ocr-invoice.py <caminho_do_pdf>
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# ── CONFIG ───────────────────────────────────────────────────────
def _read_api_key(env_file: str = "~/.hermes/.env", name: str = "OPENCODE_GO_API_KEY") -> str:
    """Return the value of *name* from a ``KEY=VALUE`` env file.

    The first line that, once stripped, starts with ``name=`` wins and
    everything after that ``=`` is returned. If the file cannot be read, or it
    has no such line, the process environment is consulted; the final
    fallback is an empty string.
    """
    prefix = f"{name}="
    try:
        with open(os.path.expanduser(env_file), encoding="utf-8") as env:
            for line in env:
                line = line.strip()
                if line.startswith(prefix):
                    return line[len(prefix):]
    except OSError:
        # A missing or unreadable .env must not crash the script at import
        # time; fall through to the environment lookup below.
        pass
    return os.environ.get(name, "")


# Read the key from .env automatically
OPENCODE_GO_KEY = _read_api_key()
OPENCODE_GO_URL = "https://opencode.ai/zen/go/v1"
MODEL = "deepseek-v4-flash"
OCR_DPI = 200
|
||||
|
||||
|
||||
def pdf_to_image(pdf_path: str, dpi: int = OCR_DPI) -> str:
    """Render the first page of a PDF to a PNG file and return that file's path.

    The image is written to a newly created, uniquely named file in the
    system temporary directory, so two PDFs with the same stem never
    overwrite each other. The caller is responsible for deleting it.

    Args:
        pdf_path: Path of the PDF to open.
        dpi: Resolution passed to ``get_pixmap``.

    Returns:
        Path of the PNG file that was written.
    """
    import tempfile

    import pymupdf

    # mkstemp creates the file atomically with a unique name; only the path is
    # needed here, so the descriptor is closed right away.
    fd, img_path = tempfile.mkstemp(prefix=f"ocr_{Path(pdf_path).stem}_", suffix=".png")
    os.close(fd)

    try:
        # The context manager closes the document even if rendering fails.
        with pymupdf.open(pdf_path) as doc:
            pix = doc[0].get_pixmap(dpi=dpi)
            pix.save(img_path)
    except BaseException:
        # Do not leave an empty temp file behind when rendering fails.
        os.remove(img_path)
        raise
    return img_path
|
||||
|
||||
|
||||
def ocr_image(img_path: str) -> list[dict]:
    """Run RapidOCR on an image and return one dict per recognised text line.

    Each dict has the keys ``"text"`` and ``"score"`` (score converted to
    ``float``). An empty list is returned when the engine result is falsy or
    has no texts.
    """
    from rapidocr import RapidOCR

    recognised = RapidOCR()(img_path)
    if not (recognised and recognised.txts):
        return []
    return [
        {"text": txt, "score": float(score)}
        for txt, score in zip(recognised.txts, recognised.scores)
    ]
|
||||
|
||||
|
||||
def extract_structured(ocr_lines: list[dict], pdf_name: str) -> dict:
    """Ask the chat model to turn OCR text into structured invoice fields.

    Args:
        ocr_lines: OCR results; each item needs the keys ``"text"`` and ``"score"``.
        pdf_name: File name included in the prompt.

    Returns:
        The JSON object parsed from the model's reply.

    Raises:
        json.JSONDecodeError: If the reply (after removing an optional
            Markdown code fence) is empty or not valid JSON.
    """
    from openai import OpenAI

    client = OpenAI(
        api_key=OPENCODE_GO_KEY,
        base_url=OPENCODE_GO_URL,
    )

    # OCR text as one block, each line prefixed with its confidence score
    ocr_text = "\n".join(f"[{line['score']:.2f}] {line['text']}" for line in ocr_lines)

    prompt = f"""Analisa o seguinte texto extraído de um documento contabilístico (factura/recibo) via OCR.
Extrai os campos estruturados e devolve APENAS JSON válido (sem markdown, sem ```).

Texto OCR:
{ocr_text}

Nome do ficheiro: {pdf_name}

Devolve JSON com esta estrutura exata:
{{
"tipo_documento": "factura|recibo|nota_de_credito|outro",
"fornecedor": {{
"nome": "string",
"nif_cif": "string",
"morada": "string",
"telefone": "string"
}},
"cliente": {{
"nome": "string",
"nif": "string",
"morada": "string",
"telefone": "string"
}},
"documento": {{
"numero": "string",
"data": "YYYY-MM-DD",
"metodo_pagamento": "string",
"referencia": "string"
}},
"artigos": [
{{
"codigo": "string",
"descricao": "string",
"preco_unitario": 0.00,
"quantidade": 1,
"total": 0.00,
"notas": "string"
}}
],
"resumo": {{
"base_tributavel": 0.00,
"taxa_iva_percent": 0,
"iva_valor": 0.00,
"recargo_percent": 0,
"recargo_valor": 0.00,
"total_pagar": 0.00,
"moeda": "EUR"
}},
"notas": "string com observações relevantes"
}}

Regras:
- Preços com vírgula decimal (formato PT: 1.234,56 → 1234.56)
- Se campo não encontrado, usar null
- Se artigos não detectados, array vazio
- IVA: se não explícito, calcular a partir de base + total
- Moeda: EUR por defeito"""

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "És um assistente especializado em extração de dados de documentos contabilísticos portugueses. Devolves sempre JSON válido."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.0,
        max_tokens=16384,
    )

    # The message content can be None; treat that as an empty reply so the
    # failure surfaces as a JSON decode error rather than an AttributeError.
    raw = (response.choices[0].message.content or "").strip()

    # Remove a possible Markdown wrapper
    if raw.startswith("```"):
        # Drop the whole opening fence line. partition() never raises, even
        # when the reply contains no newline at all.
        raw = raw.partition("\n")[2]
    if raw.endswith("```"):
        raw = raw[:-3]

    return json.loads(raw.strip())
|
||||
|
||||
|
||||
def process_invoice(pdf_path: str) -> dict:
    """Run the full pipeline on one PDF: render, OCR, structured extraction.

    Progress and timings are printed along the way. The intermediate image
    is deleted whether or not the later stages succeed.

    Args:
        pdf_path: Path of the PDF to process.

    Returns:
        A dict with ``source_file``, ``ocr_lines`` (number of OCR lines),
        ``processing_time_seconds`` (rounded to 0.1 s) and ``extracted_data``.
    """
    print(f"📄 A processar: {pdf_path}")

    t0 = time.time()

    # 1. PDF → image
    print(" [1/3] PDF → imagem...")
    img = pdf_to_image(pdf_path)
    print(f" OK ({time.time()-t0:.1f}s)")

    try:
        # 2. OCR
        print(" [2/3] OCR (RapidOCR)...")
        t1 = time.time()
        lines = ocr_image(img)
        print(f" {len(lines)} linhas em {time.time()-t1:.1f}s")

        # 3. Structured extraction
        print(" [3/3] Extração estruturada (DeepSeek)...")
        t2 = time.time()
        structured = extract_structured(lines, os.path.basename(pdf_path))
        print(f" OK ({time.time()-t2:.1f}s)")

        elapsed = time.time() - t0
        print(f"\n✅ Pipeline completo em {elapsed:.1f}s")
    finally:
        # Cleanup runs on failure too, so a crashed OCR or extraction step does
        # not leave the temporary image behind.
        try:
            os.remove(img)
        except FileNotFoundError:
            pass

    return {
        "source_file": pdf_path,
        "ocr_lines": len(lines),
        "processing_time_seconds": round(elapsed, 1),
        "extracted_data": structured,
    }


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(f"Uso: python3 {sys.argv[0]} <caminho_do_pdf>")
        sys.exit(1)

    pdf_path = sys.argv[1]
    if not os.path.exists(pdf_path):
        print(f"Erro: ficheiro não encontrado: {pdf_path}")
        sys.exit(1)

    result = process_invoice(pdf_path)
    print("\n" + json.dumps(result, indent=2, ensure_ascii=False))
|
||||
Executable
+161
@@ -0,0 +1,161 @@
|
||||
#!/usr/bin/env bash
# Command-line front end of the podcast pipeline: dispatches one sub-command
# per invocation (see usage below).
set -euo pipefail

# Absolute directory of this script, used to locate the helper scripts.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# NOTE(review): lib.sh presumably provides the log_*/state helpers and the
# STATE_FILE, LOG_DIR and PROJECT_ROOT variables used below — confirm there.
source "${SCRIPT_DIR}/scripts/lib.sh"

# Print the command reference and exit with status 1.
usage() {
  cat << EOF
Podcast Descomplicar Digital — Pipeline Automatizado

Usage: $0 <command> [args]

Commands:
batch N Process next N episodes (content generation)
status Show pipeline state
retry NNN Re-run failed stage for episode NNN
csv [start] [N] Generate Canva CSV (default: next 7 episodes)
publish NNN D Generate publish commands for episode NNN on date D (YYYY-MM-DD)
init NNN Add episode NNN to pipeline state
produce NNN F Post-produce episode NNN from raw audio file F

Examples:
$0 batch 5
$0 status
$0 csv 20 10
$0 produce 20 /path/to/raw.wav
$0 publish 20 2026-04-14
EOF
  exit 1
}
|
||||
|
||||
# ── Command dispatch ──────────────────────────────────────────────
# At least the sub-command is required.
if [[ $# -lt 1 ]]; then
  usage
fi

CMD="$1"; shift

ensure_state_file
mkdir -p "$LOG_DIR"

case "$CMD" in
  batch)
    # Generate content for the next COUNT episodes, starting at .next_episode.
    COUNT="${1:-5}"
    # A non-numeric count would otherwise reach the arithmetic below.
    if [[ ! "$COUNT" =~ ^[1-9][0-9]*$ ]]; then
      log_error "batch: N must be a positive integer (got '${COUNT}')"
      exit 1
    fi
    START="$(jq -r '.next_episode' "$STATE_FILE")"
    END=$((START + COUNT - 1))
    log_info "=== BATCH: Processing episodes ${START}-${END} ==="

    CURRENT_DATE="$(jq -r '.next_publish_date' "$STATE_FILE")"
    # Fall back to today when the state holds no publish date yet.
    if [[ -z "$CURRENT_DATE" || "$CURRENT_DATE" == "null" ]]; then
      CURRENT_DATE="$(date '+%Y-%m-%d')"
    fi

    for ((ep=START; ep<=END; ep++)); do
      log_info "--- Episode ${ep} ---"

      STATUS="$(get_episode_status "$ep")"
      if [[ "$STATUS" == "not_found" || "$STATUS" == "pending" ]]; then
        "${SCRIPT_DIR}/scripts/generate-content.sh" "$ep" || {
          log_error "EP$(pad_number "$ep"): Content generation failed, stopping batch"
          exit 1
        }
      else
        log_info "EP$(pad_number "$ep"): Already at status '${STATUS}', skipping content generation"
      fi

      # Re-read the status: content generation may have advanced it.
      STATUS="$(get_episode_status "$ep")"
      if [[ "$STATUS" == "script_done" ]]; then
        log_warn "EP$(pad_number "$ep"): Audio TTS needed (manual step via AI Studio)"
        log_warn " -> Generate audio, save to Episodios/Audios/raw/ep_$(pad_number "$ep")_raw.wav"
        log_warn " -> Then run: $0 produce ${ep} <raw_audio_path>"
      fi

      CURRENT_DATE="$(next_weekday "$CURRENT_DATE")"
    done

    # Persist the new cursor via a temp file that replaces the state file on success.
    jq --argjson n "$((END + 1))" --arg d "$CURRENT_DATE" \
      '.next_episode = $n | .next_publish_date = $d' \
      "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"

    log_info "=== BATCH COMPLETE. Next episode: $((END + 1)), next date: ${CURRENT_DATE} ==="
    ;;

  status)
    echo "=== Pipeline Status ==="
    echo "Next episode: $(jq -r '.next_episode' "$STATE_FILE")"
    echo "Next publish date: $(jq -r '.next_publish_date' "$STATE_FILE")"
    echo ""
    EPISODE_COUNT="$(jq '.episodes | length' "$STATE_FILE")"
    if [[ "$EPISODE_COUNT" -gt 0 ]]; then
      echo "Episodes in pipeline:"
      # The jq filter left-pads the episode number with zeros to three digits.
      jq -r '.episodes[] | " EP\(.number | tostring | if length < 3 then "0" * (3 - length) + . else . end): \(.status) - \(.title)"' "$STATE_FILE"
      echo ""
      echo "Counts:"
      jq -r '.episodes | group_by(.status) | map({status: .[0].status, count: length}) | .[] | " \(.status): \(.count)"' "$STATE_FILE"
    else
      echo "No episodes in pipeline yet. Run 'batch' to start."
    fi
    ;;

  retry)
    # Re-run whichever stage matches the episode's current status.
    EP="${1:?Episode number required}"
    STATUS="$(get_episode_status "$EP")"
    log_info "Retrying EP$(pad_number "$EP") (current status: ${STATUS})"

    case "$STATUS" in
      pending|not_found)
        "${SCRIPT_DIR}/scripts/generate-content.sh" "$EP"
        ;;
      script_done)
        log_warn "Audio TTS needed — manual step via AI Studio"
        ;;
      audio_done)
        # Use the first existing raw audio file for this episode.
        RAW=""
        for f in "${PROJECT_ROOT}/Episodios/Audios/raw/ep_$(pad_number "$EP")_"*; do
          if [[ -f "$f" ]]; then
            RAW="$f"
            break
          fi
        done
        if [[ -n "$RAW" ]]; then
          "${SCRIPT_DIR}/scripts/post-produce.sh" "$EP" "$RAW"
        else
          log_error "Raw audio not found. Use: $0 produce ${EP} <path>"
          # Report the failure through the exit status as well as the log.
          exit 1
        fi
        ;;
      produced)
        log_info "Ready to publish. Use: $0 publish ${EP} YYYY-MM-DD"
        ;;
      *)
        log_warn "Unknown status: ${STATUS}"
        ;;
    esac
    ;;

  csv)
    "${SCRIPT_DIR}/scripts/generate-csv.sh" "${@}"
    ;;

  produce)
    EP="${1:?Episode number required}"
    RAW="${2:?Raw audio path required}"
    "${SCRIPT_DIR}/scripts/post-produce.sh" "$EP" "$RAW"
    ;;

  publish)
    EP="${1:?Episode number required}"
    # Without an explicit date, use the next publish date stored in the state file.
    SCHED_DATE="${2:-$(jq -r '.next_publish_date' "$STATE_FILE")}"
    "${SCRIPT_DIR}/scripts/publish-episode.sh" "$EP" "$SCHED_DATE"
    ;;

  init)
    # Register an episode using its entry in episode-guide-map.json.
    EP="${1:?Episode number required}"
    MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
    EP_DATA="$(jq -r --arg n "$EP" '.[$n] // empty' "$MAP_FILE")"
    if [[ -n "$EP_DATA" ]]; then
      TITLE="$(echo "$EP_DATA" | jq -r '.podcast_title')"
      URL="$(echo "$EP_DATA" | jq -r '.guide_url')"
      add_episode "$EP" "$TITLE" "$URL"
      log_info "Added EP$(pad_number "$EP"): ${TITLE}"
    else
      log_error "Episode $EP not found in episode-guide-map.json"
      # Report the failure through the exit status as well as the log.
      exit 1
    fi
    ;;

  *)
    usage
    ;;
esac
|
||||
Executable
+224
@@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# apply-wp-content.sh — Aplica conteúdo _wp.json a posts já agendados no WordPress
|
||||
#
|
||||
# Uso: ./scripts/apply-wp-content.sh <ep_num> [post_id]
|
||||
# Se post_id não for fornecido, pesquisa por data na pipeline-state.json
|
||||
#
|
||||
# Útil para: episódios que foram agendados antes do _wp.json existir
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# SSH connection settings for the WordPress host.
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
# NOTE(review): StrictHostKeyChecking=no together with UserKnownHostsFile=/dev/null
# turns off host-key verification for a root login — confirm this is intended.
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
# Directory on the remote host from which every `wp` command below is run.
WP_PATH="/home/ealmeida/public_html"

# Run a command on the WordPress host over SSH.
#   $@ — the remote command line, passed through to ssh unchanged.
# SSH_AUTH_SOCK is emptied for this call and IdentitiesOnly=yes is set, so
# only the key given with -i is offered. ${SSH_OPTS} is left unquoted on
# purpose: it must word-split into separate ssh options.
ssh_cmd() {
    SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" ${SSH_OPTS} "${SSH_USER}@${SSH_HOST}" "$@"
}
|
||||
|
||||
# Print the command-line synopsis and abort with status 1.
usage() {
    printf '%s\n' "Usage: $0 <ep_num> [post_id]"
    exit 1
}

# The episode number is mandatory; the WordPress post id is optional.
if (( $# < 1 )); then
    usage
fi
EP_NUM="$1"
MANUAL_POST_ID="${2:-}"
EP_PAD="$(pad_number "$EP_NUM")"
|
||||
|
||||
# Locate the episode's _wp.json. `find` is used (rather than a glob) so that
# file names containing accented characters are handled; the first regular
# file reported wins.
WP_JSON=""
while IFS= read -r candidate; do
    if [[ -f "$candidate" ]]; then
        WP_JSON="$candidate"
        break
    fi
done < <(find "${PROJECT_ROOT}" -maxdepth 3 -name "Episodio_${EP_PAD}_*_wp.json" 2>/dev/null)

# Without the JSON there is nothing to apply.
[[ -n "$WP_JSON" ]] || {
    log_error "EP${EP_PAD}: _wp.json não encontrado"
    exit 1
}
|
||||
|
||||
# Validate the _wp.json and attempt an automatic repair when it does not parse.
#
# The repair handles one specific corruption: unescaped double quotes inside
# the "content_html" string. The end of that string is located by the start of
# the following "hashtags" (or, failing that, "wp_tags") key.

# Return 0 when the file given as $1 parses as JSON, non-zero otherwise.
# The path is passed as an argument (never spliced into the Python source) so
# quotes in the file name cannot break or alter the code. Binary mode lets the
# json module detect the encoding itself.
_wp_json_is_valid() {
    python3 -c 'import json, sys; json.load(open(sys.argv[1], "rb"))' "$1" 2>/dev/null
}

if ! _wp_json_is_valid "$WP_JSON"; then
    log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
    # `|| true`: under `set -e` a failed repair would otherwise terminate the
    # script right here, before the explicit error message below is logged.
    python3 - "$WP_JSON" <<'PYFIX' || true
import json
import re
import sys

filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()

marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    sys.exit(1)

content_start = start + len(marker)
tail = raw[content_start:]
end_pattern = (re.search(r'",\s*\n\s*"hashtags"', tail)
               or re.search(r'",\s*\n\s*"wp_tags"', tail))
if not end_pattern:
    sys.exit(1)

content_end = content_start + end_pattern.start()
# Escape every double quote that is not already preceded by a backslash.
fixed = (raw[:content_start]
         + re.sub(r'(?<!\\)"', '\\"', raw[content_start:content_end])
         + raw[content_end:])

# Only overwrite the file when the repaired text really parses.
try:
    json.loads(fixed)
except ValueError:
    sys.exit(1)

with open(filepath, 'w', encoding='utf-8') as f:
    f.write(fixed)
print("reparado")
PYFIX
    if ! _wp_json_is_valid "$WP_JSON"; then
        log_error "EP${EP_PAD}: JSON inválido após reparação"
        exit 1
    fi
    log_info "EP${EP_PAD}: JSON reparado automaticamente"
fi
log_info "EP${EP_PAD}: Usando $(basename "$WP_JSON")"
|
||||
|
||||
# Resolve the WordPress post id: use the one given on the command line, or
# look the post up by the scheduled date stored in pipeline-state.json.
POST_ID="$MANUAL_POST_ID"
if [[ -z "$POST_ID" ]]; then
    # Scheduled date recorded for this episode in the pipeline state.
    SCHED_DATE="$(jq -r --argjson n "$EP_NUM" '.episodes[] | select(.num == $n) | .scheduled // empty' "$STATE_FILE")"
    if [[ -z "$SCHED_DATE" ]]; then
        log_error "EP${EP_PAD}: Não encontrado em pipeline-state.json, especifique post_id manualmente"
        exit 1
    fi
    log_info "EP${EP_PAD}: A pesquisar post por data ${SCHED_DATE}..."
    # `|| POST_ID=""`: a failing remote lookup must reach the explicit
    # "post not found" error below instead of aborting silently under `set -e`.
    POST_ID="$(ssh_cmd "cd '${WP_PATH}' && wp db query \
        \"SELECT ID FROM wpah_posts WHERE post_type='podcast' AND post_status='future' AND DATE(post_date)='${SCHED_DATE}' LIMIT 1\" \
        --skip-column-names \
        --allow-root 2>/dev/null")" || POST_ID=""
fi

# Drop any stray whitespace around the id returned by the query.
POST_ID="${POST_ID//[[:space:]]/}"

if [[ -z "$POST_ID" ]]; then
    log_error "EP${EP_PAD}: Post não encontrado no WordPress"
    exit 1
fi

# The id is interpolated unquoted into shell commands executed as root on the
# remote host, so anything that is not a plain positive integer is rejected.
if [[ ! "$POST_ID" =~ ^[0-9]+$ ]]; then
    log_error "EP${EP_PAD}: post_id inválido: '${POST_ID}'"
    exit 1
fi
log_info "EP${EP_PAD}: Post ID = ${POST_ID}"
|
||||
|
||||
# Extract the fields from the JSON with Python (handles HTML with accented
# characters more robustly than jq).

# Print one top-level field of $WP_JSON.
#   $1 — key name
#   $2 — "list" to join a list value, anything else / omitted for a scalar
#   $3 — separator used when $2 is "list"
# Missing keys print an empty line. The path and key travel as arguments, not
# as part of the Python source, so quotes in them cannot break the code.
_wp_json_field() {
    python3 -c '
import json, sys
path, key, mode, sep = sys.argv[1:5]
with open(path, "rb") as fh:
    data = json.load(fh)
if mode == "list":
    print(sep.join(data.get(key, [])))
else:
    print(data.get(key, ""))
' "$WP_JSON" "$1" "${2:-scalar}" "${3:-}" 2>/dev/null
}

WP_CONTENT="$(_wp_json_field content_html)"
WP_META="$(_wp_json_field meta_description)"
WP_KEYWORD="$(_wp_json_field keyword)"
WP_TAGS="$(_wp_json_field wp_tags list ',')"
WP_HASHTAGS="$(_wp_json_field hashtags list ' ')"
WP_SEO_TITLE="$(_wp_json_field seo_title)"
WP_SLUG="$(_wp_json_field slug)"
|
||||
|
||||
# RankMath auto-fix: rebuild the slug when it does not contain the focus
# keyword, and append the current year to an SEO title that has no digit.
# The Python helper prints exactly two lines: the slug, then the SEO title
# (each unchanged when no fix applies).
_AUTOFIX="$(python3 - "$WP_JSON" "$WP_SLUG" "$WP_SEO_TITLE" <<'PYFIX'
import json, sys, re, unicodedata, time

def slugify(text):
    # Lower-case, strip combining accents, drop everything except a-z, 0-9,
    # whitespace and hyphens, then collapse runs of hyphens/whitespace to "-".
    text = unicodedata.normalize("NFD", text.lower())
    text = "".join(c for c in text if unicodedata.category(c) != "Mn")
    text = re.sub(r'[^a-z0-9\s-]', '', text)
    return re.sub(r'[-\s]+', '-', text.strip()).rstrip('-')

# Portuguese function words ignored when comparing keyword and slug.
STOPWORDS = {"a","o","as","os","de","da","do","das","dos","e","em","no","na","nos","nas","para","por","pelo","pela"}

def strip_sw(s):
    # Slugified words of s, space-separated, with the stop words removed.
    return " ".join(t for t in slugify(s).replace("-", " ").split() if t not in STOPWORDS)

d = json.load(open(sys.argv[1]))
slug = sys.argv[2]
seo_title = sys.argv[3]
kw = d.get('keyword', '')

# Fix 1: slug does not contain the keyword (substring test on the
# stop-word-free forms).
slug_fixed = slug
if kw and slug and strip_sw(kw) not in strip_sw(slug):
    fk_slug = slugify(kw)
    # Keep at most two words of the old slug that are longer than 3 characters
    # and not already part of the keyword slug.
    orig_words = [w for w in slug.split('-') if w not in fk_slug.split('-') and len(w) > 3][:2]
    # New slug = keyword slug + kept words, capped at 75 characters.
    slug_fixed = (fk_slug + ('-' + '-'.join(orig_words) if orig_words else ''))[:75].rstrip('-')

# Fix 2: SEO title without any digit -> add the current year.
seo_fixed = seo_title
if seo_title and not re.search(r'\d', seo_title):
    year = time.strftime('%Y')
    # Insert the year before the first " | " separator when there is one,
    # otherwise at the end of the title. The main text before "|" is never
    # truncated — the year goes immediately before the separator.
    if ' | ' in seo_title:
        parts = seo_title.split(' | ', 1)
        seo_fixed = f"{parts[0].rstrip()} {year} | {parts[1]}"
    else:
        t = seo_title.rstrip()
        # Titles longer than 55 characters are cut at the last space within
        # the first 55 characters before the year is appended.
        seo_fixed = (t[:55].rsplit(' ', 1)[0] + f' {year}') if len(t) > 55 else f'{t} {year}'

print(slug_fixed)
print(seo_fixed)
PYFIX
)"

# Apply the corrected values: line 1 is the slug, line 2 the SEO title.
WP_SLUG_NEW="$(echo "$_AUTOFIX" | sed -n '1p')"
WP_SEO_TITLE_NEW="$(echo "$_AUTOFIX" | sed -n '2p')"

# Only replace (and log) a value that is non-empty and actually changed.
if [[ -n "$WP_SLUG_NEW" && "$WP_SLUG_NEW" != "$WP_SLUG" ]]; then
    log_info "EP${EP_PAD}: slug auto-corrigido: '${WP_SLUG}' → '${WP_SLUG_NEW}'"
    WP_SLUG="$WP_SLUG_NEW"
fi
if [[ -n "$WP_SEO_TITLE_NEW" && "$WP_SEO_TITLE_NEW" != "$WP_SEO_TITLE" ]]; then
    log_info "EP${EP_PAD}: seo_title auto-corrigido: ano adicionado → '${WP_SEO_TITLE_NEW}'"
    WP_SEO_TITLE="$WP_SEO_TITLE_NEW"
fi
|
||||
|
||||
# Excerpt = meta description, a blank line, then the hashtags. It is only
# built when BOTH values are present; otherwise it stays empty and the excerpt
# update further down is skipped.
WP_EXCERPT=""
if [[ -n "$WP_META" && -n "$WP_HASHTAGS" ]]; then
    WP_EXCERPT="${WP_META}

${WP_HASHTAGS}"
fi

# Every value below is embedded in a single-quoted argument of the remote
# command, so each single quote in it is rewritten as '\'' first.

# Apply post_content. The escaping is done by Python, with sed as a fallback
# if the Python call fails.
if [[ -n "$WP_CONTENT" ]]; then
    ESCAPED_CONTENT="$(printf '%s' "$WP_CONTENT" | python3 -c "import sys; data=sys.stdin.read(); print(data.replace(\"'\", \"'\\\\''\" ))" 2>/dev/null || echo "$WP_CONTENT" | sed "s/'/'\\\\''/g")"
    ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_content='${ESCAPED_CONTENT}' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: post_content aplicado"
fi

# Apply the excerpt.
if [[ -n "$WP_EXCERPT" ]]; then
    ESCAPED_EXCERPT="$(printf '%s' "$WP_EXCERPT" | sed "s/'/'\\\\''/g")"
    ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_excerpt='${ESCAPED_EXCERPT}' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: post_excerpt aplicado"
fi

# Apply the slug.
# NOTE(review): unlike the other values, the slug is not quote-escaped before
# being embedded in the remote command.
if [[ -n "$WP_SLUG" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_name='${WP_SLUG}' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: slug actualizado -> ${WP_SLUG}"
fi

# Apply the tags (comma-joined list from the JSON).
# NOTE(review): ${WP_TAGS} reaches the remote shell unquoted, so a tag that
# contains a space is split into several arguments — verify against the tags
# actually used.
if [[ -n "$WP_TAGS" ]]; then
    ssh_cmd "cd '${WP_PATH}' && wp post term set ${POST_ID} post_tag ${WP_TAGS} --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: tags aplicadas"
fi

# Rank Math: description
if [[ -n "$WP_META" ]]; then
    ESCAPED_META="$(echo "$WP_META" | sed "s/'/'\\\\''/g")"
    ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_description '${ESCAPED_META}' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: rank_math_description aplicado"
fi

# Rank Math: focus keyword
if [[ -n "$WP_KEYWORD" ]]; then
    ESCAPED_KW="$(echo "$WP_KEYWORD" | sed "s/'/'\\\\''/g")"
    ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_focus_keyword '${ESCAPED_KW}' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: rank_math_focus_keyword aplicado"
fi

# Rank Math: SEO title. When the JSON has no seo_title, fall back to
# "<title> | Podcast Descomplicar Digital" built from the JSON "title" field.
if [[ -z "$WP_SEO_TITLE" ]]; then
    WP_TITLE_FALLBACK="$(jq -r '.title // empty' "$WP_JSON")"
    [[ -n "$WP_TITLE_FALLBACK" ]] && WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
fi
if [[ -n "$WP_SEO_TITLE" ]]; then
    ESCAPED_SEO_TITLE="$(echo "$WP_SEO_TITLE" | sed "s/'/'\\\\''/g")"
    ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_title '${ESCAPED_SEO_TITLE}' --allow-root 2>/dev/null"
    log_info "EP${EP_PAD}: rank_math_title aplicado"
fi

log_info "EP${EP_PAD}: Conteúdo WP aplicado com sucesso (post ${POST_ID})"
# The post id is the last thing written to stdout.
echo "${POST_ID}"
|
||||
Executable
+177
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env bash
|
||||
# batch-prepare.sh — Pipeline completo para lote de episódios
|
||||
# Uso: ./scripts/batch-prepare.sh [--dry-run]
|
||||
# Para cada episódio: TTS → pós-produção → agendar no WordPress
|
||||
# Retomável: salta episódios já prontos
|
||||
|
||||
# Note: errexit (-e) is NOT enabled here; each pipeline step below checks its
# own exit status and records a per-episode result instead.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"

# --dry-run as first argument: log what would be executed without running it.
DRY_RUN=false
[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true

# GEMINI_API_KEY must be set (this check also applies in dry-run mode).
if [[ -z "${GEMINI_API_KEY:-}" ]]; then
    log_error "GEMINI_API_KEY não definida. Exporta primeiro: export GEMINI_API_KEY=..."
    exit 1
fi

# Batch definition: episodes in calendar order. The order lives in its own
# array because an associative array does not preserve insertion order.
EP_ORDER=(65 132 137 66 67 134 139 69 78 79 80 81 82 83 84)

# Publication date (YYYY-MM-DD) for each episode of the batch.
declare -A EP_DATES=(
    [65]="2026-07-02"
    [132]="2026-07-06"
    [137]="2026-07-07"
    [66]="2026-07-08"
    [67]="2026-07-09"
    [134]="2026-07-13"
    [139]="2026-07-14"
    [69]="2026-07-15"
    [78]="2026-07-16"
    [79]="2026-07-20"
    [80]="2026-07-21"
    [81]="2026-07-22"
    [82]="2026-07-23"
    [83]="2026-07-27"
    [84]="2026-07-28"
)

# One timestamped log file per batch run.
# LOG_DIR is not defined in this script — presumably provided by lib.sh.
BATCH_LOG="${LOG_DIR}/batch-$(date +%Y%m%d-%H%M%S).log"
mkdir -p "${LOG_DIR}"

# Per-episode outcome, keyed by episode number: "ok", "skip" or "erro_*".
declare -A RESULTS

# Write a timestamped message to stdout and append it to the batch log.
#   $* — message text
_batch_log() {
    local msg="$*"
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ${msg}" | tee -a "${BATCH_LOG}"
}

_batch_log "=== BATCH START — $(date) ==="
_batch_log "Episódios: ${EP_ORDER[*]}"
[[ "$DRY_RUN" == true ]] && _batch_log "MODO DRY-RUN activado"
||||
|
||||
# Main loop: for every episode run TTS -> post-production -> WordPress
# scheduling. A failing step records an "erro_*" result and moves on to the
# next episode; output of the child scripts goes to the batch log.
for EP_NUM in "${EP_ORDER[@]}"; do
    EP_PAD="$(pad_number "$EP_NUM")"
    SCHED_DATE="${EP_DATES[$EP_NUM]}"

    _batch_log "--- EP${EP_PAD} (${SCHED_DATE}) ---"

    # Skip episodes already scheduled (status "ready" in pipeline-state.json).
    # A jq failure is treated as "no status".
    ALREADY_STATUS=$(jq -r --argjson n "$EP_NUM" \
        '.episodes[] | select(.num == $n) | .status // ""' \
        "${STATE_FILE}" 2>/dev/null || echo "")

    if [[ "$ALREADY_STATUS" == "ready" ]]; then
        _batch_log "EP${EP_PAD}: já agendado — a saltar"
        RESULTS[$EP_NUM]="skip"
        continue
    fi

    # --- Step 1: TTS ---
    # An existing final MP3 means TTS and post-production are both skipped.
    FINAL_MP3=""
    for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
        [[ -f "$f" ]] && FINAL_MP3="$f" && break
    done

    if [[ -z "$FINAL_MP3" ]]; then
        _batch_log "EP${EP_PAD}: [1/3] A gerar áudio TTS..."
        if [[ "$DRY_RUN" == false ]]; then
            if ! bash "${SCRIPT_DIR}/generate-audio.sh" "$EP_NUM" >> "${BATCH_LOG}" 2>&1; then
                _batch_log "EP${EP_PAD}: ERRO no TTS — a saltar episódio"
                RESULTS[$EP_NUM]="erro_tts"
                continue
            fi
        else
            _batch_log "EP${EP_PAD}: [DRY-RUN] generate-audio.sh ${EP_NUM}"
        fi

        # --- Step 2: post-production ---
        # Collect the raw WAV parts (ep_<num>_p*.wav) in glob order.
        RAW_FILES=()
        for f in "${PROJECT_ROOT}/Episodios/Audios/raw/ep_${EP_PAD}_p"*.wav; do
            [[ -f "$f" ]] && RAW_FILES+=("$f")
        done

        # Missing raw files are only an error outside dry-run, where no audio
        # is generated in the first place.
        if [[ ${#RAW_FILES[@]} -eq 0 && "$DRY_RUN" == false ]]; then
            _batch_log "EP${EP_PAD}: ERRO — sem ficheiros raw após TTS — a saltar episódio"
            RESULTS[$EP_NUM]="erro_sem_raw"
            continue
        fi

        _batch_log "EP${EP_PAD}: [2/3] Pós-produção (${#RAW_FILES[@]} partes)..."
        if [[ "$DRY_RUN" == false ]]; then
            if ! bash "${SCRIPT_DIR}/post-produce.sh" "$EP_NUM" "${RAW_FILES[@]}" >> "${BATCH_LOG}" 2>&1; then
                _batch_log "EP${EP_PAD}: ERRO na pós-produção — a saltar episódio"
                RESULTS[$EP_NUM]="erro_postprod"
                continue
            fi
        else
            _batch_log "EP${EP_PAD}: [DRY-RUN] post-produce.sh ${EP_NUM} <raw_files>"
        fi

        # Look for the final MP3 again now that post-production has run.
        for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
            [[ -f "$f" ]] && FINAL_MP3="$f" && break
        done

        if [[ -z "$FINAL_MP3" && "$DRY_RUN" == false ]]; then
            _batch_log "EP${EP_PAD}: ERRO — MP3 final não encontrado após pós-produção"
            RESULTS[$EP_NUM]="erro_sem_mp3"
            continue
        fi
    else
        _batch_log "EP${EP_PAD}: [1/3] Áudio já existe: $(basename "${FINAL_MP3}") — a saltar TTS+pós-prod"
    fi

    # --- Step 3: schedule on WordPress ---
    _batch_log "EP${EP_PAD}: [3/3] A agendar no WordPress para ${SCHED_DATE}..."
    if [[ "$DRY_RUN" == false ]]; then
        if ! bash "${SCRIPT_DIR}/schedule-episode.sh" "$EP_NUM" "$SCHED_DATE" >> "${BATCH_LOG}" 2>&1; then
            _batch_log "EP${EP_PAD}: ERRO no agendamento"
            RESULTS[$EP_NUM]="erro_schedule"
            continue
        fi
    else
        _batch_log "EP${EP_PAD}: [DRY-RUN] schedule-episode.sh ${EP_NUM} ${SCHED_DATE}"
    fi

    RESULTS[$EP_NUM]="ok"
    _batch_log "EP${EP_PAD}: ✓ COMPLETO"
done
|
||||
|
||||
# Final summary: one line per episode in batch order, followed by the totals.
# An episode without a recorded result is reported as "desconhecido" and
# counted as an error. Exit status is 1 when at least one episode failed.
_batch_log ""
_batch_log "=== RESUMO ==="

OK_COUNT=0
ERR_COUNT=0
SKIP_COUNT=0

for EP_NUM in "${EP_ORDER[@]}"; do
    EP_PAD="$(pad_number "$EP_NUM")"
    STATUS="${RESULTS[$EP_NUM]:-desconhecido}"
    if [[ "$STATUS" == "ok" ]]; then
        _batch_log "✓ EP${EP_PAD} — completo"
        OK_COUNT=$((OK_COUNT + 1))
    elif [[ "$STATUS" == "skip" ]]; then
        _batch_log "⏭ EP${EP_PAD} — já agendado"
        SKIP_COUNT=$((SKIP_COUNT + 1))
    else
        _batch_log "✗ EP${EP_PAD} — ERRO: ${STATUS}"
        ERR_COUNT=$((ERR_COUNT + 1))
    fi
done

_batch_log ""
_batch_log "Total: ${OK_COUNT} ok | ${SKIP_COUNT} saltados | ${ERR_COUNT} erros"
_batch_log "Log completo: ${BATCH_LOG}"
_batch_log "=== BATCH END — $(date) ==="

if (( ERR_COUNT > 0 )); then
    exit 1
fi
exit 0
|
||||
Executable
+68
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Episodes to produce (calendar order).
EPISODES=(136 37 38 39 40 41 42 44 135 46 47 48 49 50 51)

TOTAL=${#EPISODES[@]}
DONE=0      # episodes attempted so far
FAILED=0    # episodes for which any step failed

# Directory where generate-audio.sh writes the raw WAV parts.
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"

log_info "=== BATCH PRODUCE: ${TOTAL} episodios ==="

# For each episode: generate the TTS audio, collect the raw parts and run
# post-production. A failing step counts the episode as failed and moves on.
for EP in "${EPISODES[@]}"; do
    EP_PAD="$(pad_number "$EP")"
    DONE=$((DONE + 1))
    log_info "--- [${DONE}/${TOTAL}] EP${EP_PAD} ---"

    # Step 1: generate audio (split + TTS).
    log_info "EP${EP_PAD}: Generating audio..."
    if bash "${SCRIPT_DIR}/generate-audio.sh" "$EP" 2>&1; then
        log_info "EP${EP_PAD}: Audio generation OK"
    else
        log_error "EP${EP_PAD}: Audio generation FAILED, skipping"
        FAILED=$((FAILED + 1))
        continue
    fi

    # Step 2: collect the raw audio parts.
    # (The former "alternate path" retry scanned exactly the same directory
    # and could never find anything new, so it has been dropped.)
    RAW_FILES=()
    for p in "${RAW_DIR}/ep_${EP_PAD}_p"*.wav; do
        if [[ -f "$p" ]]; then
            RAW_FILES+=("$p")
        fi
    done

    if [[ ${#RAW_FILES[@]} -eq 0 ]]; then
        log_error "EP${EP_PAD}: No raw audio files found, skipping post-production"
        FAILED=$((FAILED + 1))
        continue
    fi

    # Sort the parts numerically: version sort puts p2 before p10, which a
    # plain lexical sort does not. mapfile keeps each path as one element
    # without word splitting or glob expansion.
    mapfile -t RAW_FILES_SORTED < <(printf '%s\n' "${RAW_FILES[@]}" | sort -V)

    # Step 3: post-produce.
    log_info "EP${EP_PAD}: Post-producing ${#RAW_FILES_SORTED[@]} parts..."
    if bash "${SCRIPT_DIR}/post-produce.sh" "$EP" "${RAW_FILES_SORTED[@]}" 2>&1; then
        log_info "EP${EP_PAD}: Post-production OK"
    else
        log_error "EP${EP_PAD}: Post-production FAILED"
        FAILED=$((FAILED + 1))
        continue
    fi

    log_info "EP${EP_PAD}: DONE"
done

log_info "=== BATCH COMPLETE: ${DONE} processed, $((DONE - FAILED)) OK, ${FAILED} failed ==="
|
||||
Executable
+134
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Episodes handled by this batch, in processing order.
EPISODES=(88 89 55 56 133 57 58 59 60 61 62 63 138 140 64)
# Extended regex (used with `grep -E` below) marking where a script is split.
SPLIT_MARKER="$(jq -r '.tts_split_marker' "${PROJECT_ROOT}/config/audio-settings.json")"
# The body of a script is halved when it has more than this many words.
MAX_WORDS=1000
# Output directory for the raw WAV parts.
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
TOTAL=${#EPISODES[@]}
DONE=0      # episodes attempted so far
FAILED=0    # episodes for which split_and_generate returned non-zero

mkdir -p "$RAW_DIR"
|
||||
|
||||
# Split the script text, synthesise every part and post-produce the result.
# Private helper of split_and_generate; the caller owns (and removes) tmp_dir.
#   $1 — episode number   $2 — padded episode number
#   $3 — script file      $4 — existing scratch directory
# Returns 1 as soon as any step fails.
_split_and_generate_parts() {
    local ep_num="$1" ep_pad="$2" script_file="$3" tmp_dir="$4"

    # First split: everything from the first marker line onwards is the FAQ.
    local marker_line
    marker_line="$(grep -nE "$SPLIT_MARKER" "$script_file" | head -1 | cut -d: -f1 || echo "")"

    if [[ -n "$marker_line" && "$marker_line" -gt 1 ]]; then
        head -n "$((marker_line - 1))" "$script_file" > "${tmp_dir}/corpo.txt" || return 1
        tail -n "+${marker_line}" "$script_file" > "${tmp_dir}/faq.txt" || return 1
    else
        cp "$script_file" "${tmp_dir}/corpo.txt" || return 1
        : > "${tmp_dir}/faq.txt"
    fi

    local corpo_words
    corpo_words="$(wc -w < "${tmp_dir}/corpo.txt")"

    local num_parts=0
    if [[ "$corpo_words" -gt "$MAX_WORDS" ]]; then
        # Body too long: halve it at the blank line closest to the middle
        # (probing a few fixed offsets), or at the exact middle if none is found.
        local corpo_lines mid check line split_line offset
        corpo_lines="$(wc -l < "${tmp_dir}/corpo.txt")"
        mid=$((corpo_lines / 2))
        split_line="$mid"
        for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
            check=$((mid + offset))
            if [[ "$check" -gt 0 && "$check" -lt "$corpo_lines" ]]; then
                line="$(sed -n "${check}p" "${tmp_dir}/corpo.txt")"
                if [[ -z "$line" || "$line" =~ ^[[:space:]]*$ ]]; then
                    split_line="$check"
                    break
                fi
            fi
        done
        head -n "$split_line" "${tmp_dir}/corpo.txt" > "${tmp_dir}/part_1.txt" || return 1
        tail -n "+$((split_line + 1))" "${tmp_dir}/corpo.txt" > "${tmp_dir}/part_2.txt" || return 1
        if [[ -s "${tmp_dir}/faq.txt" ]]; then
            cp "${tmp_dir}/faq.txt" "${tmp_dir}/part_3.txt" || return 1
            num_parts=3
        else
            num_parts=2
        fi
    else
        cp "${tmp_dir}/corpo.txt" "${tmp_dir}/part_1.txt" || return 1
        if [[ -s "${tmp_dir}/faq.txt" ]]; then
            cp "${tmp_dir}/faq.txt" "${tmp_dir}/part_2.txt" || return 1
            num_parts=2
        else
            num_parts=1
        fi
    fi

    log_info "EP${ep_pad}: Split into ${num_parts} parts"

    # Generate TTS for each part; an existing raw WAV is reused (resume).
    local raw_files=()
    local i part_file output part_words
    for ((i = 1; i <= num_parts; i++)); do
        part_file="${tmp_dir}/part_${i}.txt"
        output="${RAW_DIR}/ep_${ep_pad}_p${i}.wav"
        part_words="$(wc -w < "$part_file")"

        if [[ -f "$output" ]]; then
            log_info "EP${ep_pad}: Part ${i}/${num_parts} already exists (${part_words} words), skipping"
        else
            log_info "EP${ep_pad}: Generating part ${i}/${num_parts} (${part_words} words)..."
            if ! python3 "${SCRIPT_DIR}/tts-single-part.py" "$part_file" "$output"; then
                # The file did not exist before this attempt; remove whatever
                # was written so a later run does not mistake it for a
                # finished part.
                rm -f "$output"
                log_error "EP${ep_pad}: TTS failed for part ${i}/${num_parts}"
                return 1
            fi
            log_info "EP${ep_pad}: Part ${i} done"
        fi
        raw_files+=("$output")
    done

    # Post-produce
    log_info "EP${ep_pad}: Post-producing..."
    if ! bash "${SCRIPT_DIR}/post-produce.sh" "$ep_num" "${raw_files[@]}"; then
        log_error "EP${ep_pad}: Post-production failed"
        return 1
    fi
    log_info "EP${ep_pad}: COMPLETE"
}

# Produce one episode end to end: locate its script, split it into parts,
# run TTS on each part and post-produce the result.
#
# Arguments:
#   $1 — episode number (unpadded)
# Globals read: PROJECT_ROOT, SCRIPT_DIR, SPLIT_MARKER, MAX_WORDS, RAW_DIR
# Returns:
#   0 on success, or when a final MP3 already exists (nothing to do);
#   1 when no script is found or any step fails.
#
# NOTE: this function is called as an `if` condition, where bash ignores
# `set -e` for the whole function body. Every step that can fail is therefore
# checked explicitly; otherwise a failure would still be reported as OK.
split_and_generate() {
    local ep_num="$1"
    local ep_pad
    ep_pad="$(pad_number "$ep_num")"

    # Find script file (first match wins).
    local f script_file=""
    for f in "${PROJECT_ROOT}/Episodios/Episodio_${ep_pad}_"*.txt; do
        if [[ -f "$f" ]]; then
            script_file="$f"
            break
        fi
    done
    if [[ -z "$script_file" ]]; then
        log_error "EP${ep_pad}: No script found"
        return 1
    fi

    local total_words
    total_words="$(wc -w < "$script_file")"
    log_info "EP${ep_pad}: ${total_words} words from $(basename "$script_file")"

    # Nothing to do when a final MP3 already exists.
    local existing_mp3=""
    for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${ep_pad}_"*.mp3; do
        if [[ -e "$f" ]]; then
            existing_mp3="$f"
            break
        fi
    done
    if [[ -n "$existing_mp3" ]]; then
        log_info "EP${ep_pad}: Final MP3 already exists, SKIPPING"
        return 0
    fi

    local tmp_dir
    if ! tmp_dir="$(mktemp -d)"; then
        log_error "EP${ep_pad}: Could not create temporary directory"
        return 1
    fi

    # Run the real work, then always remove the scratch directory — also on
    # failure — before handing the status back.
    local rc=0
    _split_and_generate_parts "$ep_num" "$ep_pad" "$script_file" "$tmp_dir" || rc=$?
    rm -rf "$tmp_dir"
    return "$rc"
}
|
||||
|
||||
# Driver: process every episode of the batch and report the totals.
# split_and_generate is deliberately invoked as an `if` condition so that a
# failing episode is counted instead of terminating the script.
log_info "=== BATCH TTS: ${TOTAL} episodios ==="

for EP in "${EPISODES[@]}"; do
    DONE=$((DONE + 1))
    EP_LABEL="EP$(pad_number "$EP")"
    log_info "--- [${DONE}/${TOTAL}] ${EP_LABEL} ---"

    if ! split_and_generate "$EP"; then
        log_error "${EP_LABEL}: FAILED"
        FAILED=$((FAILED + 1))
        continue
    fi
    log_info "${EP_LABEL}: OK"
done

log_info "=== BATCH COMPLETE: ${TOTAL} processed, $((TOTAL - FAILED)) OK, ${FAILED} failed ==="
|
||||
Executable
+168
@@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the help text and abort with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <episode_number>" \
        " Generates TTS audio via Gemini API for the given episode." \
        " Requires GEMINI_API_KEY environment variable." \
        " Splits text into blocks if needed (max ~1000 words per block)."
    exit 1
}

# Exactly one thing is required: the episode number.
if (( $# < 1 )); then
    usage
fi
EP_NUM="$1"
EP_PAD="$(pad_number "$EP_NUM")"
|
||||
|
||||
ensure_state_file

# The Gemini API key is mandatory.
[[ -n "${GEMINI_API_KEY:-}" ]] || {
    log_error "GEMINI_API_KEY not set. Export it first."
    exit 1
}

# TTS settings come from config/audio-settings.json.
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
TTS_MODEL="$(jq -r '.tts_model' "$SETTINGS_FILE")"
TTS_VOICE="$(jq -r '.tts_voice' "$SETTINGS_FILE")"
MAX_WORDS="$(jq -r '.tts_max_words_per_block' "$SETTINGS_FILE")"
SPLIT_MARKER="$(jq -r '.tts_split_marker' "$SETTINGS_FILE")"

# Locate the episode script: the first regular file matching the pattern.
SCRIPT_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
    [[ -f "$candidate" ]] || continue
    SCRIPT_FILE="$candidate"
    break
done

[[ -n "$SCRIPT_FILE" ]] || {
    log_error "EP${EP_PAD}: No script .txt found"
    exit 1
}

TOTAL_WORDS="$(wc -w < "$SCRIPT_FILE")"
log_info "EP${EP_PAD}: Script has ${TOTAL_WORDS} words, max per block: ${MAX_WORDS}"

# Raw WAV parts are written here.
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
mkdir -p "$RAW_DIR"

# Scratch directory for the text parts; removed when the script exits.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT
|
||||
|
||||
# Split the script into part_<n>.txt files inside TMP_DIR and set NUM_PARTS.
# A script within the word limit becomes a single part, unsplit.
if [[ "$TOTAL_WORDS" -le "$MAX_WORDS" ]]; then
    cp "$SCRIPT_FILE" "${TMP_DIR}/part_1.txt"
    NUM_PARTS=1
else
    # Smart split: first at the FAQ marker, then subdivide large parts.
    # MARKER_LINE is the line number of the first marker match, or empty.
    MARKER_LINE="$(grep -nE "$SPLIT_MARKER" "$SCRIPT_FILE" | head -1 | cut -d: -f1 || echo "")"

    # A marker on line 1 is ignored (it would leave an empty body).
    if [[ -n "$MARKER_LINE" && "$MARKER_LINE" -gt 1 ]]; then
        head -n "$((MARKER_LINE - 1))" "$SCRIPT_FILE" > "${TMP_DIR}/corpo.txt"
        tail -n "+${MARKER_LINE}" "$SCRIPT_FILE" > "${TMP_DIR}/faq.txt"
    else
        cp "$SCRIPT_FILE" "${TMP_DIR}/corpo.txt"
        : > "${TMP_DIR}/faq.txt"
    fi

    # Subdivide the body (corpo) if it is still too long.
    CORPO_WORDS="$(wc -w < "${TMP_DIR}/corpo.txt")"
    if [[ "$CORPO_WORDS" -gt "$MAX_WORDS" ]]; then
        CORPO_LINES="$(wc -l < "${TMP_DIR}/corpo.txt")"
        MID=$((CORPO_LINES / 2))
        # Find the nearest paragraph break: probe fixed offsets around the
        # middle line for an empty or whitespace-only line; fall back to the
        # exact middle when none of the probed lines is blank.
        SPLIT_LINE="$MID"
        for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
            CHECK=$((MID + offset))
            if [[ "$CHECK" -gt 0 && "$CHECK" -lt "$CORPO_LINES" ]]; then
                LINE="$(sed -n "${CHECK}p" "${TMP_DIR}/corpo.txt")"
                if [[ -z "$LINE" || "$LINE" =~ ^[[:space:]]*$ ]]; then
                    SPLIT_LINE="$CHECK"
                    break
                fi
            fi
        done
        # Lines 1..SPLIT_LINE form part 1, the remainder part 2.
        head -n "$SPLIT_LINE" "${TMP_DIR}/corpo.txt" > "${TMP_DIR}/part_1.txt"
        tail -n "+$((SPLIT_LINE + 1))" "${TMP_DIR}/corpo.txt" > "${TMP_DIR}/part_2.txt"
        # A non-empty FAQ becomes the last part.
        if [[ -s "${TMP_DIR}/faq.txt" ]]; then
            cp "${TMP_DIR}/faq.txt" "${TMP_DIR}/part_3.txt"
            NUM_PARTS=3
        else
            NUM_PARTS=2
        fi
    else
        cp "${TMP_DIR}/corpo.txt" "${TMP_DIR}/part_1.txt"
        if [[ -s "${TMP_DIR}/faq.txt" ]]; then
            cp "${TMP_DIR}/faq.txt" "${TMP_DIR}/part_2.txt"
            NUM_PARTS=2
        else
            NUM_PARTS=1
        fi
    fi
fi

log_info "EP${EP_PAD}: Split into ${NUM_PARTS} parts"
|
||||
|
||||
# TTS style prompt, prepended to the text of every part.
STYLE="Lê este texto em português de Portugal (PT-PT), com um tom enérgico, confiante, educativo, inspirador e profissional. Mantém o ritmo natural e envolvente, como se estivesses a conversar diretamente com o ouvinte, transmitindo proximidade e autoridade. Faz pequenas pausas para dar ênfase às ideias-chave e assegura que cada transição entre temas é fluida. Evita soar robótico ou demasiado formal; o objetivo é informar, motivar e criar ligação com quem está a ouvir."

# Generate each part: one raw WAV per text part, written to RAW_DIR.
AUDIO_FILES=()
for ((i=1; i<=NUM_PARTS; i++)); do
    PART_FILE="${TMP_DIR}/part_${i}.txt"
    PART_WORDS="$(wc -w < "$PART_FILE")"
    OUTPUT="${RAW_DIR}/ep_${EP_PAD}_p${i}.wav"

    log_info "EP${EP_PAD}: Generating part ${i}/${NUM_PARTS} (${PART_WORDS} words)..."

    # All values reach Python as arguments or through the environment, never
    # by being pasted into the source text: a quote in a path or setting can
    # no longer break the code, and the API key does not show up in the
    # process list. The quoted heredoc delimiter disables shell expansion.
    GEMINI_API_KEY="$GEMINI_API_KEY" python3 - "$PART_FILE" "$OUTPUT" "$TTS_MODEL" "$TTS_VOICE" "$STYLE" <<'PYTTS'
import os
import sys
import wave

from google import genai
from google.genai import types

part_file, output, model, voice, style = sys.argv[1:6]

client = genai.Client(api_key=os.environ['GEMINI_API_KEY'])
with open(part_file, 'r', encoding='utf-8') as f:
    text = f.read()

response = client.models.generate_content(
    model=model,
    contents=style + '\n\n' + text,
    config=types.GenerateContentConfig(
        response_modalities=['AUDIO'],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice)
            )
        ),
    ),
)

# The audio bytes of the first candidate are written as mono, 2 bytes per
# sample, 24000 Hz.
data = response.candidates[0].content.parts[0].inline_data.data
with wave.open(output, 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(24000)
    wf.writeframes(data)

# Duration in seconds derived from the byte count.
duration = (len(data) // 2) / 24000
print(f'{duration:.0f}')
PYTTS

    # Duration as measured from the written file.
    DURATION="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTPUT")"
    log_info "EP${EP_PAD}: Part ${i} done: ${DURATION%.*}s"
    AUDIO_FILES+=("$OUTPUT")
done

# Update state
set_episode_field "$EP_NUM" "status" "audio_done"

# Print the post-production command line for the generated files.
log_info "EP${EP_PAD}: All ${NUM_PARTS} parts generated. Run post-production:"
echo "bash scripts/post-produce.sh ${EP_NUM} ${AUDIO_FILES[*]}"
|
||||
Executable
+160
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the help text and abort with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <episode_number>" \
        " Generates podcast script and WP description for the given episode." \
        " Reads episode data from config/episode-guide-map.json." \
        " Skips script generation if .txt already exists."
    exit 1
}

# Exactly one thing is required: the episode number.
if (( $# < 1 )); then
    usage
fi
EP_NUM="$1"
EP_PAD="$(pad_number "$EP_NUM")"
|
||||
|
||||
ensure_state_file
|
||||
|
||||
# Load episode data from the map: the JSON object stored under the episode
# number as key, or nothing when the key is absent.
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
EP_DATA="$(jq -r --arg n "$EP_NUM" '.[$n] // empty' "$MAP_FILE")"
if [[ -z "$EP_DATA" ]]; then
    log_error "Episode $EP_NUM not found in episode-guide-map.json"
    exit 1
fi

GUIDE_TITLE="$(echo "$EP_DATA" | jq -r '.guide_title')"
PODCAST_TITLE="$(echo "$EP_DATA" | jq -r '.podcast_title')"
GUIDE_URL="$(echo "$EP_DATA" | jq -r '.guide_url')"
# File-name-safe title: every character outside [a-zA-Z0-9] (accented letters
# included) becomes "_", runs of "_" are collapsed, a trailing "_" is removed.
SAFE_TITLE="$(echo "$PODCAST_TITLE" | sed 's/[^a-zA-Z0-9]/_/g' | sed 's/__*/_/g' | sed 's/_$//')"

# Default output paths for the script and for the WordPress description.
SCRIPT_FILE="${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_${SAFE_TITLE}.txt"
WP_FILE="${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_${SAFE_TITLE}_wp.json"

# Check if a script already exists (reuse existing .txt files). The first
# match replaces SCRIPT_FILE; WP_FILE keeps the name derived from the title.
EXISTING_SCRIPT=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
    if [[ -f "$f" ]]; then
        EXISTING_SCRIPT="$f"
        SCRIPT_FILE="$f"
        break
    fi
done

# Service links and the two prompt templates are read in full.
SERVICES="$(cat "${PROJECT_ROOT}/config/service-links.json")"
SCRIPT_PROMPT="$(cat "${PROJECT_ROOT}/prompts/generate-script.md")"
WP_PROMPT="$(cat "${PROJECT_ROOT}/prompts/generate-wp-description.md")"
|
||||
|
||||
# Step 1: Generate the script (or skip if one exists).
if [[ -n "$EXISTING_SCRIPT" ]]; then
    log_info "EP${EP_PAD}: Script already exists at ${EXISTING_SCRIPT}, skipping generation"
else
    log_info "EP${EP_PAD}: Generating podcast script for '${PODCAST_TITLE}'"

    # Prompt = template + episode data + service links + final instruction.
    FULL_PROMPT="${SCRIPT_PROMPT}

---

## Dados do episodio
- Numero: ${EP_NUM}
- Titulo: ${PODCAST_TITLE}
- Guia de referencia: ${GUIDE_TITLE}
- URL do guia: ${GUIDE_URL}

## Links de servicos Descomplicar (usar quando contextual)
${SERVICES}

## Instrucao
Gera o guiao completo seguindo the estrutura obrigatoria acima. Output apenas o texto limpo."

    # Write to a side file first and move it into place only on success.
    # Redirecting straight into SCRIPT_FILE would leave an empty or partial
    # .txt behind when the generator fails, and the "script already exists"
    # check above would then skip generation on every later run. The
    # ".partial" suffix keeps the side file out of the Episodio_*.txt glob.
    SCRIPT_TMP="${SCRIPT_FILE}.partial"
    if ! echo "$FULL_PROMPT" | claude --print > "$SCRIPT_TMP" || [[ ! -s "$SCRIPT_TMP" ]]; then
        rm -f "$SCRIPT_TMP"
        log_error "EP${EP_PAD}: Script generation failed — no script written"
        exit 1
    fi
    mv "$SCRIPT_TMP" "$SCRIPT_FILE"
    log_info "EP${EP_PAD}: Script saved to ${SCRIPT_FILE}"
fi
|
||||
|
||||
# Step 2: Generate the WordPress description.
# Unlike the script, the description is regenerated on every run and WP_FILE
# is overwritten.
log_info "EP${EP_PAD}: Generating WordPress description"

# Prompt = template + episode data + service links + the full episode script
# + final instruction asking for JSON only.
SCRIPT_CONTENT="$(cat "$SCRIPT_FILE")"
WP_FULL_PROMPT="${WP_PROMPT}

---

## Dados do episodio
- Numero: ${EP_NUM}
- Titulo: ${PODCAST_TITLE}
- URL do guia relacionado: ${GUIDE_URL}

## Links de servicos Descomplicar (USAR APENAS ESTES — nunca inventar)
${SERVICES}

## Guiao do episodio (base para a descricao)
${SCRIPT_CONTENT}

## Instrucao
Gera o JSON com a descricao WordPress completa. Output APENAS JSON valido."

# The generator's stdout is stored verbatim in WP_FILE.
echo "$WP_FULL_PROMPT" | claude --print > "$WP_FILE"
|
||||
|
||||
# Validate the generated JSON and attempt an automatic repair when it does
# not parse.
#
# The repair handles one specific corruption: unescaped double quotes inside
# the "content_html" string. The end of that string is located by the start of
# the following "hashtags" (or, failing that, "wp_tags") key.

# Return 0 when the file given as $1 parses as JSON, non-zero otherwise.
# The path is passed as an argument (never spliced into the Python source) so
# quotes in the file name cannot break or alter the code.
_wp_json_is_valid() {
    python3 -c 'import json, sys; json.load(open(sys.argv[1], "rb"))' "$1" 2>/dev/null
}

if ! _wp_json_is_valid "$WP_FILE"; then
    log_warn "EP${EP_PAD}: WP JSON inválido — a tentar reparação automática..."
    # `|| true`: under `set -e` a failed repair would otherwise terminate the
    # script right here, and the explicit error message below would never be
    # logged.
    python3 - "$WP_FILE" <<'PYFIX' || true
import json
import re
import sys

filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()

# Find and fix the unescaped quotes inside content_html.
marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    print("SKIP: content_html não encontrado")
    sys.exit(1)

content_start = start + len(marker)
end_pattern = re.search(r'",\s*\n\s*"hashtags"', raw[content_start:])
if not end_pattern:
    end_pattern = re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:])

if not end_pattern:
    print("SKIP: fim do content_html não encontrado")
    sys.exit(1)

content_end = content_start + end_pattern.start()
raw_content = raw[content_start:content_end]
# Escape every double quote that is not already preceded by a backslash.
fixed_content = re.sub(r'(?<!\\)"', '\\"', raw_content)
fixed_raw = raw[:content_start] + fixed_content + raw[content_end:]

# Only overwrite the file when the repaired text really parses.
try:
    json.loads(fixed_raw)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_raw)
    print("JSON reparado com sucesso")
except Exception as e:
    print(f"Reparação falhou: {e}")
    sys.exit(1)
PYFIX

    if _wp_json_is_valid "$WP_FILE"; then
        log_info "EP${EP_PAD}: JSON reparado automaticamente"
    else
        log_error "EP${EP_PAD}: JSON inválido após reparação — verificação manual necessária"
        # Keep the non-zero exit the script already produced in this case
        # (it used to die at the failed repair step), now with the diagnostic.
        exit 1
    fi
else
    log_info "EP${EP_PAD}: WP description saved to ${WP_FILE}"
fi
|
||||
|
||||
# Update state
|
||||
STATUS="$(get_episode_status "$EP_NUM")"
|
||||
if [[ "$STATUS" == "not_found" ]]; then
|
||||
add_episode "$EP_NUM" "$PODCAST_TITLE" "$GUIDE_URL"
|
||||
fi
|
||||
set_episode_field "$EP_NUM" "status" "script_done"
|
||||
set_episode_field "$EP_NUM" "script_path" "$(basename "$SCRIPT_FILE")"
|
||||
set_episode_field "$EP_NUM" "wp_data_path" "$(basename "$WP_FILE")"
|
||||
|
||||
log_info "EP${EP_PAD}: Content generation complete"
|
||||
Executable
+48
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the command-line help for the Canva CSV generator and exit with status 1.
usage() {
    cat <<EOF
Usage: $0 [start_episode] [count]
 Generates canva-bulk.csv for cover image generation.
 Default: next 7 episodes from pipeline state.
EOF
    exit 1
}
|
||||
|
||||
ensure_state_file

# First episode to export (defaults to the pipeline's next episode) and how
# many consecutive episodes to include.
START="${1:-$(jq -r '.next_episode' "$STATE_FILE")}"
COUNT="${2:-7}"

# Reject anything that is not a plain non-negative integer before it reaches
# shell arithmetic; 10# forces base 10 so "08"/"09" are not parsed as octal.
if [[ ! "$START" =~ ^[0-9]+$ || ! "$COUNT" =~ ^[0-9]+$ ]]; then
    usage
fi
START=$((10#$START))
COUNT=$((10#$COUNT))
END=$((START + COUNT - 1))

MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
CSV_FILE="${PROJECT_ROOT}/canva-bulk.csv"

# Header row; the file is rewritten from scratch on every run.
echo "number,title,keyword" > "$CSV_FILE"

for ((ep=START; ep<=END; ep++)); do
    EP_DATA="$(jq -r --arg n "$ep" '.[$n] // empty' "$MAP_FILE")"
    if [[ -z "$EP_DATA" ]]; then
        log_warn "Episode $ep not found in map, skipping"
        continue
    fi
    TITLE="$(echo "$EP_DATA" | jq -r '.podcast_title')"

    # Prefer the keyword from the episode's WP JSON when one exists.
    EP_PAD="$(pad_number "$ep")"
    KEYWORD=""
    for wp_file in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
        if [[ -f "$wp_file" ]]; then
            KEYWORD="$(jq -r '.keyword // ""' "$wp_file" 2>/dev/null || echo "")"
            break
        fi
    done
    # Fall back to the title so the keyword column is never empty.
    if [[ -z "$KEYWORD" ]]; then
        KEYWORD="$TITLE"
    fi

    # Let jq build the row: @csv quotes strings and doubles embedded quotes,
    # so titles containing '"' no longer corrupt the CSV.
    jq -rn --argjson n "$ep" --arg t "$TITLE" --arg k "$KEYWORD" \
        '[$n, $t, $k] | @csv' >> "$CSV_FILE"
done

log_info "Generated Canva CSV: ${CSV_FILE} (episodes ${START}-${END})"
echo "CSV saved to: ${CSV_FILE}"
|
||||
Executable
+71
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Shared locations used by every pipeline script that sources this library.
PROJECT_ROOT="/media/ealmeida/Dados/Hub/05-Projectos/Podcast-Descomplicar-Digital"
STATE_FILE="${PROJECT_ROOT}/pipeline-state.json"
LOG_DIR="${PROJECT_ROOT}/logs"

# Load the project's .env when it exists; allexport is switched on only while
# the file is read so every variable it defines is exported.
if [[ -f "${PROJECT_ROOT}/.env" ]]; then
    set -o allexport
    . "${PROJECT_ROOT}/.env"
    set +o allexport
fi
|
||||
|
||||
# Write one timestamped log line to stdout and append it to today's log file.
#   $1   - level label (e.g. INFO, WARN, ERROR)
#   $2.. - message words, joined with spaces
log() {
    local level="$1"
    shift
    local stamp day
    stamp="$(date '+%Y-%m-%d %H:%M:%S')"
    mkdir -p "${LOG_DIR}"
    day="$(date '+%Y-%m-%d')"
    printf '[%s] [%s] %s\n' "$stamp" "$level" "$*" \
        | tee -a "${LOG_DIR}/pipeline-${day}.log"
}
|
||||
|
||||
# Level-specific wrappers around log(); all arguments form the message.
log_info() {
    log "INFO" "$@"
}

log_warn() {
    log "WARN" "$@"
}

log_error() {
    log "ERROR" "$@"
}
|
||||
|
||||
# Create pipeline-state.json with default values when it does not exist yet.
# An existing state file is left untouched.
ensure_state_file() {
    if [[ -f "${STATE_FILE}" ]]; then
        return 0
    fi

    local initial_state='{"last_updated":"","next_episode":20,"next_publish_date":"","publish_time":"07:00","publish_days":["mon","tue","wed","thu","fri"],"episodes":[]}'
    # Pipe through jq so the file is stored pretty-printed.
    jq '.' <<< "$initial_state" > "${STATE_FILE}"
    log_info "Created new pipeline-state.json"
}
|
||||
|
||||
# Print the status of an episode from the pipeline state file.
#   $1 - episode number (must be a valid JSON number)
# Prints "not_found" when no episode has that number, when its status is
# null/missing, or when jq cannot read the state file.
get_episode_status() {
    local ep_num="$1"
    # first(...) yields nothing when no episode matches; applying `//` to the
    # whole stream (not per match) makes that case fall back to "not_found".
    # Previously a missing episode produced an empty string, so callers
    # comparing against "not_found" never registered new episodes.
    jq -r --argjson n "$ep_num" \
        'first(.episodes[] | select(.number == $n) | .status) // "not_found"' \
        "${STATE_FILE}" 2>/dev/null || echo "not_found"
}
|
||||
|
||||
# Set one field of an episode in the state file and refresh last_updated.
#   $1 - episode number (valid JSON number)
#   $2 - field name
#   $3 - new value (always stored as a JSON string)
# Returns jq's exit status on failure. When no episode has that number only
# last_updated changes.
set_episode_field() {
    local ep_num="$1" field="$2" value="$3"
    local tmp rc
    # Create the temp file next to the state file so the final mv is a rename
    # within one directory rather than a move from the system temp dir.
    tmp="$(mktemp "${STATE_FILE}.XXXXXX")"
    if jq --argjson n "$ep_num" --arg f "$field" --arg v "$value" \
        '(.episodes[] | select(.number == $n))[$f] = $v | .last_updated = (now | todate)' \
        "${STATE_FILE}" > "$tmp"; then
        mv "$tmp" "${STATE_FILE}"
    else
        rc=$?
        # Do not leave the temp file behind when jq fails.
        rm -f "$tmp"
        return "$rc"
    fi
}
|
||||
|
||||
# Append a new episode record with status "pending" to the state file.
#   $1 - episode number (valid JSON number)
#   $2 - episode title
#   $3 - URL of the source guide
# All path/ID fields start as null. Returns jq's exit status on failure.
add_episode() {
    local ep_num="$1" title="$2" guide_url="$3"
    local tmp rc
    # Temp file lives beside the state file so the final mv is a rename within
    # one directory rather than a move from the system temp dir.
    tmp="$(mktemp "${STATE_FILE}.XXXXXX")"
    if jq --argjson n "$ep_num" --arg t "$title" --arg g "$guide_url" \
        '.episodes += [{"number":$n,"title":$t,"source_guide_url":$g,"status":"pending","script_path":null,"wp_data_path":null,"audio_raw_path":null,"audio_final_path":null,"cover_path":null,"wp_post_id":null,"scheduled_date":null}] | .last_updated = (now | todate)' \
        "${STATE_FILE}" > "$tmp"; then
        mv "$tmp" "${STATE_FILE}"
    else
        rc=$?
        # Do not leave the temp file behind when jq fails.
        rm -f "$tmp"
        return "$rc"
    fi
}
|
||||
|
||||
# Print the next Monday-to-Friday date after a given date, as YYYY-MM-DD.
#   $1 - base date in any format accepted by `date -d`
# Friday, Saturday and Sunday all advance to the following Monday; every
# other day advances by one day.
next_weekday() {
    local base_date="$1"
    local dow
    # %u is the ISO day of week: 1 = Monday ... 7 = Sunday.
    dow="$(date -d "${base_date}" '+%u')"
    case "$dow" in
        5|6|7)
            date -d "${base_date} + $((8 - dow)) days" '+%Y-%m-%d'
            ;;
        *)
            date -d "${base_date} + 1 day" '+%Y-%m-%d'
            ;;
    esac
}
|
||||
|
||||
# Print a non-negative integer zero-padded to at least three digits.
#   $1 - episode number, with or without leading zeros ("8", "08", "008")
pad_number() {
    local raw="$1"
    # Force base 10: printf %d would read "08"/"09" as invalid octal numbers.
    printf '%03d' "$((10#${raw:-0}))"
}
|
||||
Executable
+154
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the command-line help for the post-production script and exit with status 1.
usage() {
    cat <<EOF
Usage: $0 <episode_number> <raw_audio_path> [raw_audio_path_2 ...]
 Applies intro, outro, and loudness normalization to raw TTS audio.
 Accepts multiple audio parts that will be concatenated in order.
EOF
    exit 1
}
|
||||
|
||||
# --- Arguments: episode number followed by one or more raw audio parts ---
if (( $# < 2 )); then
    usage
fi
EP_NUM="$1"
shift
RAW_PARTS=("$@")
EP_PAD="$(pad_number "$EP_NUM")"

ensure_state_file

# Every raw part must exist before any processing starts.
for raw_part in "${RAW_PARTS[@]}"; do
    [[ -f "$raw_part" ]] && continue
    log_error "EP${EP_PAD}: Raw audio file not found: ${raw_part}"
    exit 1
done

# --- Audio settings, all read from config/audio-settings.json ---
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"

# Print the raw value of one top-level key of the settings file.
audio_setting() {
    jq -r ".$1" "$SETTINGS_FILE"
}

INTRO_FILE="${PROJECT_ROOT}/$(audio_setting intro_file)"
OUTRO_FILE="${PROJECT_ROOT}/$(audio_setting outro_file)"
FADE_IN="$(audio_setting fade_in_duration)"
BG_DUR="$(audio_setting background_duration)"
BG_VOL="$(audio_setting background_volume_db)"
FADE_OUT="$(audio_setting fade_out_duration)"
LUFS="$(audio_setting loudness_target_lufs)"
BITRATE="$(audio_setting export_bitrate)"
SAMPLE_RATE="$(audio_setting export_sample_rate)"
MIN_DUR="$(audio_setting min_duration_minutes)"
MAX_DUR="$(audio_setting max_duration_minutes)"

# The intro and outro assets are mandatory.
for required_audio in "$INTRO_FILE" "$OUTRO_FILE"; do
    [[ -f "$required_audio" ]] && continue
    log_error "EP${EP_PAD}: Required audio file not found: ${required_audio}"
    exit 1
done

# --- Output location ---
OUTPUT_DIR="${PROJECT_ROOT}/Episodios/Audios/final"
mkdir -p "$OUTPUT_DIR"

# The file name is derived from the podcast title: every character outside
# [a-zA-Z0-9] becomes '-', runs of '-' collapse, a trailing '-' is dropped,
# and the result is lower-cased.
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
PODCAST_TITLE="$(jq -r --arg n "$EP_NUM" '.[$n].podcast_title // "episodio"' "$MAP_FILE")"
SAFE_TITLE="$(echo "$PODCAST_TITLE" \
    | sed -e 's/[^a-zA-Z0-9]/-/g' -e 's/--*/-/g' -e 's/-$//' \
    | tr '[:upper:]' '[:lower:]')"
OUTPUT_FILE="${OUTPUT_DIR}/ep_${EP_PAD}_${SAFE_TITLE}.mp3"

# Scratch directory for intermediate WAV files, removed when the script exits.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "EP${EP_PAD}: Starting post-production"
|
||||
|
||||
# Concatenate raw audio parts if multiple.
# Each part is only converted to a common format (sample rate, stereo) here.
# The de-essing high-shelf filter is NOT applied at this stage because the
# combined audio goes through it in the "prepare raw audio" step; applying it
# here as well attenuated multi-part episodes twice (-12 dB instead of -6 dB).
if (( ${#RAW_PARTS[@]} > 1 )); then
    log_info "EP${EP_PAD}: Concatenating ${#RAW_PARTS[@]} audio parts"
    CONCAT_LIST="${TMP_DIR}/concat_list.txt"
    : > "$CONCAT_LIST"
    part_idx=0
    for part in "${RAW_PARTS[@]}"; do
        part_idx=$((part_idx + 1))
        # Name intermediates by position so two inputs sharing a basename
        # (e.g. a/part.mp3 and b/part.mp3) cannot overwrite each other.
        PART_WAV="${TMP_DIR}/part_$(printf '%03d' "$part_idx")_norm.wav"
        ffmpeg -y -v quiet -i "$part" -ar "$SAMPLE_RATE" -ac 2 "$PART_WAV"
        echo "file '${PART_WAV}'" >> "$CONCAT_LIST"
    done
    ffmpeg -y -v quiet -f concat -safe 0 -i "$CONCAT_LIST" -c copy "${TMP_DIR}/raw_combined.wav"
    RAW_COMBINED="${TMP_DIR}/raw_combined.wav"
else
    RAW_COMBINED="${RAW_PARTS[0]}"
fi
|
||||
|
||||
# Shared ffmpeg invocation prefix and PCM output format for every step below.
FFMPEG=(ffmpeg -y -v quiet)
PCM_FORMAT=(-ar "$SAMPLE_RATE" -ac 2)

# Duration of the raw narration, truncated to whole seconds.
RAW_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$RAW_COMBINED")"
RAW_DUR_INT="${RAW_DUR%.*}"
log_info "EP${EP_PAD}: Raw audio duration: ${RAW_DUR_INT}s"

# Step 1: background bed from the intro track, faded in, lowered by BG_VOL dB,
# faded out over the last 3 seconds and cut at BG_DUR seconds.
BG_FADE_OUT_START=$((BG_DUR - 3))
BG_FILTER="afade=t=in:st=0:d=${FADE_IN},volume=${BG_VOL}dB,afade=t=out:st=${BG_FADE_OUT_START}:d=3"
"${FFMPEG[@]}" \
    -i "$INTRO_FILE" \
    -af "$BG_FILTER" \
    -t "$BG_DUR" \
    "${PCM_FORMAT[@]}" \
    "${TMP_DIR}/intro_bg.wav"

# Step 2: narration as stereo at the export sample rate, with a -6 dB high
# shelf from 4 kHz (de-essing).
"${FFMPEG[@]}" \
    -i "$RAW_COMBINED" \
    -af "highshelf=f=4000:g=-6" \
    "${PCM_FORMAT[@]}" \
    "${TMP_DIR}/tts_stereo.wav"

# Step 3: mix the background bed under the start of the narration. The bed is
# padded to the narration length; the mix ends with the narration (input 0).
INTRO_MIX="[1]apad=whole_dur=${RAW_DUR_INT}[bg];[0][bg]amix=inputs=2:duration=first:dropout_transition=3[mixed]"
"${FFMPEG[@]}" \
    -i "${TMP_DIR}/tts_stereo.wav" \
    -i "${TMP_DIR}/intro_bg.wav" \
    -filter_complex "$INTRO_MIX" \
    -map "[mixed]" \
    "${PCM_FORMAT[@]}" \
    "${TMP_DIR}/tts_with_intro.wav"

# Step 4: outro with the intro music underneath, fading out over the last
# FADE_OUT seconds and trimmed to the outro's whole-second length.
OUTRO_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTRO_FILE")"
OUTRO_DUR_INT="${OUTRO_DUR%.*}"
OUTRO_FADE_START=$((OUTRO_DUR_INT - FADE_OUT))
OUTRO_MIX="[1]volume=${BG_VOL}dB,afade=t=out:st=${OUTRO_FADE_START}:d=${FADE_OUT},atrim=0:${OUTRO_DUR_INT}[music];[0][music]amix=inputs=2:duration=first[out]"
"${FFMPEG[@]}" \
    -i "$OUTRO_FILE" \
    -i "$INTRO_FILE" \
    -filter_complex "$OUTRO_MIX" \
    -map "[out]" \
    "${PCM_FORMAT[@]}" \
    "${TMP_DIR}/outro_mixed.wav"

# Step 5: narration (with intro bed) followed by the mixed outro.
"${FFMPEG[@]}" \
    -i "${TMP_DIR}/tts_with_intro.wav" \
    -i "${TMP_DIR}/outro_mixed.wav" \
    -filter_complex "[0][1]concat=n=2:v=0:a=1[out]" \
    -map "[out]" \
    "${PCM_FORMAT[@]}" \
    "${TMP_DIR}/full_episode.wav"

# Step 6: loudness-normalize to the configured LUFS target and export as MP3.
"${FFMPEG[@]}" \
    -i "${TMP_DIR}/full_episode.wav" \
    -af "loudnorm=I=${LUFS}:TP=-1.5:LRA=11" \
    "${PCM_FORMAT[@]}" \
    -b:a "$BITRATE" \
    "$OUTPUT_FILE"
|
||||
|
||||
# Validate duration: warn (never fail) when the episode length in whole
# minutes falls outside the configured range.
FINAL_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTPUT_FILE")"
FINAL_SEC="${FINAL_DUR%.*}"
# Shell arithmetic instead of `bc`: same integer division, and a missing bc
# can no longer abort the script after the MP3 was written but before the
# pipeline state is updated.
FINAL_MIN=$((FINAL_SEC / 60))
log_info "EP${EP_PAD}: Final duration: ${FINAL_MIN} minutes (${FINAL_SEC}s)"

if [[ "$FINAL_MIN" -lt "$MIN_DUR" ]] || [[ "$FINAL_MIN" -gt "$MAX_DUR" ]]; then
    log_warn "EP${EP_PAD}: Duration ${FINAL_MIN}min is outside expected range (${MIN_DUR}-${MAX_DUR}min)"
fi

# Update state: mark the episode as produced and record the output file name.
set_episode_field "$EP_NUM" "status" "produced"
set_episode_field "$EP_NUM" "audio_final_path" "$(basename "$OUTPUT_FILE")"

log_info "EP${EP_PAD}: Post-production complete -> ${OUTPUT_FILE}"
|
||||
Executable
+140
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the command-line help for the publish script and exit with status 1.
usage() {
    cat <<EOF
Usage: $0 <episode_number> <scheduled_date>
 Publishes episode to WordPress via wp-cli over SSH.
 scheduled_date format: YYYY-MM-DD
 Requires: final audio, WP JSON, cover image (optional).
EOF
    exit 1
}
|
||||
|
||||
# --- Arguments: episode number and publication date ---
if (( $# < 2 )); then
    usage
fi
EP_NUM="$1"
SCHED_DATE="$2"

# The date is embedded verbatim in the generated command script, so only the
# documented YYYY-MM-DD shape is accepted.
if [[ ! "$SCHED_DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    usage
fi

EP_PAD="$(pad_number "$EP_NUM")"

# The state file must exist BEFORE it is read: on a fresh checkout the jq call
# below would otherwise fail and abort the script under `set -e`.
ensure_state_file
PUBLISH_TIME="$(jq -r '.publish_time' "$STATE_FILE")"
|
||||
|
||||
# Locate the episode's artefacts. For each kind the first match wins.

# Final MP3 produced by post-production.
AUDIO_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
    if [[ -f "$candidate" ]]; then
        AUDIO_FILE="$candidate"
        break
    fi
done

# WordPress description JSON.
WP_JSON=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
    if [[ -f "$candidate" ]]; then
        WP_JSON="$candidate"
        break
    fi
done

# Cover image (jpg or png) anywhere below Capas_PodCast. find's errors are
# silenced, so a missing directory simply yields no cover.
COVER_FILE=""
IFS= read -r -d '' COVER_FILE < <(
    find "${PROJECT_ROOT}/Episodios/Capas_PodCast" -name "ep_${EP_PAD}_*" \( -name "*.jpg" -o -name "*.png" \) -print0 2>/dev/null
) || true

# Audio and WP JSON are mandatory; the cover is optional.
if [[ -z "$AUDIO_FILE" || ! -f "$AUDIO_FILE" ]]; then
    log_error "EP${EP_PAD}: Audio file not found in Audios/final/"
    exit 1
fi

if [[ -z "$WP_JSON" || ! -f "$WP_JSON" ]]; then
    log_error "EP${EP_PAD}: WP JSON file not found"
    exit 1
fi

if [[ -z "$COVER_FILE" ]]; then
    log_warn "EP${EP_PAD}: Cover image not found, publishing without featured image"
fi
|
||||
|
||||
# Escape a value for embedding between single quotes in the generated shell
# script: every ' becomes '\'' . printf is used instead of echo so content
# that looks like an echo option (e.g. "-n") is passed through untouched.
escape_single_quotes() {
    printf '%s\n' "$1" | sed "s/'/'\\\\''/g"
}

# Read WP data
TITLE="$(jq -r '.title' "$WP_JSON")"
CONTENT="$(jq -r '.content_html' "$WP_JSON")"
META_DESC="$(jq -r '.meta_description' "$WP_JSON")"
# A missing wp_tags key yields an empty tag list instead of a jq error that
# would abort the script.
TAGS="$(jq -r '(.wp_tags // []) | join(",")' "$WP_JSON")"

# Get audio metadata
DURATION="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$AUDIO_FILE")"
DUR_MIN=$((${DURATION%.*} / 60))
DUR_SEC=$((${DURATION%.*} % 60))
DURATION_FMT="$(printf '%d:%02d' "$DUR_MIN" "$DUR_SEC")"
FILESIZE="$(du -h "$AUDIO_FILE" | cut -f1)"
FILESIZE_RAW="$(stat -c%s "$AUDIO_FILE")"

log_info "EP${EP_PAD}: Publishing '${TITLE}' scheduled for ${SCHED_DATE} ${PUBLISH_TIME}"

# Generate wp-cli commands for SSH execution
CMDS_FILE="${PROJECT_ROOT}/logs/publish_${EP_PAD}_commands.sh"
mkdir -p "${PROJECT_ROOT}/logs"

AUDIO_BASENAME="$(basename "$AUDIO_FILE")"
YEAR="$(date -d "$SCHED_DATE" '+%Y')"
MONTH="$(date -d "$SCHED_DATE" '+%m')"
UPLOAD_PATH="wp-content/uploads/podcast/${YEAR}/${MONTH}"

# Escape single quotes in every value that the generated script places inside
# single quotes.
ESCAPED_CONTENT="$(escape_single_quotes "$CONTENT")"
ESCAPED_META="$(escape_single_quotes "$META_DESC")"
ESCAPED_TITLE="$(escape_single_quotes "$TITLE")"
# TAGS is embedded as --tags_input='${TAGS}', so it needs the same treatment;
# previously a tag containing an apostrophe broke the generated script.
TAGS="$(escape_single_quotes "$TAGS")"
|
||||
|
||||
# Pieces shared by several lines of the generated script. WP_CLI_FLAGS is
# single-quoted on purpose: ${WP_PATH} must reach the generated script
# unexpanded and be resolved there.
WP_CLI_FLAGS='--allow-root --path="${WP_PATH}"'
SCHEDULED_AT="${SCHED_DATE} ${PUBLISH_TIME}"
AUDIO_PUBLIC_URL="https://descomplicar.pt/${UPLOAD_PATH}/${AUDIO_BASENAME}"

# Write the command script. The heredoc delimiter is unquoted, so local values
# are expanded now; escaped \$ references are left for the generated script.
{
cat << CMDEOF
#!/usr/bin/env bash
# Auto-generated publish commands for EP${EP_PAD}
# Run via SSH MCP on server (user: ealmeida, path: /home/ealmeida/public_html)
set -euo pipefail

WP_PATH="/home/ealmeida/public_html"
UPLOAD_DIR="\${WP_PATH}/${UPLOAD_PATH}"

# 1. Create upload directory
mkdir -p "\${UPLOAD_DIR}"

# 2. Audio file must be uploaded to server first (via sftp MCP)
# Source: ${AUDIO_FILE}
# Target: \${UPLOAD_DIR}/${AUDIO_BASENAME}

# 3. Create podcast post
POST_ID=\$(wp post create \\
    --post_type=podcast \\
    --post_title='${ESCAPED_TITLE}' \\
    --post_status=future \\
    --post_date='${SCHEDULED_AT}:00' \\
    --tags_input='${TAGS}' \\
    --porcelain \\
    --allow-root \\
    --path="\${WP_PATH}")

echo "Created post: \${POST_ID}"

# 4. Add content (separate to avoid shell escaping issues)
wp post update \${POST_ID} --post_content='${ESCAPED_CONTENT}' ${WP_CLI_FLAGS}

# 5. Set SSP meta fields
wp post meta update \${POST_ID} episode_type audio ${WP_CLI_FLAGS}
wp post meta update \${POST_ID} audio_file "${AUDIO_PUBLIC_URL}" ${WP_CLI_FLAGS}
wp post meta update \${POST_ID} duration "${DURATION_FMT}" ${WP_CLI_FLAGS}
wp post meta update \${POST_ID} filesize "${FILESIZE}" ${WP_CLI_FLAGS}
wp post meta update \${POST_ID} filesize_raw "${FILESIZE_RAW}" ${WP_CLI_FLAGS}

# 6. Set Rank Math meta description
wp post meta update \${POST_ID} rank_math_description '${ESCAPED_META}' ${WP_CLI_FLAGS}

# 7. Fix permissions
chown -R ealmeida:ealmeida "\${UPLOAD_DIR}"

echo "EP${EP_PAD} published as post \${POST_ID}, scheduled for ${SCHEDULED_AT}"
CMDEOF
} > "$CMDS_FILE"

chmod +x "$CMDS_FILE"

# Record the new status and date in the pipeline state. Nothing has been run
# on the server at this point; the commands were only written to disk.
set_episode_field "$EP_NUM" "status" "published"
set_episode_field "$EP_NUM" "scheduled_date" "$SCHED_DATE"

log_info "EP${EP_PAD}: Publish commands saved to ${CMDS_FILE}"
log_info "EP${EP_PAD}: Upload audio via SFTP, then run commands via SSH"
|
||||
Executable
+292
@@ -0,0 +1,292 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# schedule-episode.sh — Agenda episódio completo no WordPress
|
||||
# Envia MP3 + capa via SCP, importa media, cria post com todos os metas
|
||||
#
|
||||
# Uso: ./scripts/schedule-episode.sh <ep_num> <YYYY-MM-DD> [--dry-run]
|
||||
#
|
||||
# Requisitos locais:
|
||||
# - MP3 em Episodios/Audios/final/ep_NNN_*.mp3
|
||||
# - Capa em banco-media: capas-geradas/podcast/podcast-epNNN-*.png
|
||||
# - ffprobe (para duração)
|
||||
#
|
||||
# Requisitos servidor:
|
||||
# - wp-cli com --allow-root
|
||||
# - SSH porta 9443, chave ~/.ssh/id_ed25519
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# --- Remote server access ---
SSH_HOST="server.descomplicar.pt"
SSH_PORT=9443
SSH_USER="root"
SSH_KEY="${HOME}/.ssh/id_ed25519"
# NOTE(review): host-key verification is disabled here (StrictHostKeyChecking=no
# with known_hosts pointed at /dev/null) for a root login — confirm this is
# intentional.
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"

# --- WordPress installation on the server ---
WP_PATH="/home/ealmeida/public_html"
WP_OWNER="ealmeida:ealmeida"
SITE_URL="https://descomplicar.pt"
SERIES_SLUG="podcast-descomplicar-digital"

# --- Local media bank holding the generated cover images ---
CAPAS_DIR="/media/ealmeida/Dados/Hub/06-Operacoes/Conteúdos/banco-media/capas-geradas/podcast"

# Switched to true by the --dry-run argument.
DRY_RUN=false
|
||||
|
||||
# Print the command-line help for the scheduling script and exit with status 1.
usage() {
    cat <<EOF
Usage: $0 <ep_num> <YYYY-MM-DD> [--dry-run]
 Agenda episodio completo no WordPress (audio + capa + metas + SEO)
EOF
    exit 1
}
|
||||
|
||||
# Run a command on the WordPress server over SSH.
#   $@ - command (and arguments) handed to the remote shell
# SSH_AUTH_SOCK is emptied for this call so the agent is bypassed and only
# the key file given with -i is offered.
ssh_cmd() {
    local remote="${SSH_USER}@${SSH_HOST}"
    # SSH_OPTS is intentionally unquoted: it holds several space-separated options.
    SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" ${SSH_OPTS} "$remote" "$@"
}
|
||||
|
||||
# Copy one local file to the WordPress server over SCP.
#   $1 - local source path
#   $2 - destination path on the server
# SSH_AUTH_SOCK is emptied for this call so the agent is bypassed and only
# the key file given with -i is offered.
scp_file() {
    local src="$1" remote_dest="$2"
    # SSH_OPTS is intentionally unquoted: it holds several space-separated options.
    SSH_AUTH_SOCK= scp -P "${SSH_PORT}" -i "${SSH_KEY}" ${SSH_OPTS} "$src" "${SSH_USER}@${SSH_HOST}:${remote_dest}"
}
|
||||
|
||||
# --- Arguments: episode number, date, optional --dry-run as third argument ---
if (( $# < 2 )); then
    usage
fi
EP_NUM="$1"
SCHED_DATE="$2"
if [[ "${3:-}" == "--dry-run" ]]; then
    DRY_RUN=true
fi
EP_PAD="$(pad_number "$EP_NUM")"

# === 1. Locate local files (first match wins for each kind) ===

AUDIO_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
    if [[ -f "$candidate" ]]; then
        AUDIO_FILE="$candidate"
        break
    fi
done

COVER_FILE=""
for candidate in "${CAPAS_DIR}/podcast-ep${EP_PAD}-"*.png; do
    if [[ -f "$candidate" ]]; then
        COVER_FILE="$candidate"
        break
    fi
done

GUIDE_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
    if [[ -f "$candidate" ]]; then
        GUIDE_FILE="$candidate"
        break
    fi
done

# Audio and cover are mandatory; a missing script file only produces a warning.
if [[ -z "$AUDIO_FILE" ]]; then
    log_error "EP${EP_PAD}: MP3 nao encontrado em Episodios/Audios/final/"
    exit 1
fi
if [[ -z "$COVER_FILE" ]]; then
    log_error "EP${EP_PAD}: Capa PNG nao encontrada em ${CAPAS_DIR}/"
    exit 1
fi
if [[ -z "$GUIDE_FILE" ]]; then
    log_warn "EP${EP_PAD}: Guiao .txt nao encontrado (conteudo WP ficara vazio)"
fi

# === 2. Audio metadata ===

DURATION_RAW=$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$AUDIO_FILE")
DUR_SEC_TOTAL=${DURATION_RAW%.*}
DUR_MIN=$((DUR_SEC_TOTAL / 60))
DUR_SEC=$((DUR_SEC_TOTAL % 60))
DURATION_FMT="$(printf '%d:%02d' "$DUR_MIN" "$DUR_SEC")"
FILESIZE_H="$(du -h "$AUDIO_FILE" | cut -f1)"
FILESIZE_RAW="$(stat -c%s "$AUDIO_FILE")"
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"

# Title: taken from the script file name (prefix stripped, '_' -> ' '), or,
# when that gives nothing, from the audio file name (prefix stripped, '-' -> ' ').
TITLE=""
if [[ -n "$GUIDE_FILE" ]]; then
    TITLE="$(basename "$GUIDE_FILE" .txt | sed 's/^Episodio_[0-9]*_//' | tr '_' ' ')"
fi
if [[ -z "$TITLE" ]]; then
    TITLE="$(basename "$AUDIO_FILE" .mp3 | sed 's/^ep_[0-9]*_//' | tr '-' ' ')"
fi

# Remote upload directory and public URL, grouped by the scheduled year/month.
YEAR="$(date -d "$SCHED_DATE" '+%Y')"
MONTH="$(date -d "$SCHED_DATE" '+%m')"
AUDIO_REMOTE_DIR="${WP_PATH}/wp-content/uploads/podcast/${YEAR}/${MONTH}"
AUDIO_URL="${SITE_URL}/wp-content/uploads/podcast/${YEAR}/${MONTH}/${AUDIO_BASENAME}"

log_info "EP${EP_PAD}: '${TITLE}' | ${DURATION_FMT} | ${FILESIZE_H} | ${SCHED_DATE} 07:00"

# Dry run: report what would be used and stop before touching the server.
if [[ "$DRY_RUN" == true ]]; then
    log_info "[DRY-RUN] Audio: ${AUDIO_FILE}"
    log_info "[DRY-RUN] Capa: ${COVER_FILE}"
    log_info "[DRY-RUN] URL audio: ${AUDIO_URL}"
    log_info "[DRY-RUN] Titulo: ${TITLE}"
    exit 0
fi
|
||||
|
||||
# === 3. Upload the MP3 to the server ===

REMOTE_AUDIO_PATH="${AUDIO_REMOTE_DIR}/${AUDIO_BASENAME}"

log_info "EP${EP_PAD}: Enviar MP3..."
ssh_cmd "mkdir -p '${AUDIO_REMOTE_DIR}'"
scp_file "$AUDIO_FILE" "${AUDIO_REMOTE_DIR}/"
ssh_cmd "chown ${WP_OWNER} '${REMOTE_AUDIO_PATH}'"
log_info "EP${EP_PAD}: MP3 enviado -> ${REMOTE_AUDIO_PATH}"

# === 4. Upload the cover and import it into the WP media library ===

log_info "EP${EP_PAD}: Enviar capa..."
COVER_BASENAME="$(basename "$COVER_FILE")"
# The cover is staged in a temporary directory on the server, imported with
# wp-cli, and the staged copy is removed afterwards.
REMOTE_COVER_TMP="/tmp/podcast-upload/${COVER_BASENAME}"
ssh_cmd "mkdir -p /tmp/podcast-upload"
scp_file "$COVER_FILE" "$REMOTE_COVER_TMP"
ssh_cmd "chown ${WP_OWNER} '${REMOTE_COVER_TMP}'"

# --porcelain makes wp-cli print only the new attachment ID.
IMPORT_CMD="cd '${WP_PATH}' && wp media import '${REMOTE_COVER_TMP}' --title='${COVER_BASENAME%.png}' --porcelain --allow-root 2>/dev/null"
ATTACH_ID=$(ssh_cmd "$IMPORT_CMD")
ssh_cmd "rm -f '${REMOTE_COVER_TMP}'"

if [[ -z "$ATTACH_ID" ]]; then
    log_error "EP${EP_PAD}: Falha ao importar capa no WP"
    exit 1
fi
log_info "EP${EP_PAD}: Capa importada (attach_id: ${ATTACH_ID})"
|
||||
|
||||
# === 5. Create the scheduled podcast post ===

log_info "EP${EP_PAD}: Criar post..."
# The title sits inside single quotes in the remote command, so each ' in it
# is rewritten as '\'' .
ESCAPED_POST_TITLE="$(echo "$TITLE" | sed "s/'/'\\\\''/g")"
CREATE_CMD="cd '${WP_PATH}' && wp post create"
CREATE_CMD+=" --post_type=podcast"
CREATE_CMD+=" --post_title='${ESCAPED_POST_TITLE}'"
CREATE_CMD+=" --post_status=future"
CREATE_CMD+=" --post_date='${SCHED_DATE} 07:00:00'"
CREATE_CMD+=" --porcelain"
CREATE_CMD+=" --allow-root 2>/dev/null"
POST_ID=$(ssh_cmd "$CREATE_CMD")

if [[ -z "$POST_ID" ]]; then
    log_error "EP${EP_PAD}: Falha ao criar post"
    exit 1
fi
log_info "EP${EP_PAD}: Post criado (ID: ${POST_ID})"

# === 6. Attach the series term and the featured image ===

SERIES_CMD="cd '${WP_PATH}'"
SERIES_CMD+=" && wp post term set ${POST_ID} series '${SERIES_SLUG}' --allow-root 2>/dev/null"
SERIES_CMD+=" && wp post meta update ${POST_ID} _thumbnail_id ${ATTACH_ID} --allow-root 2>/dev/null"
ssh_cmd "$SERIES_CMD"

# === 7. SSP (Seriously Simple Podcasting) meta fields ===

# All updates run as one &&-chain in a single SSH session, so the chain stops
# at the first failing update.
META_CMD="cd '${WP_PATH}'"
META_CMD+=" && wp post meta update ${POST_ID} episode_type audio --allow-root 2>/dev/null"
META_CMD+=" && wp post meta update ${POST_ID} audio_file '${AUDIO_URL}' --allow-root 2>/dev/null"
META_CMD+=" && wp post meta update ${POST_ID} duration '${DURATION_FMT}' --allow-root 2>/dev/null"
META_CMD+=" && wp post meta update ${POST_ID} filesize '${FILESIZE_H}' --allow-root 2>/dev/null"
META_CMD+=" && wp post meta update ${POST_ID} filesize_raw '${FILESIZE_RAW}' --allow-root 2>/dev/null"
META_CMD+=" && wp post meta update ${POST_ID} date_recorded '${SCHED_DATE} 07:00:00' --allow-root 2>/dev/null"
ssh_cmd "$META_CMD"
|
||||
|
||||
# === 8. Apply WP content + Rank Math + tags (when a _wp.json file exists) ===

# Succeeds when the file parses as JSON. The path is passed as an argument
# rather than interpolated into the Python source, so quotes in the path
# cannot break the check.
wp_json_is_valid() {
    python3 -c 'import json, sys; json.load(open(sys.argv[1], encoding="utf-8"))' "$1" 2>/dev/null
}

# Print one top-level value of $WP_JSON.
#   $1 - key name
#   $2 - separator used to join the value when it is a list (optional)
# A missing key prints an empty line.
wp_json_get() {
    python3 -c '
import json, sys
path, key, sep = sys.argv[1], sys.argv[2], sys.argv[3]
with open(path, encoding="utf-8") as fh:
    value = json.load(fh).get(key, "")
print(sep.join(value) if isinstance(value, list) else value)
' "$WP_JSON" "$1" "${2:-}" 2>/dev/null
}

# Escape a value for embedding between single quotes in a remote command.
escape_single_quotes() {
    printf '%s\n' "$1" | sed "s/'/'\\\\''/g"
}

WP_JSON=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
    if [[ -f "$f" ]]; then
        WP_JSON="$f"
        break
    fi
done

# Auto-repair invalid JSON (unescaped double quotes inside content_html).
# The repair runs as an `if` condition so that a failed repair does not abort
# the script under `set -e`: the post already exists at this point and the
# remaining steps (permissions, state update) must still run.
if [[ -n "$WP_JSON" ]] && ! wp_json_is_valid "$WP_JSON"; then
    log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
    if python3 - "$WP_JSON" << 'PYFIX'
import sys, re, json

filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()

# content_html is assumed to end right before the "hashtags" or "wp_tags" key.
marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    sys.exit(1)
content_start = start + len(marker)
tail = raw[content_start:]
end_pattern = (re.search(r'",\s*\n\s*"hashtags"', tail)
               or re.search(r'",\s*\n\s*"wp_tags"', tail))
if not end_pattern:
    sys.exit(1)
content_end = content_start + end_pattern.start()

# Escape every double quote that is not already preceded by a backslash.
body = re.sub(r'(?<!\\)"', lambda m: '\\"', raw[content_start:content_end])
fixed = raw[:content_start] + body + raw[content_end:]

try:
    json.loads(fixed)
except ValueError:
    sys.exit(1)

with open(filepath, 'w', encoding='utf-8') as f:
    f.write(fixed)
PYFIX
    then
        log_info "EP${EP_PAD}: JSON reparado"
    else
        log_warn "EP${EP_PAD}: reparação automática do JSON falhou"
    fi
fi

if [[ -n "$WP_JSON" ]] && wp_json_is_valid "$WP_JSON"; then
    log_info "EP${EP_PAD}: Aplicar conteudo WP de $(basename "$WP_JSON")"

    WP_CONTENT="$(wp_json_get content_html)"
    WP_META="$(wp_json_get meta_description)"
    WP_KEYWORD="$(wp_json_get keyword)"
    WP_HASHTAGS="$(wp_json_get hashtags ' ')"
    # Tags are emitted as separately shell-quoted words: `wp post term set`
    # takes one argument per term, so the previous comma-joined, unquoted
    # string produced wrong terms (and split tags containing spaces).
    WP_TAG_ARGS="$(python3 -c '
import json, shlex, sys
with open(sys.argv[1], encoding="utf-8") as fh:
    tags = json.load(fh).get("wp_tags", [])
print(" ".join(shlex.quote(t) for t in tags if t))
' "$WP_JSON" 2>/dev/null)"

    # Excerpt = meta description, blank line, hashtags (only when both exist).
    WP_EXCERPT=""
    if [[ -n "$WP_META" && -n "$WP_HASHTAGS" ]]; then
        WP_EXCERPT="${WP_META}"$'\n\n'"${WP_HASHTAGS}"
    fi

    # HTML content
    if [[ -n "$WP_CONTENT" ]]; then
        ESCAPED_CONTENT="$(escape_single_quotes "$WP_CONTENT")"
        ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_content='${ESCAPED_CONTENT}' --allow-root 2>/dev/null"
        log_info "EP${EP_PAD}: post_content aplicado"
    fi

    # Excerpt
    if [[ -n "$WP_EXCERPT" ]]; then
        ESCAPED_EXCERPT="$(escape_single_quotes "$WP_EXCERPT")"
        ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_excerpt='${ESCAPED_EXCERPT}' --allow-root 2>/dev/null"
        log_info "EP${EP_PAD}: post_excerpt aplicado"
    fi

    # Tags
    if [[ -n "$WP_TAG_ARGS" ]]; then
        ssh_cmd "cd '${WP_PATH}' && wp post term set ${POST_ID} post_tag ${WP_TAG_ARGS} --allow-root 2>/dev/null"
        log_info "EP${EP_PAD}: tags aplicadas"
    fi

    # Rank Math: meta description + focus keyword
    if [[ -n "$WP_META" ]]; then
        ESCAPED_META="$(escape_single_quotes "$WP_META")"
        ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_description '${ESCAPED_META}' --allow-root 2>/dev/null"
        log_info "EP${EP_PAD}: rank_math_description aplicado"
    fi

    if [[ -n "$WP_KEYWORD" ]]; then
        ESCAPED_KW="$(escape_single_quotes "$WP_KEYWORD")"
        ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_focus_keyword '${ESCAPED_KW}' --allow-root 2>/dev/null"
        log_info "EP${EP_PAD}: rank_math_focus_keyword aplicado"
    fi

    # Rank Math: SEO title (prefer seo_title from the JSON, else title + suffix)
    WP_SEO_TITLE="$(wp_json_get seo_title)"
    if [[ -z "$WP_SEO_TITLE" ]]; then
        WP_TITLE_FALLBACK="$(wp_json_get title)"
        if [[ -n "$WP_TITLE_FALLBACK" ]]; then
            WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
        fi
    fi
    if [[ -n "$WP_SEO_TITLE" ]]; then
        ESCAPED_SEO_TITLE="$(escape_single_quotes "$WP_SEO_TITLE")"
        ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_title '${ESCAPED_SEO_TITLE}' --allow-root 2>/dev/null"
        log_info "EP${EP_PAD}: rank_math_title aplicado"
    fi

    # Optimized slug (only when the JSON provides one)
    WP_SLUG="$(wp_json_get slug)"
    if [[ -n "$WP_SLUG" ]]; then
        ESCAPED_SLUG="$(escape_single_quotes "$WP_SLUG")"
        ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_name='${ESCAPED_SLUG}' --allow-root 2>/dev/null"
        log_info "EP${EP_PAD}: slug actualizado para ${WP_SLUG}"
    fi
else
    log_info "EP${EP_PAD}: PENDENTE — WP JSON nao encontrado, gerar via generate-content.sh"
fi
|
||||
|
||||
# === 9. Fix ownership of the upload directories (best effort) ===

ssh_cmd "chown -R ${WP_OWNER} '${AUDIO_REMOTE_DIR}/' '${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}/' 2>/dev/null" || true

log_info "EP${EP_PAD}: Agendado para ${SCHED_DATE} 07:00 (post ${POST_ID})"

# Update pipeline-state.json.
# The record uses the same schema as lib.sh (number, audio_final_path,
# scheduled_date, wp_post_id). The previous num/audio/scheduled keys never
# matched entries created by add_episode, so a duplicate entry with a
# different shape was appended on every run.
ensure_state_file

# Title from the WP JSON when available, otherwise the title derived from the
# file names, so an existing title is never overwritten with an empty string.
EP_TITLE="$(python3 -c 'import json, sys; print(json.load(open(sys.argv[1], encoding="utf-8")).get("title", ""))' "${WP_JSON:-}" 2>/dev/null || echo "")"
if [[ -z "$EP_TITLE" ]]; then
    EP_TITLE="$TITLE"
fi
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"

STATE_TMP="${STATE_FILE}.tmp"
# 10# forces base 10 so a zero-padded episode number ("08") is valid JSON.
if jq --argjson n "$((10#$EP_NUM))" --arg t "$EP_TITLE" --arg a "$AUDIO_BASENAME" \
    --arg s "$SCHED_DATE" --arg p "$POST_ID" \
    '($p | tonumber? // $p) as $pid
     | if [.episodes[] | select(.number == $n)] | length > 0
       then (.episodes[] | select(.number == $n)) |= . + {status: "ready", title: $t, audio_final_path: $a, scheduled_date: $s, wp_post_id: $pid}
       else .episodes += [{number: $n, title: $t, source_guide_url: null, status: "ready", script_path: null, wp_data_path: null, audio_raw_path: null, audio_final_path: $a, cover_path: null, wp_post_id: $pid, scheduled_date: $s}]
       end
     | .last_updated = (now | todate)' \
    "${STATE_FILE}" > "$STATE_TMP"; then
    mv "$STATE_TMP" "${STATE_FILE}"
    log_info "EP${EP_PAD}: pipeline-state.json actualizado"
else
    # The post is already scheduled; a state-file problem must not hide its ID.
    rm -f "$STATE_TMP"
    log_warn "EP${EP_PAD}: falha ao actualizar pipeline-state.json"
fi

# Final output line: the WordPress post ID.
echo "${POST_ID}"
|
||||
Executable
+82
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env bash
# Split a podcast script into text blocks for TTS generation.
#
# Usage: <script> <input_file> <output_dir>
#
# Writes part_1.txt (and part_2.txt when a split happens) into <output_dir>
# and prints the number of parts created as the last line on stdout.
#
# Strategy:
#   1. Input at or below the configured word limit -> single part.
#   2. Otherwise split at the first line matching the configured marker regex.
#   3. Otherwise split at the blank line nearest to the middle of the file.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# lib.sh is expected to provide log_info, log_error and PROJECT_ROOT.
source "${SCRIPT_DIR}/lib.sh"

usage() {
    echo "Usage: $0 <input_file> <output_dir>"
    echo " Splits a podcast script into blocks for TTS generation."
    echo " Splits at the FAQ transition marker or at word limit."
    echo " Creates part_1.txt, part_2.txt, etc. in output_dir."
    exit 1
}

[[ $# -lt 2 ]] && usage
INPUT_FILE="$1"
OUTPUT_DIR="$2"

if [[ ! -f "$INPUT_FILE" ]]; then
    log_error "Input file not found: ${INPUT_FILE}"
    exit 1
fi

mkdir -p "$OUTPUT_DIR"

SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
MAX_WORDS="$(jq -r '.tts_max_words_per_block' "$SETTINGS_FILE")"
# "// empty" turns a missing/null marker into an empty string instead of "null".
SPLIT_MARKER="$(jq -r '.tts_split_marker // empty' "$SETTINGS_FILE")"

# A missing key makes jq print "null", which would blow up the numeric test below.
if [[ ! "$MAX_WORDS" =~ ^[0-9]+$ ]]; then
    log_error "Invalid tts_max_words_per_block in ${SETTINGS_FILE}: ${MAX_WORDS}"
    exit 1
fi

TOTAL_WORDS="$(wc -w < "$INPUT_FILE")"
log_info "Total words: ${TOTAL_WORDS}, max per block: ${MAX_WORDS}"

if [[ "$TOTAL_WORDS" -le "$MAX_WORDS" ]]; then
    # No split needed
    cp "$INPUT_FILE" "${OUTPUT_DIR}/part_1.txt"
    log_info "No split needed (${TOTAL_WORDS} words). Created part_1.txt"
    echo "1"
    exit 0
fi

# Try to split at the FAQ marker.
# "|| true": with errexit + pipefail a grep without matches (exit 1) would
# otherwise terminate the script here and the fallback below would never run.
MARKER_LINE=""
if [[ -n "$SPLIT_MARKER" ]]; then
    MARKER_LINE="$(grep -nE -- "$SPLIT_MARKER" "$INPUT_FILE" | head -1 | cut -d: -f1 || true)"
fi

if [[ -n "$MARKER_LINE" && "$MARKER_LINE" -gt 1 ]]; then
    # Split at the marker line (FAQ section starts here)
    head -n "$((MARKER_LINE - 1))" "$INPUT_FILE" > "${OUTPUT_DIR}/part_1.txt"
    tail -n "+${MARKER_LINE}" "$INPUT_FILE" > "${OUTPUT_DIR}/part_2.txt"

    WORDS_1="$(wc -w < "${OUTPUT_DIR}/part_1.txt")"
    WORDS_2="$(wc -w < "${OUTPUT_DIR}/part_2.txt")"
    log_info "Split at FAQ marker (line ${MARKER_LINE}): part_1=${WORDS_1} words, part_2=${WORDS_2} words"
    echo "2"
else
    # No marker found — split at approximate midpoint by paragraph
    TOTAL_LINES="$(wc -l < "$INPUT_FILE")"

    # A file with fewer than two lines cannot be split by line; splitting it
    # would leave an empty part_1.txt, so keep it as a single block instead.
    if [[ "$TOTAL_LINES" -lt 2 ]]; then
        cp "$INPUT_FILE" "${OUTPUT_DIR}/part_1.txt"
        log_info "Cannot split by line (${TOTAL_LINES} lines). Created part_1.txt"
        echo "1"
        exit 0
    fi

    MID_LINE=$((TOTAL_LINES / 2))

    # Find nearest empty line (paragraph break) near midpoint
    SPLIT_LINE=""
    for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
        CHECK=$((MID_LINE + offset))
        if [[ "$CHECK" -gt 0 && "$CHECK" -lt "$TOTAL_LINES" ]]; then
            LINE_CONTENT="$(sed -n "${CHECK}p" "$INPUT_FILE")"
            if [[ "$LINE_CONTENT" =~ ^[[:space:]]*$ ]]; then
                SPLIT_LINE="$CHECK"
                break
            fi
        fi
    done

    # No paragraph break close to the middle: cut at the middle line itself.
    if [[ -z "$SPLIT_LINE" ]]; then
        SPLIT_LINE="$MID_LINE"
    fi

    head -n "$SPLIT_LINE" "$INPUT_FILE" > "${OUTPUT_DIR}/part_1.txt"
    tail -n "+$((SPLIT_LINE + 1))" "$INPUT_FILE" > "${OUTPUT_DIR}/part_2.txt"

    WORDS_1="$(wc -w < "${OUTPUT_DIR}/part_1.txt")"
    WORDS_2="$(wc -w < "${OUTPUT_DIR}/part_2.txt")"
    log_info "Split at paragraph break (line ${SPLIT_LINE}): part_1=${WORDS_1} words, part_2=${WORDS_2} words"
    echo "2"
fi
|
||||
Executable
+115
@@ -0,0 +1,115 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# transfer-to-server.sh — Transfere ficheiros do desktop para o CWP server via SCP
|
||||
# Utiliza a chave SSH em ~/.ssh/id_ed25519, porta 9443
|
||||
# Permissoes finais: ealmeida:ealmeida
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
SSH_KEY="${HOME}/.ssh/id_ed25519"
|
||||
SSH_PORT=9443
|
||||
SSH_HOST="server.descomplicar.pt"
|
||||
SSH_USER="root"
|
||||
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
|
||||
WP_PATH="/home/ealmeida/public_html"
|
||||
|
||||
# Print the command-line help and terminate with exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <type> <local_file> [remote_subdir]" \
        "" \
        "Types:" \
        " cover <file.png> -> wp-content/uploads/YYYY/MM/" \
        " audio <file.mp3> [YYYY/MM] -> wp-content/uploads/podcast/YYYY/MM/" \
        " batch <dir_of_files> <type> -> envia todos os ficheiros do directorio" \
        "" \
        "Examples:" \
        " $0 cover /path/to/ep001.png" \
        " $0 audio /path/to/ep020.mp3 2026/04" \
        " $0 batch /path/to/capas/ cover"
    exit 1
}
|
||||
|
||||
# Copy one local file ($1) to a path on the server ($2) over SCP.
# SSH_AUTH_SOCK is blanked for the call so only the key in SSH_KEY is offered.
scp_file() {
    local local_path="$1"
    local remote_path="$2"
    local target="${SSH_USER}@${SSH_HOST}:${remote_path}"

    # SSH_OPTS is intentionally unquoted: it holds several space-separated options.
    SSH_AUTH_SOCK= scp \
        -P "${SSH_PORT}" \
        -i "${SSH_KEY}" \
        ${SSH_OPTS} \
        "$local_path" "$target"
}
|
||||
|
||||
# Run the given command line on the server over SSH.
# SSH_AUTH_SOCK is blanked for the call so only the key in SSH_KEY is offered.
ssh_cmd() {
    local login="${SSH_USER}@${SSH_HOST}"

    # SSH_OPTS is intentionally unquoted: it holds several space-separated options.
    SSH_AUTH_SOCK= ssh \
        -p "${SSH_PORT}" \
        -i "${SSH_KEY}" \
        ${SSH_OPTS} \
        "$login" "$@"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Argument dispatch: <type> selects the upload mode, remaining args follow.
# ---------------------------------------------------------------------------
[[ $# -lt 2 ]] && usage

TYPE="$1"
shift

# Abort the script unless $1 is an existing regular file.
# Written as an if-block so the exit does not depend on log_error's status.
_require_file() {
    if [[ ! -f "$1" ]]; then
        log_error "Ficheiro nao encontrado: $1"
        exit 1
    fi
}

# Create remote dir $2, copy local file $1 into it and chown it to the site user.
_upload_one() {
    local local_file="$1" remote_dir="$2" base
    base="$(basename "$local_file")"

    ssh_cmd "mkdir -p '${remote_dir}'"
    scp_file "$local_file" "${remote_dir}/"
    ssh_cmd "chown ealmeida:ealmeida '${remote_dir}/${base}'"
}

case "$TYPE" in
    cover)
        LOCAL_FILE="$1"
        _require_file "$LOCAL_FILE"

        # Covers always go to the uploads folder of the current month.
        YEAR="$(date '+%Y')"
        MONTH="$(date '+%m')"
        REMOTE_DIR="${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}"

        _upload_one "$LOCAL_FILE" "$REMOTE_DIR"
        BASENAME="$(basename "$LOCAL_FILE")"

        log_info "Cover enviada: ${BASENAME} -> ${REMOTE_DIR}/"
        ;;

    audio)
        LOCAL_FILE="$1"
        _require_file "$LOCAL_FILE"

        # Optional second argument overrides the YYYY/MM sub-directory.
        if [[ $# -ge 2 ]]; then
            SUBDIR="$2"
        else
            YEAR="$(date '+%Y')"
            MONTH="$(date '+%m')"
            SUBDIR="${YEAR}/${MONTH}"
        fi
        REMOTE_DIR="${WP_PATH}/wp-content/uploads/podcast/${SUBDIR}"

        _upload_one "$LOCAL_FILE" "$REMOTE_DIR"
        BASENAME="$(basename "$LOCAL_FILE")"

        log_info "Audio enviado: ${BASENAME} -> ${REMOTE_DIR}/"
        ;;

    batch)
        LOCAL_DIR="$1"
        BATCH_TYPE="${2:-cover}"
        if [[ ! -d "$LOCAL_DIR" ]]; then
            log_error "Directorio nao encontrado: $LOCAL_DIR"
            exit 1
        fi

        YEAR="$(date '+%Y')"
        MONTH="$(date '+%m')"

        case "$BATCH_TYPE" in
            cover) REMOTE_DIR="${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}" ;;
            audio) REMOTE_DIR="${WP_PATH}/wp-content/uploads/podcast/${YEAR}/${MONTH}" ;;
            *)
                log_error "Tipo batch invalido: $BATCH_TYPE"
                exit 1
                ;;
        esac

        ssh_cmd "mkdir -p '${REMOTE_DIR}'"

        # Every supported extension is sent regardless of BATCH_TYPE; the type
        # only selects the destination directory.
        COUNT=0
        for f in "${LOCAL_DIR}"/*.{png,jpg,mp3,wav}; do
            # An unmatched glob stays literal; skip anything that is not a file.
            [[ -f "$f" ]] || continue
            scp_file "$f" "${REMOTE_DIR}/"
            COUNT=$((COUNT + 1))
        done

        ssh_cmd "chown -R ealmeida:ealmeida '${REMOTE_DIR}/'"
        log_info "Batch ${BATCH_TYPE}: ${COUNT} ficheiros enviados para ${REMOTE_DIR}/"
        ;;

    *)
        usage
        ;;
esac
|
||||
Executable
+62
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate TTS for a single text file via Gemini API."""
|
||||
import sys, wave, os
|
||||
|
||||
def main():
    """Synthesize one text file into a WAV file via the Gemini TTS model.

    Command line: ``tts-single-part.py <input.txt> <output.wav>``. The API key
    is read from the ``GEMINI_API_KEY`` environment variable.

    The audio bytes returned by the API are written as mono, 16-bit,
    24000 Hz PCM.

    Exits with status 1 on a usage error, a missing API key, an empty input
    file, or a response that carries no audio data.
    """
    if len(sys.argv) < 3:
        print("Usage: tts-single-part.py <input.txt> <output.wav>")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("ERROR: GEMINI_API_KEY not set")
        sys.exit(1)

    # Imported after the argument checks so usage and key errors are reported
    # without needing the SDK to be importable.
    from google import genai
    from google.genai import types

    style = (
        "Lê este texto em português de Portugal (PT-PT), com um tom enérgico, "
        "confiante, educativo, inspirador e profissional. Mantém o ritmo natural "
        "e envolvente, como se estivesses a conversar diretamente com o ouvinte, "
        "transmitindo proximidade e autoridade. Faz pequenas pausas para dar ênfase "
        "às ideias-chave e assegura que cada transição entre temas é fluida. Evita "
        "soar robótico ou demasiado formal; o objetivo é informar, motivar e criar "
        "ligação com quem está a ouvir."
    )

    # Explicit UTF-8: the scripts contain accented Portuguese text and the
    # locale default encoding is not guaranteed to be UTF-8.
    with open(input_file, "r", encoding="utf-8") as f:
        text = f.read()

    if not text.strip():
        print(f"ERROR: input file is empty: {input_file}")
        sys.exit(1)

    words = len(text.split())
    print(f"Generating TTS for {words} words...")

    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model="gemini-2.5-pro-preview-tts",
        contents=style + "\n\n" + text,
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Sulafat")
                )
            ),
        ),
    )

    # Any link of this chain may be missing or empty when no audio came back;
    # report that as one clear error instead of an unhandled traceback.
    try:
        data = response.candidates[0].content.parts[0].inline_data.data
    except (AttributeError, IndexError, TypeError):
        data = None
    if not data:
        print("ERROR: response contained no audio data")
        sys.exit(1)

    sample_rate = 24000
    sample_width = 2  # bytes per sample (16-bit)

    with wave.open(output_file, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(data)

    # One channel, so duration = number of samples / sample rate.
    duration = (len(data) // sample_width) / sample_rate
    print(f"OK: {duration:.0f}s -> {output_file}")


if __name__ == "__main__":
    main()
|
||||
Executable
+570
@@ -0,0 +1,570 @@
|
||||
#!/bin/bash
|
||||
|
||||
# =================================================================
|
||||
# COMANDOS DE INSTALACAO WiP - Websites Inteligentes e Poderosos
|
||||
# Versao: 3.0
|
||||
# Data: 24 Fevereiro 2026
|
||||
# Autor: Descomplicar - Emanuel Almeida
|
||||
# =================================================================
|
||||
#
|
||||
# IMPORTANTE: Este script corre no servidor CWP via SSH MCP
|
||||
# SEMPRE usar --allow-root (user shell e /usr/sbin/nologin no CWP)
|
||||
# Path base: /home/USER/dominio.pt (user CWP != nome dominio)
|
||||
#
|
||||
# Mudancas v3.0:
|
||||
# - Removido MainWP (descontinuado)
|
||||
# - Tema hello-elementor (era astra)
|
||||
# - fluentform no core (era wpforms-lite)
|
||||
# - Adicionado complianz-gdpr (GDPR, core)
|
||||
# - Adicionado clean-admin mu-plugin
|
||||
# - --allow-root obrigatorio em todos os comandos
|
||||
# - Removidos: wpvivid, bit-integrations, ai-engine, branda
|
||||
# =================================================================
|
||||
|
||||
# Cores para output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
# Timestamped informational message in green.
log() {
    local stamp
    stamp="$(date +'%Y-%m-%d %H:%M:%S')"
    echo -e "${GREEN}[${stamp}] $1${NC}"
}

# Warning message in yellow.
warn() {
    echo -e "${YELLOW}[WARN] $1${NC}"
}

# Error message in red.
err() {
    echo -e "${RED}[ERROR] $1${NC}"
}
|
||||
|
||||
# Helper: executa wp com flags obrigatorias
|
||||
# Helper: run WP-CLI against the install at $1 with the mandatory flags.
# Every argument after the first is forwarded to wp unchanged.
wp_run() {
    local site_root=$1
    wp "${@:2}" --allow-root --path="$site_root"
}
|
||||
|
||||
# =================================================================
|
||||
# SETUP WORDPRESS BASE
|
||||
# =================================================================
|
||||
# Download, configure and install a bare WordPress (pt_PT) with Portuguese
# defaults, clean default content and the hello-elementor theme.
#
# Arguments:
#   $1 domain       site domain; also used as title and for the DB name
#   $2 wp_path      absolute install path
#   $3 admin_user   login of the administrator to create
#   $4 admin_email  e-mail of the administrator
# Environment: DB_USER, DB_PASS (database credentials for wp-config).
setup_wordpress_base() {
    local domain=$1
    local wp_path=$2
    local admin_user=$3
    local admin_email=$4
    local admin_pass

    log "Configurando WordPress base: $domain (path: $wp_path)"

    # Download WordPress in PT-PT
    wp_run "$wp_path" core download --locale=pt_PT --skip-content

    # Create wp-config; the DB name is the domain with dots replaced by "_"
    wp_run "$wp_path" config create \
        --dbname="${domain//./_}_db" \
        --dbuser="$DB_USER" \
        --dbpass="$DB_PASS" \
        --dbhost="localhost"

    # Install WordPress. The random password is kept in a variable and shown
    # once: generated inline it was discarded and the admin could not log in.
    if wp_run "$wp_path" core is-installed 2>/dev/null; then
        warn "WordPress ja instalado em $wp_path - credenciais existentes mantidas"
    else
        admin_pass="$(openssl rand -base64 12)"
        if wp_run "$wp_path" core install \
            --url="https://$domain" \
            --title="$domain" \
            --admin_user="$admin_user" \
            --admin_password="$admin_pass" \
            --admin_email="$admin_email" \
            --skip-email; then
            warn "Password gerada para '$admin_user': $admin_pass (guardar agora, nao volta a ser mostrada)"
        else
            err "wp core install falhou para $domain"
        fi
    fi

    # Base settings for Portugal
    wp_run "$wp_path" option update timezone_string 'Europe/Lisbon'
    wp_run "$wp_path" option update date_format 'd/m/Y'
    wp_run "$wp_path" option update time_format 'H:i'
    wp_run "$wp_path" option update start_of_week 1
    wp_run "$wp_path" option update blogdescription ''
    wp_run "$wp_path" option update default_comment_status 'closed'
    wp_run "$wp_path" option update comment_moderation 1

    # Permalinks
    wp_run "$wp_path" rewrite structure '/%postname%/' --hard
    wp_run "$wp_path" rewrite flush

    # Remove default plugins and content (best effort: they may not exist)
    wp_run "$wp_path" plugin delete hello akismet 2>/dev/null || true
    wp_run "$wp_path" post delete 1 2 --force 2>/dev/null || true
    wp_run "$wp_path" comment delete 1 --force 2>/dev/null || true

    # Theme used together with Elementor
    wp_run "$wp_path" theme install hello-elementor --activate

    log "WordPress base configurado para $domain"
}
|
||||
|
||||
# =================================================================
|
||||
# INSTALAR CORE WiP (TODOS OS PLANOS)
|
||||
# =================================================================
|
||||
# Install and activate the core plugin set shared by every WiP plan.
#
# Arguments:
#   $1 wp_path  absolute path of the WordPress install
install_core_wip() {
    local wp_path=$1

    # Core security
    local security=(
        wordfence
        wp-security-audit-log
        complianz-gdpr
    )
    # Core performance
    local performance=(
        wp-fastest-cache
        webp-express
        insert-headers-and-footers
        updraftplus
    )
    # Core design (free Elementor by default; theme is activated elsewhere)
    local design=(
        elementor
        elementskit-lite
        loco-translate
        seo-by-rank-math
    )
    # Core marketing
    local marketing=(
        fluent-crm
        fluent-smtp
        fluentform
    )

    log "Instalando plugins core WiP..."

    wp_run "$wp_path" plugin install "${security[@]}" --activate
    wp_run "$wp_path" plugin install "${performance[@]}" --activate
    wp_run "$wp_path" plugin install "${design[@]}" --activate
    wp_run "$wp_path" plugin install "${marketing[@]}" --activate

    # Derive the count from the lists so the message cannot drift again
    # (it used to claim 15 while 14 plugins were installed).
    local total=$(( ${#security[@]} + ${#performance[@]} + ${#design[@]} + ${#marketing[@]} ))
    log "Core WiP instalado (${total} plugins + tema)"
}
|
||||
|
||||
# =================================================================
|
||||
# MU-PLUGIN: DESCOMPLICAR CLEAN ADMIN
|
||||
# Remove banners promo do admin (Elementor, ElementsKit, etc.)
|
||||
# =================================================================
|
||||
# Copy the descomplicar-clean-admin mu-plugin into the site and give it to the
# account named by the third "/"-separated field of the path.
#
# Arguments:
#   $1 wp_path  absolute path of the WordPress install
# Returns 1 when the plugin source file is missing.
install_clean_admin() {
    local wp_path=$1
    local plugin_file="descomplicar-clean-admin.php"
    local clean_admin_src="${HOME}/.claude-work/${plugin_file}"
    local mu_plugins_dir="${wp_path}/wp-content/mu-plugins"
    local installed="${mu_plugins_dir}/${plugin_file}"

    log "A instalar mu-plugin descomplicar-clean-admin..."

    if ! [ -f "$clean_admin_src" ]; then
        warn "Ficheiro nao encontrado: $clean_admin_src"
        warn "Fazer download do repositorio antes de continuar."
        return 1
    fi

    mkdir -p "$mu_plugins_dir"
    cp "$clean_admin_src" "$installed"

    # Fix ownership: owner name is the third path component (/home/USER/...)
    local cwp_user
    cwp_user=$(cut -d/ -f3 <<< "$wp_path")
    chown "${cwp_user}:${cwp_user}" "$installed"

    log "mu-plugin clean-admin instalado em $mu_plugins_dir"
}
|
||||
|
||||
# =================================================================
|
||||
# CONFIGURAR WPFC (WP Fastest Cache)
|
||||
# A configuracao via SQL e necessaria — opcoes estao como JSON string
|
||||
# =================================================================
|
||||
# Write the WP Fastest Cache settings.
# The plugin keeps its settings as one JSON string in the WpFastestCacheOptions
# option, so the whole string is stored with a single `wp option update`.
#
# Arguments:
#   $1 wp_path  absolute path of the WordPress install
configure_wpfc() {
    local wp_path=$1

    log "Configurando WP Fastest Cache..."

    # The unused table-prefix lookup was removed: it cost one extra WP-CLI
    # bootstrap and its result was never read.
    local wpfc_config='{"is_mobile_theme":"","wpFastestCacheStatus":"on","wpFastestCacheSsl":"on","wpFastestCacheMobile":"on","wpFastestCacheLoggedInUsers":"on","wpFastestCacheNewPost":"on","wpFastestCacheUpdatePost":"on","wpFastestCacheByPass":"","wpFastestCacheRenderBlocking":"on","wpFastestCacheSmallImage":"","wpFastestCacheCombineCss":"on","wpFastestCacheCombineJs":"on","wpFastestCacheMinifyCss":"on","wpFastestCacheMinifyCssExclude":"","wpFastestCacheMinifyJs":"on","wpFastestCacheMinifyJsExclude":"","wpFastestCacheCDN":"","wpFastestCacheCDNexclude":"","wpFastestCacheLanguage":"","wpFastestCacheWordPress":""}'

    wp_run "$wp_path" option update WpFastestCacheOptions "$wpfc_config"

    log "WPFC configurado"
}
|
||||
|
||||
# =================================================================
|
||||
# INSTALAR TRADUCOES PT-PT
|
||||
# =================================================================
|
||||
# Copy every .po/.mo file from the local translation library into the site's
# Loco Translate plugin-languages folder.
#
# Arguments:
#   $1 wp_path  absolute path of the WordPress install
# Returns 1 when the translation library directory does not exist.
install_translations() {
    local wp_path=$1
    local translations_src="/media/ealmeida/Dados/Dev/WordPress/Traducao-Plugins-PT-PT"

    log "A instalar traducoes PT-PT..."

    if [ ! -d "$translations_src" ]; then
        warn "Biblioteca de traducoes nao encontrada: $translations_src"
        warn "Instalar traducoes manualmente via Loco Translate."
        return 1
    fi

    local loco_dir="${wp_path}/wp-content/languages/loco/plugins"
    mkdir -p "$loco_dir"

    # find replaces the "**" globs: without `shopt -s globstar` they only
    # matched one directory level. NUL-delimited so odd file names are safe.
    # Process substitution keeps the loop in this shell so count survives.
    local count=0
    local translation_file
    while IFS= read -r -d '' translation_file; do
        # Count only the files that were really copied.
        if cp "$translation_file" "$loco_dir/"; then
            count=$((count + 1))
        fi
    done < <(find "$translations_src" -type f \( -name '*.po' -o -name '*.mo' \) -print0)

    # Owner name is the third path component (/home/USER/...)
    local cwp_user
    cwp_user=$(echo "$wp_path" | cut -d/ -f3)
    chown -R "${cwp_user}:${cwp_user}" "$loco_dir"

    log "Traducoes PT-PT instaladas ($count ficheiros)"
}
|
||||
|
||||
# =================================================================
|
||||
# OFERTA STARTER (~22 plugins)
|
||||
# =================================================================
|
||||
# STARTER plan: base WordPress + core plugins + clean-admin mu-plugin +
# the Starter-only plugins, then cache settings, translations and ownership.
#
# Arguments:
#   $1 domain       site domain
#   $2 wp_path      absolute install path
#   $3 admin_user   admin login (default: admin)
#   $4 admin_email  admin e-mail (default: admin@descomplicar.pt)
install_starter() {
    local domain=$1 wp_path=$2
    local admin_user=${3:-admin}
    local admin_email=${4:-admin@descomplicar.pt}

    # Plugins installed only from the Starter plan upwards
    local starter_plugins=(
        bdthemes-element-pack-lite
        happy-elementor-addons
        envato-elements
        bit-integrations
        bit-social
        ai-engine
        branda-white-labeling
    )

    log "=== INSTALACAO STARTER: $domain ==="

    setup_wordpress_base "$domain" "$wp_path" "$admin_user" "$admin_email"
    install_core_wip "$wp_path"
    install_clean_admin "$wp_path"

    log "Instalando plugins unicos Starter..."
    wp_run "$wp_path" plugin install "${starter_plugins[@]}" --activate

    configure_wpfc "$wp_path"
    install_translations "$wp_path"

    # Final ownership fix: owner is the third path component (/home/USER/...)
    local cwp_user
    cwp_user=$(cut -d/ -f3 <<< "$wp_path")
    chown -R "${cwp_user}:${cwp_user}" "$wp_path"

    log "STARTER instalado (~22 plugins)"
    site_info "$wp_path"
}
|
||||
|
||||
# =================================================================
|
||||
# OFERTA CORPORATE (~28 plugins)
|
||||
# =================================================================
|
||||
# CORPORATE plan: everything in Starter, then swaps the lite element pack out,
# adds the Corporate plugins and warns about the premium ZIP installs.
#
# Arguments:
#   $1 domain       site domain
#   $2 wp_path      absolute install path
#   $3 admin_user   admin login (default: admin)
#   $4 admin_email  admin e-mail (default: admin@descomplicar.pt)
install_corporate() {
    local domain=$1 wp_path=$2
    local admin_user=${3:-admin}
    local admin_email=${4:-admin@descomplicar.pt}
    local lite_pack="bdthemes-element-pack-lite"
    local corporate_plugins=(
        google-site-kit
        wp-event-solution
        premium-addons-for-elementor
    )

    log "=== INSTALACAO CORPORATE: $domain ==="

    install_starter "$domain" "$wp_path" "$admin_user" "$admin_email"

    log "Upgrade para CORPORATE..."

    # Elementor Pro is not on wp.org: it has to be installed from a ZIP by hand
    warn "ATENCAO: Elementor Pro requer instalacao manual via ZIP."
    warn "Desactivar elementor free e instalar elementor-pro via admin ou WP-CLI upload."

    # Lite element pack makes way for the premium one (manual ZIP)
    wp_run "$wp_path" plugin deactivate "$lite_pack"
    wp_run "$wp_path" plugin delete "$lite_pack"
    warn "ATENCAO: bdthemes-element-pack (premium) requer instalacao manual via ZIP."

    # Corporate extras
    wp_run "$wp_path" plugin install "${corporate_plugins[@]}" --activate

    # happyfiles-pro is premium as well
    warn "ATENCAO: happyfiles-pro requer instalacao manual via ZIP."

    # Ownership fix: owner is the third path component (/home/USER/...)
    local cwp_user
    cwp_user=$(cut -d/ -f3 <<< "$wp_path")
    chown -R "${cwp_user}:${cwp_user}" "$wp_path"

    log "CORPORATE instalado (~28 plugins)"
}
|
||||
|
||||
# =================================================================
|
||||
# OFERTA CARE (~40 plugins)
|
||||
# =================================================================
|
||||
# CARE plan: everything in Corporate, plus support/chat plugins, a light
# WooCommerce stack with Portuguese settings, and warnings for the ZIP-only
# KiviCare and custom Care plugins.
#
# Arguments:
#   $1 domain       site domain
#   $2 wp_path      absolute install path
#   $3 admin_user   admin login (default: admin)
#   $4 admin_email  admin e-mail (default: admin@descomplicar.pt)
install_care() {
    local domain=$1 wp_path=$2
    local admin_user=${3:-admin}
    local admin_email=${4:-admin@descomplicar.pt}
    local care_plugins=(
        fluent-support
        click-to-chat-for-whatsapp
    )
    local shop_plugins=(
        woocommerce
        multibanco-ifthen-software-gateway-for-woocommerce
        contribuinte-checkout
        moloni
    )

    log "=== INSTALACAO CARE: $domain ==="

    install_corporate "$domain" "$wp_path" "$admin_user" "$admin_email"

    log "Adicionando funcionalidades CARE..."

    # KiviCare is premium only: installed by hand from ZIP files
    warn "ATENCAO: KiviCare requer instalacao manual via ZIP (5 plugins)."
    warn "Plugins: kivicare-clinic-management-system, kivicare-pro,"
    warn "kivicare-telemed-addon, kivicare-google-meet, kivicare-webhook-addon"

    # Care custom
    wp_run "$wp_path" plugin install "${care_plugins[@]}" --activate
    warn "Instalar via ZIP: care-notificacao-whatsms-main, sinc-care, sinccare-fatura"

    # Light e-commerce
    wp_run "$wp_path" plugin install "${shop_plugins[@]}" --activate

    # WooCommerce settings for Portugal, applied as name=value pairs
    local setting
    for setting in \
        "woocommerce_store_address=Portugal" \
        "woocommerce_currency=EUR" \
        "woocommerce_default_country=PT" \
        "woocommerce_calc_taxes=yes"; do
        wp_run "$wp_path" option update "${setting%%=*}" "${setting#*=}"
    done

    # Ownership fix: owner is the third path component (/home/USER/...)
    local cwp_user
    cwp_user=$(cut -d/ -f3 <<< "$wp_path")
    chown -R "${cwp_user}:${cwp_user}" "$wp_path"

    log "CARE instalado (~40 plugins)"
}
|
||||
|
||||
# =================================================================
|
||||
# OFERTA ECOMMERCE (~38 plugins)
|
||||
# =================================================================
|
||||
# ECOMMERCE plan: everything in Corporate, plus the WooCommerce stack,
# Portuguese shop settings, the WooCommerce pages and the Portuguese VAT rates.
#
# Arguments:
#   $1 domain       site domain
#   $2 wp_path      absolute install path
#   $3 admin_user   admin login (default: admin); also used to run `wp wc`
#   $4 admin_email  admin e-mail (default: admin@descomplicar.pt)
install_ecommerce() {
    local domain=$1
    local wp_path=$2
    local admin_user=${3:-"admin"}
    local admin_email=${4:-"admin@descomplicar.pt"}

    log "=== INSTALACAO ECOMMERCE: $domain ==="

    install_corporate "$domain" "$wp_path" "$admin_user" "$admin_email"

    log "Adicionando funcionalidades E-COMMERCE..."

    # WooCommerce stack
    wp_run "$wp_path" plugin install \
        woocommerce \
        multibanco-ifthen-software-gateway-for-woocommerce \
        wholesalex \
        woo-save-abandoned-carts \
        money-manager \
        betterdocs \
        fluent-support \
        click-to-chat-for-whatsapp \
        --activate

    # wpfunnels (lite) is available on wp.org
    wp_run "$wp_path" plugin install wpfunnels --activate
    warn "wpfunnels-pro e woocommerce-dashboard-stats: instalar via ZIP"

    # WooCommerce settings for Portugal
    wp_run "$wp_path" option update woocommerce_store_address 'Portugal'
    wp_run "$wp_path" option update woocommerce_currency 'EUR'
    wp_run "$wp_path" option update woocommerce_default_country 'PT'
    wp_run "$wp_path" option update woocommerce_calc_taxes 'yes'
    wp_run "$wp_path" option update woocommerce_enable_coupons 'yes'
    wp_run "$wp_path" option update woocommerce_manage_stock 'yes'

    # `wp wc` commands act through the WooCommerce REST API and must run as a
    # user allowed to manage the shop; without --user they are rejected.
    # Create the WooCommerce pages
    wp_run "$wp_path" wc tool run install_pages --user="$admin_user"

    # Portuguese VAT rates
    # NOTE(review): 13% is filed under "reduced-rate" and 6% under "zero-rate";
    # kept as in the original mapping - confirm this is the intended class use.
    wp_run "$wp_path" wc tax create --country=PT --rate=23 --name="IVA Normal" --class=standard --user="$admin_user"
    wp_run "$wp_path" wc tax create --country=PT --rate=13 --name="IVA Intermedio" --class=reduced-rate --user="$admin_user"
    wp_run "$wp_path" wc tax create --country=PT --rate=6 --name="IVA Reduzido" --class=zero-rate --user="$admin_user"

    # Ownership fix: owner is the third path component (/home/USER/...)
    local cwp_user
    cwp_user=$(echo "$wp_path" | cut -d/ -f3)
    chown -R "${cwp_user}:${cwp_user}" "$wp_path"

    log "ECOMMERCE instalado (~38 plugins)"
}
|
||||
|
||||
# =================================================================
|
||||
# OFERTA CHALLENGE (~40+ plugins)
|
||||
# =================================================================
|
||||
# CHALLENGE plan: everything in E-commerce, plus migration/compliance plugins
# and a sector-specific step.
#
# Arguments:
#   $1 domain       site domain
#   $2 wp_path      absolute install path
#   $3 sector       automotive|auto, healthcare|saude, education|educacao,
#                   anything else is treated as generic (default: general)
#   $4 admin_user   admin login (default: admin)
#   $5 admin_email  admin e-mail (default: admin@descomplicar.pt)
install_challenge() {
    local domain=$1 wp_path=$2
    local sector=${3:-general}
    local admin_user=${4:-admin}
    local admin_email=${5:-admin@descomplicar.pt}
    local challenge_plugins=(
        product-import-export-for-woo
        wpconsent-cookies-banner-privacy-suite
    )

    log "=== INSTALACAO CHALLENGE: $domain (Sector: $sector) ==="

    install_ecommerce "$domain" "$wp_path" "$admin_user" "$admin_email"

    log "Adicionando funcionalidades CHALLENGE..."

    # Migration & compliance
    wp_run "$wp_path" plugin install "${challenge_plugins[@]}" --activate

    warn "fg-prestashop-to-woocommerce-premium: instalar via ZIP (plugin premium)"

    # Sector-specific plugins
    case $sector in
        automotive | auto)
            # Automotive plugins are chosen per client, nothing is installed here
            log "Configurando para sector AUTOMOVEL..."
            ;;
        healthcare | saude)
            log "Configurando para sector SAUDE..."
            warn "Considerar instalar KiviCare (ver plano Care)"
            ;;
        education | educacao)
            log "Configurando para sector EDUCACAO..."
            wp_run "$wp_path" plugin install learnpress --activate
            ;;
        *)
            log "Challenge generico — ajustar conforme cliente"
            ;;
    esac

    # Ownership fix: owner is the third path component (/home/USER/...)
    local cwp_user
    cwp_user=$(cut -d/ -f3 <<< "$wp_path")
    chown -R "${cwp_user}:${cwp_user}" "$wp_path"

    log "CHALLENGE instalado (~40+ plugins, sector: $sector)"
}
|
||||
|
||||
# =================================================================
|
||||
# INFO SITE
|
||||
# =================================================================
|
||||
# Print a short summary of an existing site: path, core version, active theme,
# number of active plugins, followed by the table of active plugins.
#
# Arguments:
#   $1 wp_path  absolute path of the WordPress install
site_info() {
    local wp_path=$1
    local core_version active_theme active_count

    # Queried up front; errors are silenced so a missing value prints as empty
    core_version=$(wp_run "$wp_path" core version 2>/dev/null)
    active_theme=$(wp_run "$wp_path" theme list --status=active --field=name 2>/dev/null)
    active_count=$(wp_run "$wp_path" plugin list --status=active --format=count 2>/dev/null)

    echo -e "${BLUE}=== INFORMACOES DO SITE ===${NC}"
    echo -e "${BLUE}Path: $wp_path${NC}"
    echo -e "${BLUE}WordPress: ${core_version}${NC}"
    echo -e "${BLUE}Tema activo: ${active_theme}${NC}"
    echo -e "${BLUE}Plugins activos: ${active_count}${NC}"
    echo -e "${BLUE}==============================${NC}"

    echo -e "${YELLOW}Plugins activos:${NC}"
    wp_run "$wp_path" plugin list --status=active --format=table
}
|
||||
|
||||
# =================================================================
|
||||
# MENU PRINCIPAL
|
||||
# =================================================================
|
||||
# Print the installer banner (in blue) followed by the numbered plan menu.
show_menu() {
    local rule="=================================================="

    echo -e "${BLUE}"
    printf '%s\n' \
        "$rule" \
        " WiP - Websites Inteligentes e Poderosos" \
        " Instalador v3.0 (Fev 2026)" \
        "$rule"
    echo -e "${NC}"

    printf '%s\n' \
        "1) Starter - Corporativo basico (~22 plugins)" \
        "2) Corporate - Empresarial avancado (~28 plugins)" \
        "3) Care - Sector saude (~40 plugins)" \
        "4) E-commerce - Loja online completa (~38 plugins)" \
        "5) Challenge - Especializado premium (~40+ plugins)" \
        "6) Info site - Informacoes site existente" \
        "7) Sair" \
        ""
}
|
||||
|
||||
# =================================================================
|
||||
# MAIN
|
||||
# =================================================================
|
||||
# Ask for domain and install path using the given prompts ($1, $2).
# Sets the globals domain and wp_path; returns 1 when either is empty.
_wip_read_site() {
    read -r -p "$1" domain || true
    read -r -p "$2" wp_path || true
    if [ -z "$domain" ] || [ -z "$wp_path" ]; then
        err "Dominio e path sao obrigatorios."
        return 1
    fi
}

# Ask for the admin login and e-mail. Sets the globals admin_user (default
# "admin") and admin_email (may stay empty; the installers supply a default).
_wip_read_admin() {
    read -r -p "Admin user [admin]: " admin_user || true
    read -r -p "Admin email: " admin_email || true
    admin_user=${admin_user:-admin}
}

# Interactive entry point: checks prerequisites (wp in PATH, DB_USER/DB_PASS
# set), then loops over the plan menu until the user picks "Sair" or stdin ends.
main() {
    if ! command -v wp &> /dev/null; then
        err "WP-CLI nao encontrado. Verificar instalacao."
        exit 1
    fi

    if [ -z "${DB_USER:-}" ] || [ -z "${DB_PASS:-}" ]; then
        err "Variaveis DB_USER e DB_PASS nao definidas."
        echo "Exportar antes de correr: export DB_USER=user DB_PASS=pass"
        exit 1
    fi

    local choice
    while true; do
        show_menu

        # read fails at end of input; without this check a closed stdin made
        # the loop spin forever on "Opcao invalida.".
        choice=""
        if ! read -r -p "Escolha [1-7]: " choice && [ -z "$choice" ]; then
            echo ""
            log "Saindo."
            exit 0
        fi

        case $choice in
            1)
                _wip_read_site "Dominio (ex: cliente.pt): " "Path completo (ex: /home/ealmeida/cliente.pt): " \
                    && _wip_read_admin \
                    && install_starter "$domain" "$wp_path" "$admin_user" "$admin_email"
                ;;
            2)
                _wip_read_site "Dominio: " "Path completo: " \
                    && _wip_read_admin \
                    && install_corporate "$domain" "$wp_path" "$admin_user" "$admin_email"
                ;;
            3)
                _wip_read_site "Dominio: " "Path completo: " \
                    && _wip_read_admin \
                    && install_care "$domain" "$wp_path" "$admin_user" "$admin_email"
                ;;
            4)
                _wip_read_site "Dominio: " "Path completo: " \
                    && _wip_read_admin \
                    && install_ecommerce "$domain" "$wp_path" "$admin_user" "$admin_email"
                ;;
            5)
                if _wip_read_site "Dominio: " "Path completo: "; then
                    read -r -p "Sector [automotive/healthcare/education/general]: " sector || true
                    _wip_read_admin
                    sector=${sector:-general}
                    install_challenge "$domain" "$wp_path" "$sector" "$admin_user" "$admin_email"
                fi
                ;;
            6)
                read -r -p "Path completo do site: " wp_path || true
                site_info "$wp_path"
                ;;
            7)
                log "Saindo."
                exit 0
                ;;
            *)
                err "Opcao invalida."
                ;;
        esac

        echo ""
        read -r -p "Pressione Enter para continuar..." || true
        clear
    done
}

# Run the menu only when executed directly, not when the file is sourced.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi
|
||||
Reference in New Issue
Block a user