feat: scripts de projectos vindos do Hub (podcast, alojadamaria, clip, ocr, etc.)

Movidos do vault Hub para centralizar scripts. Hub mantem symlinks.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-28 20:53:29 +01:00
parent e810bbb114
commit 6035542b67
27 changed files with 4246 additions and 0 deletions
+161
View File
@@ -0,0 +1,161 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/scripts/lib.sh"
# Print the pipeline's command reference on stdout, then terminate the
# script with exit status 1. Called when no command or an unknown command
# is given.
usage() {
  local prog="$0"
  printf '%s\n' \
    'Podcast Descomplicar Digital — Pipeline Automatizado' \
    "Usage: ${prog} <command> [args]" \
    'Commands:' \
    'batch N Process next N episodes (content generation)' \
    'status Show pipeline state' \
    'retry NNN Re-run failed stage for episode NNN' \
    'csv [start] [N] Generate Canva CSV (default: next 7 episodes)' \
    'publish NNN D Generate publish commands for episode NNN on date D (YYYY-MM-DD)' \
    'init NNN Add episode NNN to pipeline state' \
    'produce NNN F Post-produce episode NNN from raw audio file F' \
    'Examples:' \
    "${prog} batch 5" \
    "${prog} status" \
    "${prog} csv 20 10" \
    "${prog} produce 20 /path/to/raw.wav" \
    "${prog} publish 20 2026-04-14"
  exit 1
}
# ---------------------------------------------------------------------------
# Command dispatch for the podcast pipeline.
#
# Reads the sub-command from $1 and delegates to the helper scripts under
# scripts/. STATE_FILE, LOG_DIR, PROJECT_ROOT and the log_* / episode helpers
# are provided by the sourced lib.sh.
#
# Exit status: 0 on success, non-zero when a stage fails or the requested
# episode / raw audio cannot be found.
# ---------------------------------------------------------------------------
[[ $# -lt 1 ]] && usage
CMD="$1"
shift
ensure_state_file
mkdir -p "$LOG_DIR"

case "$CMD" in
  batch)
    # batch [N]: generate content for the next N episodes (default 5).
    COUNT="${1:-5}"
    if [[ ! "$COUNT" =~ ^[0-9]+$ ]]; then
      log_error "batch: N must be a non-negative integer (got '${COUNT}')"
      exit 1
    fi
    START="$(jq -r '.next_episode' "$STATE_FILE")"
    if [[ ! "$START" =~ ^[0-9]+$ ]]; then
      log_error "batch: '.next_episode' in ${STATE_FILE} is not a number (got '${START}')"
      exit 1
    fi
    # Force base 10 so values with leading zeros are not parsed as octal.
    COUNT=$((10#$COUNT))
    START=$((10#$START))
    END=$((START + COUNT - 1))
    log_info "=== BATCH: Processing episodes ${START}-${END} ==="

    CURRENT_DATE="$(jq -r '.next_publish_date' "$STATE_FILE")"
    if [[ -z "$CURRENT_DATE" || "$CURRENT_DATE" == "null" ]]; then
      CURRENT_DATE="$(date '+%Y-%m-%d')"
    fi

    for ((ep = START; ep <= END; ep++)); do
      log_info "--- Episode ${ep} ---"
      STATUS="$(get_episode_status "$ep")"
      if [[ "$STATUS" == "not_found" || "$STATUS" == "pending" ]]; then
        "${SCRIPT_DIR}/scripts/generate-content.sh" "$ep" || {
          log_error "EP$(pad_number "$ep"): Content generation failed, stopping batch"
          exit 1
        }
      else
        log_info "EP$(pad_number "$ep"): Already at status '${STATUS}', skipping content generation"
      fi

      # Re-read the status: content generation may just have advanced it.
      STATUS="$(get_episode_status "$ep")"
      if [[ "$STATUS" == "script_done" ]]; then
        log_warn "EP$(pad_number "$ep"): Audio TTS needed (manual step via AI Studio)"
        log_warn " -> Generate audio, save to Episodios/Audios/raw/ep_$(pad_number "$ep")_raw.wav"
        log_warn " -> Then run: $0 produce ${ep} <raw_audio_path>"
      fi
      CURRENT_DATE="$(next_weekday "$CURRENT_DATE")"
    done

    # Persist the new cursor. The write is checked explicitly: inside an
    # `a && b` list a failing `a` does not trigger `set -e`, so a jq error
    # would otherwise be ignored and the batch reported as complete.
    if ! jq --argjson n "$((END + 1))" --arg d "$CURRENT_DATE" \
      '.next_episode = $n | .next_publish_date = $d' \
      "$STATE_FILE" > "${STATE_FILE}.tmp"; then
      rm -f -- "${STATE_FILE}.tmp"
      log_error "Failed to update pipeline state in ${STATE_FILE}"
      exit 1
    fi
    mv -- "${STATE_FILE}.tmp" "$STATE_FILE"
    log_info "=== BATCH COMPLETE. Next episode: $((END + 1)), next date: ${CURRENT_DATE} ==="
    ;;

  status)
    # status: print the cursor and a per-episode / per-status summary.
    echo "=== Pipeline Status ==="
    echo "Next episode: $(jq -r '.next_episode' "$STATE_FILE")"
    echo "Next publish date: $(jq -r '.next_publish_date' "$STATE_FILE")"
    echo ""
    EPISODE_COUNT="$(jq '.episodes | length' "$STATE_FILE")"
    if [[ "$EPISODE_COUNT" -gt 0 ]]; then
      echo "Episodes in pipeline:"
      jq -r '.episodes[] | " EP\(.number | tostring | if length < 3 then "0" * (3 - length) + . else . end): \(.status) - \(.title)"' "$STATE_FILE"
      echo ""
      echo "Counts:"
      jq -r '.episodes | group_by(.status) | map({status: .[0].status, count: length}) | .[] | " \(.status): \(.count)"' "$STATE_FILE"
    else
      echo "No episodes in pipeline yet. Run 'batch' to start."
    fi
    ;;

  retry)
    # retry NNN: re-run whichever stage the episode is currently waiting on.
    EP="${1:?Episode number required}"
    STATUS="$(get_episode_status "$EP")"
    log_info "Retrying EP$(pad_number "$EP") (current status: ${STATUS})"
    case "$STATUS" in
      pending | not_found)
        "${SCRIPT_DIR}/scripts/generate-content.sh" "$EP"
        ;;
      script_done)
        log_warn "Audio TTS needed — manual step via AI Studio"
        ;;
      audio_done)
        # Use the first raw audio file found for this episode.
        RAW=""
        for f in "${PROJECT_ROOT}/Episodios/Audios/raw/ep_$(pad_number "$EP")_"*; do
          if [[ -f "$f" ]]; then
            RAW="$f"
            break
          fi
        done
        if [[ -n "$RAW" ]]; then
          "${SCRIPT_DIR}/scripts/post-produce.sh" "$EP" "$RAW"
        else
          log_error "Raw audio not found. Use: $0 produce ${EP} <path>"
          exit 1
        fi
        ;;
      produced)
        log_info "Ready to publish. Use: $0 publish ${EP} YYYY-MM-DD"
        ;;
      *)
        log_warn "Unknown status: ${STATUS}"
        ;;
    esac
    ;;

  csv)
    # csv [start] [N]: arguments are forwarded untouched.
    "${SCRIPT_DIR}/scripts/generate-csv.sh" "$@"
    ;;

  produce)
    EP="${1:?Episode number required}"
    RAW="${2:?Raw audio path required}"
    "${SCRIPT_DIR}/scripts/post-produce.sh" "$EP" "$RAW"
    ;;

  publish)
    # publish NNN [D]: D defaults to the stored next publish date.
    EP="${1:?Episode number required}"
    SCHED_DATE="${2:-$(jq -r '.next_publish_date' "$STATE_FILE")}"
    "${SCRIPT_DIR}/scripts/publish-episode.sh" "$EP" "$SCHED_DATE"
    ;;

  init)
    # init NNN: register an episode from the guide map in the pipeline state.
    EP="${1:?Episode number required}"
    MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
    EP_DATA="$(jq -r --arg n "$EP" '.[$n] // empty' "$MAP_FILE")"
    if [[ -z "$EP_DATA" ]]; then
      log_error "Episode $EP not found in episode-guide-map.json"
      exit 1
    fi
    TITLE="$(jq -r '.podcast_title' <<< "$EP_DATA")"
    URL="$(jq -r '.guide_url' <<< "$EP_DATA")"
    add_episode "$EP" "$TITLE" "$URL"
    log_info "Added EP$(pad_number "$EP"): ${TITLE}"
    ;;

  *)
    usage
    ;;
esac
+224
View File
@@ -0,0 +1,224 @@
#!/usr/bin/env bash
set -euo pipefail
# apply-wp-content.sh — Aplica conteúdo _wp.json a posts já agendados no WordPress
#
# Uso: ./scripts/apply-wp-content.sh <ep_num> [post_id]
# Se post_id não for fornecido, pesquisa por data na pipeline-state.json
#
# Útil para: episódios que foram agendados antes do _wp.json existir
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# SSH connection settings for the WordPress host.
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
# Kept as an array so every flag reaches ssh as its own word no matter what
# IFS is set to (an unquoted scalar expansion depends on word splitting).
# NOTE(review): StrictHostKeyChecking=no together with
# UserKnownHostsFile=/dev/null turns off host key verification for a root
# login — confirm this is intentional.
SSH_OPTS=(
  -o IdentitiesOnly=yes
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
  -o LogLevel=ERROR
)
WP_PATH="/home/ealmeida/public_html"

# ssh_cmd COMMAND...
# Run COMMAND on the WordPress host with the key, port and options above.
# SSH_AUTH_SOCK is cleared for the call so only SSH_KEY is offered.
# Returns the exit status of ssh.
ssh_cmd() {
  SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" "${SSH_OPTS[@]}" \
    "${SSH_USER}@${SSH_HOST}" "$@"
}
# Print the one-line synopsis on stdout and terminate with exit status 1.
usage() {
  printf '%s\n' "Usage: $0 <ep_num> [post_id]"
  exit 1
}
[[ $# -lt 1 ]] && usage
EP_NUM="$1"
MANUAL_POST_ID="${2:-}"
EP_PAD="$(pad_number "$EP_NUM")"

# wp_json_is_valid FILE
# Return 0 when FILE parses as JSON. The path is passed to Python as an
# argument (never spliced into the source), so quotes or other special
# characters in the file name cannot break the check.
wp_json_is_valid() {
  python3 -c 'import json, sys; json.load(open(sys.argv[1], encoding="utf-8"))' "$1" 2>/dev/null
}

# Locate the episode's _wp.json (find copes with accented file names).
WP_JSON=""
while IFS= read -r f; do
  if [[ -f "$f" ]]; then
    WP_JSON="$f"
    break
  fi
done < <(find "${PROJECT_ROOT}" -maxdepth 3 -name "Episodio_${EP_PAD}_*_wp.json" 2>/dev/null)

if [[ -z "$WP_JSON" ]]; then
  log_error "EP${EP_PAD}: _wp.json não encontrado"
  exit 1
fi

# Validate the JSON and attempt an automatic repair when it does not parse.
if ! wp_json_is_valid "$WP_JSON"; then
  log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
  # The repair escapes unescaped double quotes inside "content_html" and
  # rewrites the file only if the result parses. Its exit status is ignored
  # on purpose: under `set -e` a failing repair would otherwise abort the
  # script before the re-validation below can report the error. Its output
  # goes to stderr so stdout carries only the post ID printed at the end.
  python3 - "$WP_JSON" << 'PYFIX' >&2 || true
import sys, re, json
filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()
marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    sys.exit(1)
content_start = start + len(marker)
end_pattern = (re.search(r'",\s*\n\s*"hashtags"', raw[content_start:])
               or re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:]))
if not end_pattern:
    sys.exit(1)
content_end = content_start + end_pattern.start()
fixed = (raw[:content_start]
         + re.sub(r'(?<!\\)"', '\\"', raw[content_start:content_end])
         + raw[content_end:])
json.loads(fixed)
with open(filepath, 'w', encoding='utf-8') as f:
    f.write(fixed)
print("reparado")
PYFIX
  if ! wp_json_is_valid "$WP_JSON"; then
    log_error "EP${EP_PAD}: JSON inválido após reparação"
    exit 1
  fi
  log_info "EP${EP_PAD}: JSON reparado automaticamente"
fi
log_info "EP${EP_PAD}: Usando $(basename "$WP_JSON")"

# Resolve the WordPress post ID: use the one given on the command line, or
# look up the scheduled ('future') podcast post by its publication date.
POST_ID="$MANUAL_POST_ID"
if [[ -z "$POST_ID" ]]; then
  # Scheduled date recorded for this episode in the pipeline state.
  # NOTE(review): episodes are selected on `.num` here, while the pipeline
  # entry script's status view reads `.number` — confirm which key lib.sh
  # actually writes.
  SCHED_DATE="$(jq -r --argjson n "$EP_NUM" '.episodes[] | select(.num == $n) | .scheduled // empty' "$STATE_FILE")"
  if [[ -z "$SCHED_DATE" ]]; then
    log_error "EP${EP_PAD}: Não encontrado em pipeline-state.json, especifique post_id manualmente"
    exit 1
  fi
  # The date is spliced into an SQL statement run as root on the server,
  # so accept nothing but a plain YYYY-MM-DD value.
  if [[ ! "$SCHED_DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    log_error "EP${EP_PAD}: Data agendada inválida em pipeline-state.json: '${SCHED_DATE}'"
    exit 1
  fi
  log_info "EP${EP_PAD}: A pesquisar post por data ${SCHED_DATE}..."
  QUERY="SELECT ID FROM wpah_posts WHERE post_type='podcast' AND post_status='future' AND DATE(post_date)='${SCHED_DATE}' LIMIT 1"
  # A failed lookup leaves POST_ID empty so the "not found" error below is
  # reported instead of the script dying silently under `set -e`.
  POST_ID="$(ssh_cmd "cd '${WP_PATH}' && wp db query \"${QUERY}\" --skip-column-names --allow-root 2>/dev/null")" || POST_ID=""
fi

# Drop stray whitespace from the query output.
POST_ID="${POST_ID//[[:space:]]/}"
if [[ -z "$POST_ID" ]]; then
  log_error "EP${EP_PAD}: Post não encontrado no WordPress"
  exit 1
fi
# The ID is later interpolated into remote shell commands: digits only.
if [[ ! "$POST_ID" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: post_id inválido: '${POST_ID}'"
  exit 1
fi
log_info "EP${EP_PAD}: Post ID = ${POST_ID}"
# wp_json_get KEY [MODE]
# Print one field of the episode's _wp.json ($WP_JSON).
#   MODE scalar (default): print the value, or an empty line when missing
#   MODE csv:              join a list value with ","
#   MODE words:            join a list value with " "
# Python is used instead of jq for HTML with accented characters. The file
# path and key travel as arguments rather than being spliced into the Python
# source, so a quote in the file name cannot break the call.
wp_json_get() {
  python3 - "$WP_JSON" "$1" "${2:-scalar}" 2>/dev/null << 'PYGET'
import json, sys
path, key, mode = sys.argv[1:4]
with open(path, encoding='utf-8') as fh:
    data = json.load(fh)
if mode == 'csv':
    print(','.join(data.get(key, [])))
elif mode == 'words':
    print(' '.join(data.get(key, [])))
else:
    print(data.get(key, ''))
PYGET
}

# Extract the fields that are applied to the WordPress post.
WP_CONTENT="$(wp_json_get content_html)"
WP_META="$(wp_json_get meta_description)"
WP_KEYWORD="$(wp_json_get keyword)"
WP_TAGS="$(wp_json_get wp_tags csv)"
WP_HASHTAGS="$(wp_json_get hashtags words)"
WP_SEO_TITLE="$(wp_json_get seo_title)"
WP_SLUG="$(wp_json_get slug)"
# Rank Math auto-fix: rebuild the slug when it does not contain the focus
# keyword, and add the current year to an SEO title that contains no digit.
# The embedded Python (heredoc kept verbatim) receives the JSON path, the
# current slug and the current SEO title as arguments and prints two lines:
# the (possibly rebuilt) slug, then the (possibly extended) SEO title.
#   - Slug: keyword and slug are compared after slugifying and dropping
#     Portuguese stopwords; a rebuilt slug is the slugified keyword plus up
#     to two words of the old slug (longer than 3 characters and not already
#     in the keyword slug), cut to 75 characters.
#   - Title: the year goes right before " | " when that separator exists;
#     otherwise at the end, first cutting titles longer than 55 characters
#     back to a word boundary.
_AUTOFIX="$(python3 - "$WP_JSON" "$WP_SLUG" "$WP_SEO_TITLE" <<'PYFIX'
import json, sys, re, unicodedata, time
def slugify(text):
text = unicodedata.normalize("NFD", text.lower())
text = "".join(c for c in text if unicodedata.category(c) != "Mn")
text = re.sub(r'[^a-z0-9\s-]', '', text)
return re.sub(r'[-\s]+', '-', text.strip()).rstrip('-')
STOPWORDS = {"a","o","as","os","de","da","do","das","dos","e","em","no","na","nos","nas","para","por","pelo","pela"}
def strip_sw(s):
return " ".join(t for t in slugify(s).replace("-", " ").split() if t not in STOPWORDS)
d = json.load(open(sys.argv[1]))
slug = sys.argv[2]
seo_title = sys.argv[3]
kw = d.get('keyword', '')
# Fix 1: slug sem keyword
slug_fixed = slug
if kw and slug and strip_sw(kw) not in strip_sw(slug):
fk_slug = slugify(kw)
orig_words = [w for w in slug.split('-') if w not in fk_slug.split('-') and len(w) > 3][:2]
slug_fixed = (fk_slug + ('-' + '-'.join(orig_words) if orig_words else ''))[:75].rstrip('-')
# Fix 2: seo_title sem número → adicionar ano
seo_fixed = seo_title
if seo_title and not re.search(r'\d', seo_title):
year = time.strftime('%Y')
# Inserir ano antes do separador "|" se existir, senão no fim do título
# Não truncar o texto principal — o ano vai imediatamente antes do "|"
if ' | ' in seo_title:
parts = seo_title.split(' | ', 1)
seo_fixed = f"{parts[0].rstrip()} {year} | {parts[1]}"
else:
t = seo_title.rstrip()
seo_fixed = (t[:55].rsplit(' ', 1)[0] + f' {year}') if len(t) > 55 else f'{t} {year}'
print(slug_fixed)
print(seo_fixed)
PYFIX
)"
# Apply the corrected values: output line 1 is the slug, line 2 the SEO title.
WP_SLUG_NEW="$(echo "$_AUTOFIX" | sed -n '1p')"
WP_SEO_TITLE_NEW="$(echo "$_AUTOFIX" | sed -n '2p')"
# Replace a value only when the fixer returned something non-empty that
# differs from the current value, and log the change.
if [[ -n "$WP_SLUG_NEW" && "$WP_SLUG_NEW" != "$WP_SLUG" ]]; then
log_info "EP${EP_PAD}: slug auto-corrigido: '${WP_SLUG}' → '${WP_SLUG_NEW}'"
WP_SLUG="$WP_SLUG_NEW"
fi
if [[ -n "$WP_SEO_TITLE_NEW" && "$WP_SEO_TITLE_NEW" != "$WP_SEO_TITLE" ]]; then
log_info "EP${EP_PAD}: seo_title auto-corrigido: ano adicionado → '${WP_SEO_TITLE_NEW}'"
WP_SEO_TITLE="$WP_SEO_TITLE_NEW"
fi
# sq_escape VALUE
# Print VALUE with every single quote rewritten as '\'' so the result can be
# embedded between single quotes in a command line for the remote shell.
sq_escape() {
  printf '%s' "$1" | sed "s/'/'\\\\''/g"
}

# wp_remote ARG...
# Run `wp ARG... --allow-root` in WP_PATH on the server. Every ARG is
# single-quoted for the remote shell, so values taken from the JSON (slug,
# tags, HTML, titles) cannot be split or interpreted as shell syntax.
# Remote stderr is discarded. Returns the exit status of ssh_cmd.
wp_remote() {
  local remote_cmd="cd '${WP_PATH}' && wp" arg
  for arg in "$@"; do
    remote_cmd+=" '$(sq_escape "$arg")'"
  done
  ssh_cmd "${remote_cmd} --allow-root 2>/dev/null"
}

# Excerpt = meta description + hashtags (only when both are present).
WP_EXCERPT=""
if [[ -n "$WP_META" && -n "$WP_HASHTAGS" ]]; then
  WP_EXCERPT="${WP_META}
${WP_HASHTAGS}"
fi

# Post content
if [[ -n "$WP_CONTENT" ]]; then
  wp_remote post update "$POST_ID" "--post_content=${WP_CONTENT}"
  log_info "EP${EP_PAD}: post_content aplicado"
fi

# Excerpt
if [[ -n "$WP_EXCERPT" ]]; then
  wp_remote post update "$POST_ID" "--post_excerpt=${WP_EXCERPT}"
  log_info "EP${EP_PAD}: post_excerpt aplicado"
fi

# Slug
if [[ -n "$WP_SLUG" ]]; then
  wp_remote post update "$POST_ID" "--post_name=${WP_SLUG}"
  log_info "EP${EP_PAD}: slug actualizado -> ${WP_SLUG}"
fi

# Tags: WP_TAGS is a comma-joined list; each tag is sent as its own term
# argument so tags containing spaces stay intact.
if [[ -n "$WP_TAGS" ]]; then
  IFS=',' read -r -a WP_TAG_LIST <<< "$WP_TAGS"
  wp_remote post term set "$POST_ID" post_tag "${WP_TAG_LIST[@]}"
  log_info "EP${EP_PAD}: tags aplicadas"
fi

# Rank Math: description
if [[ -n "$WP_META" ]]; then
  wp_remote post meta update "$POST_ID" rank_math_description "$WP_META"
  log_info "EP${EP_PAD}: rank_math_description aplicado"
fi

# Rank Math: focus keyword
if [[ -n "$WP_KEYWORD" ]]; then
  wp_remote post meta update "$POST_ID" rank_math_focus_keyword "$WP_KEYWORD"
  log_info "EP${EP_PAD}: rank_math_focus_keyword aplicado"
fi

# Rank Math: SEO title, falling back to "<title> | Podcast Descomplicar
# Digital" when the JSON has no seo_title.
if [[ -z "$WP_SEO_TITLE" ]]; then
  WP_TITLE_FALLBACK="$(jq -r '.title // empty' "$WP_JSON")"
  if [[ -n "$WP_TITLE_FALLBACK" ]]; then
    WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
  fi
fi
if [[ -n "$WP_SEO_TITLE" ]]; then
  wp_remote post meta update "$POST_ID" rank_math_title "$WP_SEO_TITLE"
  log_info "EP${EP_PAD}: rank_math_title aplicado"
fi

log_info "EP${EP_PAD}: Conteúdo WP aplicado com sucesso (post ${POST_ID})"
# The post ID is the script's result on stdout.
echo "${POST_ID}"
+177
View File
@@ -0,0 +1,177 @@
#!/usr/bin/env bash
# batch-prepare.sh — Pipeline completo para lote de episódios
# Uso: ./scripts/batch-prepare.sh [--dry-run]
# Para cada episódio: TTS → pós-produção → agendar no WordPress
# Retomável: salta episódios já prontos
set -uo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Optional first argument --dry-run: log each step instead of running it.
if [[ "${1:-}" == "--dry-run" ]]; then
  DRY_RUN=true
else
  DRY_RUN=false
fi

# A Gemini API key must be present in the environment before starting.
[[ -n "${GEMINI_API_KEY:-}" ]] || {
  log_error "GEMINI_API_KEY não definida. Exporta primeiro: export GEMINI_API_KEY=..."
  exit 1
}

# Batch definition: processing order (calendar order) and the publication
# date of each episode.
EP_ORDER=(
  65 132 137 66 67
  134 139 69 78 79
  80 81 82 83 84
)
declare -A EP_DATES=(
  [65]="2026-07-02"  [132]="2026-07-06" [137]="2026-07-07"
  [66]="2026-07-08"  [67]="2026-07-09"  [134]="2026-07-13"
  [139]="2026-07-14" [69]="2026-07-15"  [78]="2026-07-16"
  [79]="2026-07-20"  [80]="2026-07-21"  [81]="2026-07-22"
  [82]="2026-07-23"  [83]="2026-07-27"  [84]="2026-07-28"
)

# One timestamped log file per run; RESULTS collects the outcome per episode.
BATCH_LOG="${LOG_DIR}/batch-$(date +%Y%m%d-%H%M%S).log"
mkdir -p "${LOG_DIR}"
declare -A RESULTS
# _batch_log MESSAGE...
# Write "[YYYY-MM-DD HH:MM:SS] MESSAGE" to stdout and append the same line
# to the file named by BATCH_LOG.
_batch_log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '%s\n' "[${stamp}] $*" | tee -a "${BATCH_LOG}"
}
# Main loop: for every episode in EP_ORDER run TTS -> post-production ->
# WordPress scheduling, recording the outcome in RESULTS. A failing stage
# skips the rest of that episode (continue) but never stops the batch.
_batch_log "=== BATCH START — $(date) ==="
_batch_log "Episódios: ${EP_ORDER[*]}"
[[ "$DRY_RUN" == true ]] && _batch_log "MODO DRY-RUN activado"
for EP_NUM in "${EP_ORDER[@]}"; do
EP_PAD="$(pad_number "$EP_NUM")"
SCHED_DATE="${EP_DATES[$EP_NUM]}"
_batch_log "--- EP${EP_PAD} (${SCHED_DATE}) ---"
# Skip episodes already scheduled (status "ready" in pipeline-state.json).
# A jq failure is treated the same as "no status recorded".
ALREADY_STATUS=$(jq -r --argjson n "$EP_NUM" \
'.episodes[] | select(.num == $n) | .status // ""' \
"${STATE_FILE}" 2>/dev/null || echo "")
if [[ "$ALREADY_STATUS" == "ready" ]]; then
_batch_log "EP${EP_PAD}: já agendado — a saltar"
RESULTS[$EP_NUM]="skip"
continue
fi
# --- Stage 1: TTS ---
# An existing final MP3 means TTS and post-production are both skipped.
FINAL_MP3=""
for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
[[ -f "$f" ]] && FINAL_MP3="$f" && break
done
if [[ -z "$FINAL_MP3" ]]; then
_batch_log "EP${EP_PAD}: [1/3] A gerar áudio TTS..."
if [[ "$DRY_RUN" == false ]]; then
# Output of the stage scripts goes to the batch log only.
if ! bash "${SCRIPT_DIR}/generate-audio.sh" "$EP_NUM" >> "${BATCH_LOG}" 2>&1; then
_batch_log "EP${EP_PAD}: ERRO no TTS — a saltar episódio"
RESULTS[$EP_NUM]="erro_tts"
continue
fi
else
_batch_log "EP${EP_PAD}: [DRY-RUN] generate-audio.sh ${EP_NUM}"
fi
# --- Stage 2: post-production ---
# Collect the raw parts (ep_NNN_p*.wav) in glob order.
RAW_FILES=()
for f in "${PROJECT_ROOT}/Episodios/Audios/raw/ep_${EP_PAD}_p"*.wav; do
[[ -f "$f" ]] && RAW_FILES+=("$f")
done
if [[ ${#RAW_FILES[@]} -eq 0 && "$DRY_RUN" == false ]]; then
_batch_log "EP${EP_PAD}: ERRO — sem ficheiros raw após TTS — a saltar episódio"
RESULTS[$EP_NUM]="erro_sem_raw"
continue
fi
_batch_log "EP${EP_PAD}: [2/3] Pós-produção (${#RAW_FILES[@]} partes)..."
if [[ "$DRY_RUN" == false ]]; then
if ! bash "${SCRIPT_DIR}/post-produce.sh" "$EP_NUM" "${RAW_FILES[@]}" >> "${BATCH_LOG}" 2>&1; then
_batch_log "EP${EP_PAD}: ERRO na pós-produção — a saltar episódio"
RESULTS[$EP_NUM]="erro_postprod"
continue
fi
else
_batch_log "EP${EP_PAD}: [DRY-RUN] post-produce.sh ${EP_NUM} <raw_files>"
fi
# Find the final MP3 that post-production should have produced.
for f in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
[[ -f "$f" ]] && FINAL_MP3="$f" && break
done
if [[ -z "$FINAL_MP3" && "$DRY_RUN" == false ]]; then
_batch_log "EP${EP_PAD}: ERRO — MP3 final não encontrado após pós-produção"
RESULTS[$EP_NUM]="erro_sem_mp3"
continue
fi
else
_batch_log "EP${EP_PAD}: [1/3] Áudio já existe: $(basename "${FINAL_MP3}") — a saltar TTS+pós-prod"
fi
# --- Stage 3: schedule in WordPress ---
_batch_log "EP${EP_PAD}: [3/3] A agendar no WordPress para ${SCHED_DATE}..."
if [[ "$DRY_RUN" == false ]]; then
if ! bash "${SCRIPT_DIR}/schedule-episode.sh" "$EP_NUM" "$SCHED_DATE" >> "${BATCH_LOG}" 2>&1; then
_batch_log "EP${EP_PAD}: ERRO no agendamento"
RESULTS[$EP_NUM]="erro_schedule"
continue
fi
else
_batch_log "EP${EP_PAD}: [DRY-RUN] schedule-episode.sh ${EP_NUM} ${SCHED_DATE}"
fi
# Reached only when no stage failed (in dry-run nothing was executed).
RESULTS[$EP_NUM]="ok"
_batch_log "EP${EP_PAD}: ✓ COMPLETO"
done
# Final summary: one line per episode, totals, and an exit status that is
# non-zero when at least one episode ended in error.
_batch_log ""
_batch_log "=== RESUMO ==="
OK_COUNT=0
SKIP_COUNT=0
ERR_COUNT=0
for EP_NUM in "${EP_ORDER[@]}"; do
  EP_PAD="$(pad_number "$EP_NUM")"
  # Episodes with no recorded result count as errors ("desconhecido").
  STATUS="${RESULTS[$EP_NUM]:-desconhecido}"
  if [[ "$STATUS" == "ok" ]]; then
    _batch_log "✓ EP${EP_PAD} — completo"
    OK_COUNT=$((OK_COUNT + 1))
  elif [[ "$STATUS" == "skip" ]]; then
    _batch_log "⏭ EP${EP_PAD} — já agendado"
    SKIP_COUNT=$((SKIP_COUNT + 1))
  else
    _batch_log "✗ EP${EP_PAD} — ERRO: ${STATUS}"
    ERR_COUNT=$((ERR_COUNT + 1))
  fi
done
_batch_log ""
_batch_log "Total: ${OK_COUNT} ok | ${SKIP_COUNT} saltados | ${ERR_COUNT} erros"
_batch_log "Log completo: ${BATCH_LOG}"
_batch_log "=== BATCH END — $(date) ==="
if ((ERR_COUNT > 0)); then
  exit 1
fi
exit 0
+68
View File
@@ -0,0 +1,68 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Batch production: for each listed episode generate the TTS audio, collect
# the raw parts and run post-production. A failing episode is counted and
# skipped; the batch always runs to the end.

# Episodes to produce (calendar order)
EPISODES=(136 37 38 39 40 41 42 44 135 46 47 48 49 50 51)
TOTAL=${#EPISODES[@]}
DONE=0
FAILED=0
# Raw TTS parts are written here by generate-audio.sh.
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"

log_info "=== BATCH PRODUCE: ${TOTAL} episodios ==="
for EP in "${EPISODES[@]}"; do
  EP_PAD="$(pad_number "$EP")"
  DONE=$((DONE + 1))
  log_info "--- [${DONE}/${TOTAL}] EP${EP_PAD} ---"

  # Step 1: Generate audio (split + TTS)
  log_info "EP${EP_PAD}: Generating audio..."
  if bash "${SCRIPT_DIR}/generate-audio.sh" "$EP" 2>&1; then
    log_info "EP${EP_PAD}: Audio generation OK"
  else
    log_error "EP${EP_PAD}: Audio generation FAILED, skipping"
    FAILED=$((FAILED + 1))
    continue
  fi

  # Step 2: Collect raw audio parts
  # NOTE(review): a second lookup labelled "alternate path with accent" used
  # to follow, but it searched this very same directory and so could never
  # find anything new. If a differently spelled directory was intended, add
  # it here.
  RAW_FILES=()
  for p in "${RAW_DIR}/ep_${EP_PAD}_p"*.wav; do
    if [[ -f "$p" ]]; then
      RAW_FILES+=("$p")
    fi
  done
  if [[ ${#RAW_FILES[@]} -eq 0 ]]; then
    log_error "EP${EP_PAD}: No raw audio files found, skipping post-production"
    FAILED=$((FAILED + 1))
    continue
  fi

  # Sort parts numerically: version sort puts _p2 before _p10. Reading line
  # by line keeps each path as one element (no word splitting or globbing).
  RAW_FILES_SORTED=()
  while IFS= read -r p; do
    RAW_FILES_SORTED+=("$p")
  done < <(printf '%s\n' "${RAW_FILES[@]}" | sort -V)

  # Step 3: Post-produce
  log_info "EP${EP_PAD}: Post-producing ${#RAW_FILES_SORTED[@]} parts..."
  if bash "${SCRIPT_DIR}/post-produce.sh" "$EP" "${RAW_FILES_SORTED[@]}" 2>&1; then
    log_info "EP${EP_PAD}: Post-production OK"
  else
    log_error "EP${EP_PAD}: Post-production FAILED"
    FAILED=$((FAILED + 1))
    continue
  fi
  log_info "EP${EP_PAD}: DONE"
done
log_info "=== BATCH COMPLETE: ${DONE} processed, $((DONE - FAILED)) OK, ${FAILED} failed ==="
+134
View File
@@ -0,0 +1,134 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Batch definition and counters.
EPISODES=(
  88 89 55 56 133
  57 58 59 60 61
  62 63 138 140 64
)
TOTAL="${#EPISODES[@]}"
DONE=0
FAILED=0

# Splitting parameters: the marker is an extended regex read from the audio
# settings; the body is split in two when it exceeds MAX_WORDS words.
# NOTE(review): generate-audio.sh reads its word limit from
# .tts_max_words_per_block in the same settings file, while this value is
# fixed — confirm the two are meant to differ.
MAX_WORDS=1000
SPLIT_MARKER="$(jq -r '.tts_split_marker' "${PROJECT_ROOT}/config/audio-settings.json")"

# Destination of the generated raw .wav parts.
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
mkdir -p "$RAW_DIR"
# _split_and_generate_in EP_NUM EP_PAD SCRIPT_FILE TMP_DIR
# Worker for split_and_generate: splits SCRIPT_FILE into parts inside
# TMP_DIR, generates the missing raw .wav parts and runs post-production.
# Every critical step is checked explicitly because the caller invokes
# split_and_generate inside `if`, where `set -e` has no effect.
# Returns 0 on success, 1 as soon as a step fails.
_split_and_generate_in() {
  local ep_num="$1" ep_pad="$2" script_file="$3" tmp_dir="$4"

  # Split at the first line matching SPLIT_MARKER: body before, FAQ after.
  local marker_line
  marker_line="$(grep -nE -- "$SPLIT_MARKER" "$script_file" | head -1 | cut -d: -f1)" || marker_line=""
  if [[ -n "$marker_line" && "$marker_line" -gt 1 ]]; then
    head -n "$((marker_line - 1))" "$script_file" > "${tmp_dir}/corpo.txt" || return 1
    tail -n "+${marker_line}" "$script_file" > "${tmp_dir}/faq.txt" || return 1
  else
    cp -- "$script_file" "${tmp_dir}/corpo.txt" || return 1
    : > "${tmp_dir}/faq.txt"
  fi

  local corpo_words num_parts=0
  corpo_words="$(wc -w < "${tmp_dir}/corpo.txt")"
  if [[ "$corpo_words" -gt "$MAX_WORDS" ]]; then
    # Body too long: cut it in two at the blank line nearest the middle
    # (searched at growing offsets), or at the middle line if none is found.
    local corpo_lines mid check line split_line offset
    corpo_lines="$(wc -l < "${tmp_dir}/corpo.txt")"
    mid=$((corpo_lines / 2))
    split_line="$mid"
    for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
      check=$((mid + offset))
      if [[ "$check" -gt 0 && "$check" -lt "$corpo_lines" ]]; then
        line="$(sed -n "${check}p" "${tmp_dir}/corpo.txt")"
        if [[ -z "$line" || "$line" =~ ^[[:space:]]*$ ]]; then
          split_line="$check"
          break
        fi
      fi
    done
    head -n "$split_line" "${tmp_dir}/corpo.txt" > "${tmp_dir}/part_1.txt" || return 1
    tail -n "+$((split_line + 1))" "${tmp_dir}/corpo.txt" > "${tmp_dir}/part_2.txt" || return 1
    if [[ -s "${tmp_dir}/faq.txt" ]]; then
      cp -- "${tmp_dir}/faq.txt" "${tmp_dir}/part_3.txt" || return 1
      num_parts=3
    else
      num_parts=2
    fi
  else
    cp -- "${tmp_dir}/corpo.txt" "${tmp_dir}/part_1.txt" || return 1
    if [[ -s "${tmp_dir}/faq.txt" ]]; then
      cp -- "${tmp_dir}/faq.txt" "${tmp_dir}/part_2.txt" || return 1
      num_parts=2
    else
      num_parts=1
    fi
  fi
  log_info "EP${ep_pad}: Split into ${num_parts} parts"

  # Generate TTS for each part (skip if raw already exists)
  local -a raw_files=()
  local i part_file output part_words
  for ((i = 1; i <= num_parts; i++)); do
    part_file="${tmp_dir}/part_${i}.txt"
    output="${RAW_DIR}/ep_${ep_pad}_p${i}.wav"
    part_words="$(wc -w < "$part_file")"
    if [[ -f "$output" ]]; then
      log_info "EP${ep_pad}: Part ${i}/${num_parts} already exists (${part_words} words), skipping"
    else
      log_info "EP${ep_pad}: Generating part ${i}/${num_parts} (${part_words} words)..."
      if ! python3 "${SCRIPT_DIR}/tts-single-part.py" "$part_file" "$output"; then
        # Remove a partial file so a later run does not mistake it for a
        # finished part.
        rm -f -- "$output"
        log_error "EP${ep_pad}: TTS failed for part ${i}/${num_parts}"
        return 1
      fi
      log_info "EP${ep_pad}: Part ${i} done"
    fi
    raw_files+=("$output")
  done

  # Post-produce
  log_info "EP${ep_pad}: Post-producing..."
  if ! bash "${SCRIPT_DIR}/post-produce.sh" "$ep_num" "${raw_files[@]}"; then
    log_error "EP${ep_pad}: Post-production failed"
    return 1
  fi
  log_info "EP${ep_pad}: COMPLETE"
}

# split_and_generate EP_NUM
# Produce the audio for one episode: find its script, skip it when a final
# MP3 already exists, otherwise split the text, generate the raw parts and
# post-produce them.
# Globals:   PROJECT_ROOT, RAW_DIR, SCRIPT_DIR, SPLIT_MARKER, MAX_WORDS (read)
# Arguments: $1 - episode number
# Returns:   0 when the episode is complete or was skipped; 1 when the
#            script is missing or any step fails
split_and_generate() {
  local ep_num="$1"
  local ep_pad
  ep_pad="$(pad_number "$ep_num")"

  # Find script file
  local script_file="" candidate
  for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${ep_pad}_"*.txt; do
    if [[ -f "$candidate" ]]; then
      script_file="$candidate"
      break
    fi
  done
  if [[ -z "$script_file" ]]; then
    log_error "EP${ep_pad}: No script found"
    return 1
  fi

  local total_words
  total_words="$(wc -w < "$script_file")"
  log_info "EP${ep_pad}: ${total_words} words from $(basename "$script_file")"

  # Check if final MP3 already exists (glob instead of parsing ls output)
  for candidate in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${ep_pad}_"*.mp3; do
    if [[ -e "$candidate" ]]; then
      log_info "EP${ep_pad}: Final MP3 already exists, SKIPPING"
      return 0
    fi
  done

  # Work in a scratch directory that is removed on every exit path.
  local tmp_dir rc=0
  tmp_dir="$(mktemp -d)" || {
    log_error "EP${ep_pad}: Could not create temporary directory"
    return 1
  }
  _split_and_generate_in "$ep_num" "$ep_pad" "$script_file" "$tmp_dir" || rc=$?
  rm -rf -- "$tmp_dir"
  return "$rc"
}
# Driver: run split_and_generate for every episode, count the failures and
# print a final tally. A failed episode never stops the batch.
log_info "=== BATCH TTS: ${TOTAL} episodios ==="
for EP in "${EPISODES[@]}"; do
  ((++DONE))
  EP_LABEL="EP$(pad_number "$EP")"
  log_info "--- [${DONE}/${TOTAL}] ${EP_LABEL} ---"
  if ! split_and_generate "$EP"; then
    log_error "${EP_LABEL}: FAILED"
    ((++FAILED))
    continue
  fi
  log_info "${EP_LABEL}: OK"
done
log_info "=== BATCH COMPLETE: ${TOTAL} processed, $((TOTAL - FAILED)) OK, ${FAILED} failed ==="
+168
View File
@@ -0,0 +1,168 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Print the synopsis and a short description on stdout, then terminate the
# script with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <episode_number>" \
    " Generates TTS audio via Gemini API for the given episode." \
    " Requires GEMINI_API_KEY environment variable." \
    " Splits text into blocks if needed (max ~1000 words per block)."
  exit 1
}
# Setup and text splitting for one episode: validates the arguments and the
# API key, loads the TTS settings, locates the episode script and writes
# part_1.txt .. part_N.txt (N = NUM_PARTS, 1 to 3) into a temporary directory.
[[ $# -lt 1 ]] && usage
EP_NUM="$1"
EP_PAD="$(pad_number "$EP_NUM")"
ensure_state_file
# Check API key
if [[ -z "${GEMINI_API_KEY:-}" ]]; then
log_error "GEMINI_API_KEY not set. Export it first."
exit 1
fi
# Load settings (model, voice, word limit per block, split marker regex)
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
TTS_MODEL="$(jq -r '.tts_model' "$SETTINGS_FILE")"
TTS_VOICE="$(jq -r '.tts_voice' "$SETTINGS_FILE")"
MAX_WORDS="$(jq -r '.tts_max_words_per_block' "$SETTINGS_FILE")"
SPLIT_MARKER="$(jq -r '.tts_split_marker' "$SETTINGS_FILE")"
# Find script file (first Episodio_NNN_*.txt that exists)
SCRIPT_FILE=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
if [[ -f "$f" ]]; then
SCRIPT_FILE="$f"
break
fi
done
if [[ -z "$SCRIPT_FILE" ]]; then
log_error "EP${EP_PAD}: No script .txt found"
exit 1
fi
TOTAL_WORDS="$(wc -w < "$SCRIPT_FILE")"
log_info "EP${EP_PAD}: Script has ${TOTAL_WORDS} words, max per block: ${MAX_WORDS}"
# Output directory
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
mkdir -p "$RAW_DIR"
# Split if needed; the scratch directory is removed when the script exits
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT
if [[ "$TOTAL_WORDS" -le "$MAX_WORDS" ]]; then
# Short script: a single part, no splitting at all.
cp "$SCRIPT_FILE" "${TMP_DIR}/part_1.txt"
NUM_PARTS=1
else
# Smart split: first at FAQ marker, then subdivide large parts
# MARKER_LINE is the line number of the first marker match (empty if none).
MARKER_LINE="$(grep -nE "$SPLIT_MARKER" "$SCRIPT_FILE" | head -1 | cut -d: -f1 || echo "")"
if [[ -n "$MARKER_LINE" && "$MARKER_LINE" -gt 1 ]]; then
# corpo.txt = everything before the marker, faq.txt = marker line onwards.
head -n "$((MARKER_LINE - 1))" "$SCRIPT_FILE" > "${TMP_DIR}/corpo.txt"
tail -n "+${MARKER_LINE}" "$SCRIPT_FILE" > "${TMP_DIR}/faq.txt"
else
# No marker (or marker on line 1): whole script is the body, FAQ is empty.
cp "$SCRIPT_FILE" "${TMP_DIR}/corpo.txt"
: > "${TMP_DIR}/faq.txt"
fi
# Subdivide corpo if too long
CORPO_WORDS="$(wc -w < "${TMP_DIR}/corpo.txt")"
if [[ "$CORPO_WORDS" -gt "$MAX_WORDS" ]]; then
CORPO_LINES="$(wc -l < "${TMP_DIR}/corpo.txt")"
MID=$((CORPO_LINES / 2))
# Find nearest paragraph break: probe growing offsets around the middle
# line and stop at the first empty/whitespace-only line; otherwise the
# middle line itself is used.
SPLIT_LINE="$MID"
for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
CHECK=$((MID + offset))
if [[ "$CHECK" -gt 0 && "$CHECK" -lt "$CORPO_LINES" ]]; then
LINE="$(sed -n "${CHECK}p" "${TMP_DIR}/corpo.txt")"
if [[ -z "$LINE" || "$LINE" =~ ^[[:space:]]*$ ]]; then
SPLIT_LINE="$CHECK"
break
fi
fi
done
# Body halves become parts 1 and 2; a non-empty FAQ becomes part 3.
head -n "$SPLIT_LINE" "${TMP_DIR}/corpo.txt" > "${TMP_DIR}/part_1.txt"
tail -n "+$((SPLIT_LINE + 1))" "${TMP_DIR}/corpo.txt" > "${TMP_DIR}/part_2.txt"
if [[ -s "${TMP_DIR}/faq.txt" ]]; then
cp "${TMP_DIR}/faq.txt" "${TMP_DIR}/part_3.txt"
NUM_PARTS=3
else
NUM_PARTS=2
fi
else
# Body fits in one block: part 1 is the body, part 2 the FAQ (if any).
cp "${TMP_DIR}/corpo.txt" "${TMP_DIR}/part_1.txt"
if [[ -s "${TMP_DIR}/faq.txt" ]]; then
cp "${TMP_DIR}/faq.txt" "${TMP_DIR}/part_2.txt"
NUM_PARTS=2
else
NUM_PARTS=1
fi
fi
fi
log_info "EP${EP_PAD}: Split into ${NUM_PARTS} parts"
# TTS style prompt (sent to the model ahead of the text of each part)
STYLE="Lê este texto em português de Portugal (PT-PT), com um tom enérgico, confiante, educativo, inspirador e profissional. Mantém o ritmo natural e envolvente, como se estivesses a conversar diretamente com o ouvinte, transmitindo proximidade e autoridade. Faz pequenas pausas para dar ênfase às ideias-chave e assegura que cada transição entre temas é fluida. Evita soar robótico ou demasiado formal; o objetivo é informar, motivar e criar ligação com quem está a ouvir."

# Generate each part as ${RAW_DIR}/ep_NNN_p<i>.wav
AUDIO_FILES=()
for ((i = 1; i <= NUM_PARTS; i++)); do
  PART_FILE="${TMP_DIR}/part_${i}.txt"
  PART_WORDS="$(wc -w < "$PART_FILE")"
  OUTPUT="${RAW_DIR}/ep_${EP_PAD}_p${i}.wav"
  log_info "EP${EP_PAD}: Generating part ${i}/${NUM_PARTS} (${PART_WORDS} words)..."

  # All inputs reach Python through the environment and the program comes
  # from a quoted heredoc. Nothing is spliced into the source, so quotes or
  # backslashes in the prompt, paths or settings cannot break it, and the
  # API key does not appear in the process argument list.
  TTS_PART_FILE="$PART_FILE" \
    TTS_OUTPUT="$OUTPUT" \
    TTS_MODEL_NAME="$TTS_MODEL" \
    TTS_VOICE_NAME="$TTS_VOICE" \
    TTS_STYLE="$STYLE" \
    GEMINI_API_KEY="$GEMINI_API_KEY" \
    python3 - << 'PYTTS'
import os
import wave
from google import genai
from google.genai import types

env = os.environ
client = genai.Client(api_key=env['GEMINI_API_KEY'])
with open(env['TTS_PART_FILE'], 'r', encoding='utf-8') as f:
    text = f.read()
response = client.models.generate_content(
    model=env['TTS_MODEL_NAME'],
    contents=env['TTS_STYLE'] + '\n\n' + text,
    config=types.GenerateContentConfig(
        response_modalities=['AUDIO'],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(
                    voice_name=env['TTS_VOICE_NAME'])
            )
        ),
    ),
)
data = response.candidates[0].content.parts[0].inline_data.data
# The returned bytes are written as mono, 16-bit, 24 kHz WAV frames.
with wave.open(env['TTS_OUTPUT'], 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(24000)
    wf.writeframes(data)
duration = (len(data) // 2) / 24000
print(f'{duration:.0f}')
PYTTS

  DURATION="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTPUT")"
  log_info "EP${EP_PAD}: Part ${i} done: ${DURATION%.*}s"
  AUDIO_FILES+=("$OUTPUT")
done

# Update state
set_episode_field "$EP_NUM" "status" "audio_done"

# Print output files for post-produce.sh
log_info "EP${EP_PAD}: All ${NUM_PARTS} parts generated. Run post-production:"
echo "bash scripts/post-produce.sh ${EP_NUM} ${AUDIO_FILES[*]}"
+160
View File
@@ -0,0 +1,160 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Print the synopsis and a short description on stdout, then terminate the
# script with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <episode_number>" \
    " Generates podcast script and WP description for the given episode." \
    " Reads episode data from config/episode-guide-map.json." \
    " Skips script generation if .txt already exists."
  exit 1
}
[[ $# -lt 1 ]] && usage
EP_NUM="$1"
EP_PAD="$(pad_number "$EP_NUM")"
ensure_state_file

# Load episode data from map
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
EP_DATA="$(jq -r --arg n "$EP_NUM" '.[$n] // empty' "$MAP_FILE")"
if [[ -z "$EP_DATA" ]]; then
  log_error "Episode $EP_NUM not found in episode-guide-map.json"
  exit 1
fi
GUIDE_TITLE="$(jq -r '.guide_title' <<< "$EP_DATA")"
PODCAST_TITLE="$(jq -r '.podcast_title' <<< "$EP_DATA")"
GUIDE_URL="$(jq -r '.guide_url' <<< "$EP_DATA")"

# File-system-safe title: every character outside [a-zA-Z0-9] becomes "_",
# runs of "_" are collapsed and a trailing "_" is dropped.
SAFE_TITLE="$(printf '%s\n' "$PODCAST_TITLE" | sed -e 's/[^a-zA-Z0-9]/_/g' -e 's/__*/_/g' -e 's/_$//')"
SCRIPT_FILE="${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_${SAFE_TITLE}.txt"
WP_FILE="${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_${SAFE_TITLE}_wp.json"

# Check if script already exists (reuse existing .txt files)
EXISTING_SCRIPT=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
  if [[ -f "$f" ]]; then
    EXISTING_SCRIPT="$f"
    SCRIPT_FILE="$f"
    break
  fi
done

SERVICES="$(< "${PROJECT_ROOT}/config/service-links.json")"
SCRIPT_PROMPT="$(< "${PROJECT_ROOT}/prompts/generate-script.md")"
WP_PROMPT="$(< "${PROJECT_ROOT}/prompts/generate-wp-description.md")"

# Step 1: Generate script (or skip if exists)
if [[ -n "$EXISTING_SCRIPT" ]]; then
  log_info "EP${EP_PAD}: Script already exists at ${EXISTING_SCRIPT}, skipping generation"
else
  log_info "EP${EP_PAD}: Generating podcast script for '${PODCAST_TITLE}'"
  FULL_PROMPT="${SCRIPT_PROMPT}
---
## Dados do episodio
- Numero: ${EP_NUM}
- Titulo: ${PODCAST_TITLE}
- Guia de referencia: ${GUIDE_TITLE}
- URL do guia: ${GUIDE_URL}
## Links de servicos Descomplicar (usar quando contextual)
${SERVICES}
## Instrucao
Gera o guiao completo seguindo a estrutura obrigatoria acima. Output apenas o texto limpo."
  # Generate into a temporary file and move it into place only when the
  # generator succeeded and produced output. Writing straight to the .txt
  # would leave an empty or partial file behind on failure, and the next run
  # would reuse it as an existing script. The ".tmp" suffix keeps the
  # temporary file out of the Episodio_NNN_*.txt lookup above.
  SCRIPT_TMP="${SCRIPT_FILE}.tmp"
  if ! printf '%s\n' "$FULL_PROMPT" | claude --print > "$SCRIPT_TMP" || [[ ! -s "$SCRIPT_TMP" ]]; then
    rm -f -- "$SCRIPT_TMP"
    log_error "EP${EP_PAD}: Script generation failed"
    exit 1
  fi
  mv -- "$SCRIPT_TMP" "$SCRIPT_FILE"
  log_info "EP${EP_PAD}: Script saved to ${SCRIPT_FILE}"
fi
# Step 2: Generate WP description
log_info "EP${EP_PAD}: Generating WordPress description"
SCRIPT_CONTENT="$(cat "$SCRIPT_FILE")"
WP_FULL_PROMPT="${WP_PROMPT}
---
## Dados do episodio
- Numero: ${EP_NUM}
- Titulo: ${PODCAST_TITLE}
- URL do guia relacionado: ${GUIDE_URL}
## Links de servicos Descomplicar (USAR APENAS ESTES — nunca inventar)
${SERVICES}
## Guiao do episodio (base para a descricao)
${SCRIPT_CONTENT}
## Instrucao
Gera o JSON com a descricao WordPress completa. Output APENAS JSON valido."
echo "$WP_FULL_PROMPT" | claude --print > "$WP_FILE"
# Validate the generated WordPress JSON and try an automatic repair if needed.
#
# The path is passed to Python through argv instead of being interpolated into
# the Python source, so paths containing quotes cannot break the check.
wp_json_is_valid() {
  python3 -c 'import json, sys; json.load(open(sys.argv[1], encoding="utf-8"))' "$1" 2>/dev/null
}

if wp_json_is_valid "$WP_FILE"; then
  log_info "EP${EP_PAD}: WP description saved to ${WP_FILE}"
else
  log_warn "EP${EP_PAD}: WP JSON inválido — a tentar reparação automática..."
  # The repair helper exits non-zero when it cannot fix the file. The trailing
  # "|| true" stops `set -e` from aborting the script at that point, so the
  # re-validation below can report the failure instead of dying silently.
  python3 - "$WP_FILE" << 'PYFIX' || true
import sys, re, json

filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()

# Locate the content_html value and escape the unescaped quotes inside it.
marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    print("SKIP: content_html não encontrado")
    sys.exit(1)
content_start = start + len(marker)

# The value is assumed to end right before the next known key.
end_pattern = re.search(r'",\s*\n\s*"hashtags"', raw[content_start:])
if not end_pattern:
    end_pattern = re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:])
if not end_pattern:
    print("SKIP: fim do content_html não encontrado")
    sys.exit(1)
content_end = content_start + end_pattern.start()

raw_content = raw[content_start:content_end]
fixed_content = re.sub(r'(?<!\\)"', '\\"', raw_content)
fixed_raw = raw[:content_start] + fixed_content + raw[content_end:]

# Only overwrite the file when the repaired text really parses.
try:
    json.loads(fixed_raw)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_raw)
    print("JSON reparado com sucesso")
except Exception as e:
    print(f"Reparação falhou: {e}")
    sys.exit(1)
PYFIX
  if wp_json_is_valid "$WP_FILE"; then
    log_info "EP${EP_PAD}: JSON reparado automaticamente"
  else
    log_error "EP${EP_PAD}: JSON inválido após reparação — verificação manual necessária"
  fi
fi
# Update state: register the episode if the pipeline does not know it yet,
# then record the generated artifacts.
STATUS="$(get_episode_status "$EP_NUM")"
# An empty result is treated like "not_found": the jq lookup behind
# get_episode_status prints nothing when no episode matches, and without this
# check the episode would never be added, making the field updates below no-ops.
if [[ -z "$STATUS" || "$STATUS" == "not_found" ]]; then
  add_episode "$EP_NUM" "$PODCAST_TITLE" "$GUIDE_URL"
fi
set_episode_field "$EP_NUM" "status" "script_done"
set_episode_field "$EP_NUM" "script_path" "$(basename "$SCRIPT_FILE")"
set_episode_field "$EP_NUM" "wp_data_path" "$(basename "$WP_FILE")"
log_info "EP${EP_PAD}: Content generation complete"
+48
View File
@@ -0,0 +1,48 @@
#!/usr/bin/env bash
# generate-csv.sh — writes canva-bulk.csv (number,title,keyword) for a range
# of episodes, used for bulk cover-image generation.
#
# Usage: generate-csv.sh [start_episode] [count]
#   start_episode  defaults to .next_episode from the pipeline state
#   count          defaults to 7
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"

usage() {
  echo "Usage: $0 [start_episode] [count]"
  echo " Generates canva-bulk.csv for cover image generation."
  echo " Default: next 7 episodes from pipeline state."
  exit 1
}

#######################################
# Print a value as a quoted CSV field, doubling any embedded double quotes
# so titles containing quotes do not corrupt the row.
# Arguments: $1 - raw field value
# Outputs:   the quoted field on stdout (no trailing newline)
#######################################
csv_quote() {
  local field="$1" dq='"'
  printf '"%s"' "${field//$dq/$dq$dq}"
}

# usage() was defined but unreachable; expose it through the usual help flags.
case "${1:-}" in
  -h|--help) usage ;;
esac

ensure_state_file
START="${1:-$(jq -r '.next_episode' "$STATE_FILE")}"
COUNT="${2:-7}"
END=$((START + COUNT - 1))
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
CSV_FILE="${PROJECT_ROOT}/canva-bulk.csv"

echo "number,title,keyword" > "$CSV_FILE"
for ((ep=START; ep<=END; ep++)); do
  # The map is keyed by the episode number as a string.
  EP_DATA="$(jq -r --arg n "$ep" '.[$n] // empty' "$MAP_FILE")"
  if [[ -z "$EP_DATA" ]]; then
    log_warn "Episode $ep not found in map, skipping"
    continue
  fi
  TITLE="$(echo "$EP_DATA" | jq -r '.podcast_title')"

  # Prefer the keyword from the episode's WP JSON when one exists; an
  # unreadable JSON file simply falls back to the title.
  EP_PAD="$(pad_number "$ep")"
  KEYWORD=""
  for wp_file in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
    if [[ -f "$wp_file" ]]; then
      KEYWORD="$(jq -r '.keyword // ""' "$wp_file" 2>/dev/null || echo "")"
      break
    fi
  done
  [[ -z "$KEYWORD" ]] && KEYWORD="$TITLE"

  printf '%s,%s,%s\n' "$ep" "$(csv_quote "$TITLE")" "$(csv_quote "$KEYWORD")" >> "$CSV_FILE"
done

log_info "Generated Canva CSV: ${CSV_FILE} (episodes ${START}-${END})"
echo "CSV saved to: ${CSV_FILE}"
+71
View File
@@ -0,0 +1,71 @@
#!/usr/bin/env bash
# lib.sh — shared configuration and helpers for the podcast pipeline scripts.
# Meant to be sourced, not executed.
set -euo pipefail

# Project root. Defaults to the original Hub location; set PODCAST_PROJECT_ROOT
# to point the scripts at another checkout without editing this file.
PROJECT_ROOT="${PODCAST_PROJECT_ROOT:-/media/ealmeida/Dados/Hub/05-Projectos/Podcast-Descomplicar-Digital}"
STATE_FILE="${PROJECT_ROOT}/pipeline-state.json"
LOG_DIR="${PROJECT_ROOT}/logs"

# Auto-load .env if present. `set -a` exports every variable the file defines
# so child processes can see them.
if [[ -f "${PROJECT_ROOT}/.env" ]]; then
  set -a
  source "${PROJECT_ROOT}/.env"
  set +a
fi
#######################################
# Write one timestamped log line to stdout and append it to today's log file.
# Globals:   LOG_DIR (read; created if missing)
# Arguments: $1 - level label, remaining args - message words
# Outputs:   "[YYYY-MM-DD HH:MM:SS] [LEVEL] message" on stdout and in
#            ${LOG_DIR}/pipeline-YYYY-MM-DD.log
#######################################
log() {
  local severity=$1
  shift
  local stamp logfile
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  mkdir -p "${LOG_DIR}"
  logfile="${LOG_DIR}/pipeline-$(date '+%Y-%m-%d').log"
  echo "[${stamp}] [${severity}] $*" | tee -a "$logfile"
}

# Level-specific shorthands.
log_info() {
  log "INFO" "$@"
}

log_warn() {
  log "WARN" "$@"
}

log_error() {
  log "ERROR" "$@"
}
#######################################
# Create pipeline-state.json with its default skeleton when it is missing.
# An existing file is left untouched.
# Globals: STATE_FILE (read; file may be created)
#######################################
ensure_state_file() {
  [[ -f "${STATE_FILE}" ]] && return 0
  jq '.' <<< '{"last_updated":"","next_episode":20,"next_publish_date":"","publish_time":"07:00","publish_days":["mon","tue","wed","thu","fri"],"episodes":[]}' > "${STATE_FILE}"
  log_info "Created new pipeline-state.json"
}
#######################################
# Print the pipeline status of one episode.
# Globals:   STATE_FILE (read)
# Arguments: $1 - episode number (leading zeros such as "020" are accepted)
# Outputs:   the status string, or "not_found" when the episode is not in
#            the state file, has no status, or the state file is unreadable
#######################################
get_episode_status() {
  local ep_num="$1"
  local status
  # --argjson needs a valid JSON number; "020" is not one, so normalise
  # all-digit input to plain base-10 first.
  if [[ "$ep_num" =~ ^[0-9]+$ ]]; then
    ep_num=$((10#$ep_num))
  fi
  # Collect matches into an array so an episode that does not exist yields
  # null (and therefore "not_found") instead of no output at all.
  status="$(jq -r --argjson n "$ep_num" \
    '[.episodes[]? | select(.number == $n) | .status] | .[0] // "not_found"' \
    "${STATE_FILE}" 2>/dev/null)" || status=""
  printf '%s\n' "${status:-not_found}"
}
#######################################
# Set one field of an episode entry to a string value and refresh
# .last_updated.
# Globals:   STATE_FILE (read, replaced on success)
# Arguments: $1 - episode number (leading zeros accepted)
#            $2 - field name
#            $3 - value (always stored as a JSON string)
# Returns:   0 on success; non-zero if jq fails, in which case the state
#            file is left unchanged
#######################################
set_episode_field() {
  local ep_num="$1" field="$2" value="$3"
  local tmp
  # --argjson rejects numbers with leading zeros such as "020".
  if [[ "$ep_num" =~ ^[0-9]+$ ]]; then
    ep_num=$((10#$ep_num))
  fi
  # Create the temp file next to the state file so the final mv is a rename
  # on the same filesystem rather than a copy from /tmp.
  tmp="$(mktemp "${STATE_FILE}.XXXXXX")" || return 1
  if jq --argjson n "$ep_num" --arg f "$field" --arg v "$value" \
      '(.episodes[] | select(.number == $n))[$f] = $v | .last_updated = (now | todate)' \
      "${STATE_FILE}" > "$tmp"; then
    mv -- "$tmp" "${STATE_FILE}"
  else
    # Do not leave the temp file behind when jq fails.
    rm -f -- "$tmp"
    return 1
  fi
}
#######################################
# Append a new episode entry with status "pending" and every path/id field
# set to null, and refresh .last_updated.
# Globals:   STATE_FILE (read, replaced on success)
# Arguments: $1 - episode number (leading zeros accepted)
#            $2 - episode title
#            $3 - source guide URL
# Returns:   0 on success; non-zero if jq fails, in which case the state
#            file is left unchanged
#######################################
add_episode() {
  local ep_num="$1" title="$2" guide_url="$3"
  local tmp
  # --argjson rejects numbers with leading zeros such as "020".
  if [[ "$ep_num" =~ ^[0-9]+$ ]]; then
    ep_num=$((10#$ep_num))
  fi
  # Same-directory temp file keeps the final mv a same-filesystem rename.
  tmp="$(mktemp "${STATE_FILE}.XXXXXX")" || return 1
  if jq --argjson n "$ep_num" --arg t "$title" --arg g "$guide_url" \
      '.episodes += [{"number":$n,"title":$t,"source_guide_url":$g,"status":"pending","script_path":null,"wp_data_path":null,"audio_raw_path":null,"audio_final_path":null,"cover_path":null,"wp_post_id":null,"scheduled_date":null}] | .last_updated = (now | todate)' \
      "${STATE_FILE}" > "$tmp"; then
    mv -- "$tmp" "${STATE_FILE}"
  else
    # Do not leave the temp file behind when jq fails.
    rm -f -- "$tmp"
    return 1
  fi
}
#######################################
# Print the next Monday-to-Friday date after the given date.
# Friday, Saturday and Sunday all roll forward to the following Monday;
# any other day advances by one day. Relies on `date -d`.
# Arguments: $1 - base date (YYYY-MM-DD)
# Outputs:   resulting date as YYYY-MM-DD
#######################################
next_weekday() {
  local base_date="$1"
  local dow
  # %u is the ISO weekday number: 1 = Monday ... 7 = Sunday.
  dow="$(date -d "${base_date}" '+%u')"
  case "$dow" in
    5|6|7)
      date -d "${base_date} + $((8 - dow)) days" '+%Y-%m-%d'
      ;;
    *)
      date -d "${base_date} + 1 day" '+%Y-%m-%d'
      ;;
  esac
}
#######################################
# Zero-pad an episode number to at least three digits.
# Arguments: $1 - episode number, with or without leading zeros
# Outputs:   padded number on stdout (no trailing newline)
#######################################
pad_number() {
  local raw="$1"
  # printf's %d reads a leading 0 as octal: "020" would print 016 and "008"
  # would be rejected. Force base 10 for all-digit input.
  if [[ "$raw" =~ ^[0-9]+$ ]]; then
    raw=$((10#$raw))
  fi
  printf '%03d' "$raw"
}
+154
View File
@@ -0,0 +1,154 @@
#!/usr/bin/env bash
# postproduce.sh — argument handling and configuration for post-production:
# validates the raw audio parts, loads config/audio-settings.json and prepares
# the output path and a scratch directory.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"

usage() {
  echo "Usage: $0 <episode_number> <raw_audio_path> [raw_audio_path_2 ...]"
  echo " Applies intro, outro, and loudness normalization to raw TTS audio."
  echo " Accepts multiple audio parts that will be concatenated in order."
  exit 1
}

(( $# >= 2 )) || usage

EP_NUM="$1"
shift
RAW_PARTS=("$@")
EP_PAD="$(pad_number "$EP_NUM")"
ensure_state_file

# Every raw part must exist before any processing starts.
for raw_part in "${RAW_PARTS[@]}"; do
  [[ -f "$raw_part" ]] && continue
  log_error "EP${EP_PAD}: Raw audio file not found: ${raw_part}"
  exit 1
done

# Load audio settings
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"

# Read one value from the settings file using a jq path expression.
audio_setting() {
  jq -r "$1" "$SETTINGS_FILE"
}

INTRO_FILE="${PROJECT_ROOT}/$(audio_setting '.intro_file')"
OUTRO_FILE="${PROJECT_ROOT}/$(audio_setting '.outro_file')"
FADE_IN="$(audio_setting '.fade_in_duration')"
BG_DUR="$(audio_setting '.background_duration')"
BG_VOL="$(audio_setting '.background_volume_db')"
FADE_OUT="$(audio_setting '.fade_out_duration')"
LUFS="$(audio_setting '.loudness_target_lufs')"
BITRATE="$(audio_setting '.export_bitrate')"
SAMPLE_RATE="$(audio_setting '.export_sample_rate')"
MIN_DUR="$(audio_setting '.min_duration_minutes')"
MAX_DUR="$(audio_setting '.max_duration_minutes')"

# Verify input files
for required_audio in "$INTRO_FILE" "$OUTRO_FILE"; do
  [[ -f "$required_audio" ]] && continue
  log_error "EP${EP_PAD}: Required audio file not found: ${required_audio}"
  exit 1
done

# Create output directory
OUTPUT_DIR="${PROJECT_ROOT}/Episodios/Audios/final"
mkdir -p "$OUTPUT_DIR"

# Get podcast title for filename: non-alphanumerics become dashes, runs of
# dashes are collapsed, a trailing dash is dropped and the result lowercased.
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
PODCAST_TITLE="$(jq -r --arg n "$EP_NUM" '.[$n].podcast_title // "episodio"' "$MAP_FILE")"
SAFE_TITLE="$(echo "$PODCAST_TITLE" | sed 's/[^a-zA-Z0-9]/-/g' | sed 's/--*/-/g' | sed 's/-$//' | tr '[:upper:]' '[:lower:]')"
OUTPUT_FILE="${OUTPUT_DIR}/ep_${EP_PAD}_${SAFE_TITLE}.mp3"

# Temp directory, removed on any exit path
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "EP${EP_PAD}: Starting post-production"
# Concatenate raw audio parts if multiple
if (( ${#RAW_PARTS[@]} > 1 )); then
  log_info "EP${EP_PAD}: Concatenating ${#RAW_PARTS[@]} audio parts"
  CONCAT_LIST="${TMP_DIR}/concat_list.txt"
  : > "$CONCAT_LIST"
  PART_INDEX=0
  for part in "${RAW_PARTS[@]}"; do
    PART_INDEX=$((PART_INDEX + 1))
    # Name intermediates by position, not by the source basename: two parts
    # with the same file name in different directories used to overwrite each
    # other, and a quote in the name would break the concat list syntax.
    PART_NORM="${TMP_DIR}/part_$(printf '%03d' "$PART_INDEX")_norm.wav"
    # Only bring each part to a common sample rate / channel layout here.
    # The high-shelf (de-ess) filter is applied once to the combined audio in
    # the later "prepare raw audio" step; applying it here as well made
    # multi-part episodes receive the cut twice, unlike single-part ones.
    ffmpeg -y -v quiet -i "$part" -ar "$SAMPLE_RATE" -ac 2 "$PART_NORM"
    printf "file '%s'\n" "$PART_NORM" >> "$CONCAT_LIST"
  done
  ffmpeg -y -v quiet -f concat -safe 0 -i "$CONCAT_LIST" -c copy "${TMP_DIR}/raw_combined.wav"
  RAW_COMBINED="${TMP_DIR}/raw_combined.wav"
else
  # Single part: use it as-is.
  RAW_COMBINED="${RAW_PARTS[0]}"
fi
# Get duration of raw audio
# ffprobe prints the duration in seconds with decimals; "${RAW_DUR%.*}" drops
# everything from the last dot, leaving whole seconds.
RAW_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$RAW_COMBINED")"
RAW_DUR_INT="${RAW_DUR%.*}"
log_info "EP${EP_PAD}: Raw audio duration: ${RAW_DUR_INT}s"
# Create intro background: the intro track, faded in over FADE_IN seconds,
# attenuated by BG_VOL dB, faded out over the last 3 seconds and cut at BG_DUR.
# NOTE(review): $((BG_DUR - 3)) is integer arithmetic, so this assumes
# background_duration is a whole number in the settings file — confirm.
ffmpeg -y -v quiet \
-i "$INTRO_FILE" \
-af "afade=t=in:st=0:d=${FADE_IN},volume=${BG_VOL}dB,afade=t=out:st=$((BG_DUR - 3)):d=3" \
-t "$BG_DUR" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/intro_bg.wav"
# Prepare raw audio (ensure stereo, correct sample rate, de-ess)
# The "de-ess" here is a -6 dB high-shelf cut starting at 4 kHz.
ffmpeg -y -v quiet \
-i "$RAW_COMBINED" \
-af "highshelf=f=4000:g=-6" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/tts_stereo.wav"
# Mix intro background with beginning of TTS
# The background is padded to the speech length and amix uses duration=first,
# so the mixed output follows the length of the speech track (input 0).
ffmpeg -y -v quiet \
-i "${TMP_DIR}/tts_stereo.wav" \
-i "${TMP_DIR}/intro_bg.wav" \
-filter_complex "[1]apad=whole_dur=${RAW_DUR_INT}[bg];[0][bg]amix=inputs=2:duration=first:dropout_transition=3[mixed]" \
-map "[mixed]" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/tts_with_intro.wav"
# Get outro duration
OUTRO_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTRO_FILE")"
OUTRO_DUR_INT="${OUTRO_DUR%.*}"
# Create ending: outro audio + the intro track as music underneath, attenuated
# by BG_VOL dB, faded out over the last FADE_OUT seconds and trimmed to the
# outro length.
# NOTE(review): $((OUTRO_DUR_INT - FADE_OUT)) assumes fade_out_duration is a
# whole number — confirm against the settings file.
ffmpeg -y -v quiet \
-i "$OUTRO_FILE" \
-i "$INTRO_FILE" \
-filter_complex "[1]volume=${BG_VOL}dB,afade=t=out:st=$((OUTRO_DUR_INT - FADE_OUT)):d=${FADE_OUT},atrim=0:${OUTRO_DUR_INT}[music];[0][music]amix=inputs=2:duration=first[out]" \
-map "[out]" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/outro_mixed.wav"
# Concatenate TTS (with intro) + outro
ffmpeg -y -v quiet \
-i "${TMP_DIR}/tts_with_intro.wav" \
-i "${TMP_DIR}/outro_mixed.wav" \
-filter_complex "[0][1]concat=n=2:v=0:a=1[out]" \
-map "[out]" \
-ar "$SAMPLE_RATE" -ac 2 \
"${TMP_DIR}/full_episode.wav"
# Normalize loudness and export as MP3
# Integrated loudness target comes from the settings file; true peak and
# loudness range are fixed at -1.5 and 11.
ffmpeg -y -v quiet \
-i "${TMP_DIR}/full_episode.wav" \
-af "loudnorm=I=${LUFS}:TP=-1.5:LRA=11" \
-ar "$SAMPLE_RATE" -ac 2 \
-b:a "$BITRATE" \
"$OUTPUT_FILE"
# Validate duration
FINAL_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTPUT_FILE")"
FINAL_SEC="${FINAL_DUR%.*}"
# Stop with a clear message if the probe returned nothing usable, instead of
# feeding an empty value into the arithmetic and comparisons below.
if [[ ! "$FINAL_SEC" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: Could not determine duration of ${OUTPUT_FILE}"
  exit 1
fi
# Shell arithmetic replaces the former `bc` call (one less external dependency).
FINAL_MIN=$((10#$FINAL_SEC / 60))
log_info "EP${EP_PAD}: Final duration: ${FINAL_MIN} minutes (${FINAL_SEC}s)"
# Out-of-range durations only produce a warning; the episode is still recorded.
if [[ "$FINAL_MIN" -lt "$MIN_DUR" ]] || [[ "$FINAL_MIN" -gt "$MAX_DUR" ]]; then
  log_warn "EP${EP_PAD}: Duration ${FINAL_MIN}min is outside expected range (${MIN_DUR}-${MAX_DUR}min)"
fi

# Update state
set_episode_field "$EP_NUM" "status" "produced"
set_episode_field "$EP_NUM" "audio_final_path" "$(basename "$OUTPUT_FILE")"
log_info "EP${EP_PAD}: Post-production complete -> ${OUTPUT_FILE}"
+140
View File
@@ -0,0 +1,140 @@
#!/usr/bin/env bash
# publish-episode.sh — generates a shell script of wp-cli commands that
# publishes one episode, to be run on the server afterwards.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"

usage() {
  echo "Usage: $0 <episode_number> <scheduled_date>"
  echo " Publishes episode to WordPress via wp-cli over SSH."
  echo " scheduled_date format: YYYY-MM-DD"
  echo " Requires: final audio, WP JSON, cover image (optional)."
  exit 1
}

[[ $# -lt 2 ]] && usage
EP_NUM="$1"
SCHED_DATE="$2"
EP_PAD="$(pad_number "$EP_NUM")"
# The state file must exist before it is read: previously publish_time was
# read first, so a missing state file aborted the script under `set -e`
# before ensure_state_file had a chance to create it.
ensure_state_file
PUBLISH_TIME="$(jq -r '.publish_time' "$STATE_FILE")"
# Find required files: first final MP3 and first WP JSON for this episode.
AUDIO_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
  if [[ -f "$candidate" ]]; then
    AUDIO_FILE="$candidate"
    break
  fi
done

WP_JSON=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
  if [[ -f "$candidate" ]]; then
    WP_JSON="$candidate"
    break
  fi
done

# Cover: first .jpg/.png named ep_NNN_* anywhere under Capas_PodCast. Only the
# first NUL-terminated path is read; no match leaves COVER_FILE empty.
COVER_FILE=""
IFS= read -r -d '' COVER_FILE < <(find "${PROJECT_ROOT}/Episodios/Capas_PodCast" -name "ep_${EP_PAD}_*" \( -name "*.jpg" -o -name "*.png" \) -print0 2>/dev/null) || true

# Validate required files: audio and WP JSON are mandatory, cover is optional.
[[ -n "$AUDIO_FILE" && -f "$AUDIO_FILE" ]] || {
  log_error "EP${EP_PAD}: Audio file not found in Audios/final/"
  exit 1
}
[[ -n "$WP_JSON" && -f "$WP_JSON" ]] || {
  log_error "EP${EP_PAD}: WP JSON file not found"
  exit 1
}
if [[ -z "$COVER_FILE" ]]; then
  log_warn "EP${EP_PAD}: Cover image not found, publishing without featured image"
fi
#######################################
# Escape a string so it can be embedded between single quotes in the
# generated command script (each ' becomes '\'').
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout
#######################################
sq_escape() {
  printf '%s' "$1" | sed "s/'/'\\\\''/g"
}

# Read WP data
TITLE="$(jq -r '.title' "$WP_JSON")"
CONTENT="$(jq -r '.content_html' "$WP_JSON")"
META_DESC="$(jq -r '.meta_description' "$WP_JSON")"
# A missing wp_tags key is treated as "no tags" instead of aborting the script.
TAGS_RAW="$(jq -r '(.wp_tags // []) | join(",")' "$WP_JSON")"
# TAGS is embedded inside single quotes in the generated script, exactly like
# the title and content, so it is stored already escaped; a tag containing an
# apostrophe used to break the generated command.
TAGS="$(sq_escape "$TAGS_RAW")"

# Get audio metadata
DURATION="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$AUDIO_FILE")"
DUR_MIN=$((${DURATION%.*} / 60))
DUR_SEC=$((${DURATION%.*} % 60))
DURATION_FMT="$(printf '%d:%02d' "$DUR_MIN" "$DUR_SEC")"
FILESIZE="$(du -h "$AUDIO_FILE" | cut -f1)"
FILESIZE_RAW="$(stat -c%s "$AUDIO_FILE")"
log_info "EP${EP_PAD}: Publishing '${TITLE}' scheduled for ${SCHED_DATE} ${PUBLISH_TIME}"

# Generate wp-cli commands for SSH execution
CMDS_FILE="${PROJECT_ROOT}/logs/publish_${EP_PAD}_commands.sh"
mkdir -p "${PROJECT_ROOT}/logs"
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"
YEAR="$(date -d "$SCHED_DATE" '+%Y')"
MONTH="$(date -d "$SCHED_DATE" '+%m')"
UPLOAD_PATH="wp-content/uploads/podcast/${YEAR}/${MONTH}"

# Escape single quotes in content
ESCAPED_CONTENT="$(sq_escape "$CONTENT")"
ESCAPED_META="$(sq_escape "$META_DESC")"
ESCAPED_TITLE="$(sq_escape "$TITLE")"
# Write the command script. The heredoc delimiter is unquoted, so every
# ${VAR} below is expanded now, while writing the file; sequences written as
# \${...} and \$( ... ) are emitted literally and evaluated only when the
# generated script runs on the server.
cat > "$CMDS_FILE" << CMDEOF
#!/usr/bin/env bash
# Auto-generated publish commands for EP${EP_PAD}
# Run via SSH MCP on server (user: ealmeida, path: /home/ealmeida/public_html)
set -euo pipefail
WP_PATH="/home/ealmeida/public_html"
UPLOAD_DIR="\${WP_PATH}/${UPLOAD_PATH}"
# 1. Create upload directory
mkdir -p "\${UPLOAD_DIR}"
# 2. Audio file must be uploaded to server first (via sftp MCP)
# Source: ${AUDIO_FILE}
# Target: \${UPLOAD_DIR}/${AUDIO_BASENAME}
# 3. Create podcast post
POST_ID=\$(wp post create \\
--post_type=podcast \\
--post_title='${ESCAPED_TITLE}' \\
--post_status=future \\
--post_date='${SCHED_DATE} ${PUBLISH_TIME}:00' \\
--tags_input='${TAGS}' \\
--porcelain \\
--allow-root \\
--path="\${WP_PATH}")
echo "Created post: \${POST_ID}"
# 4. Add content (separate to avoid shell escaping issues)
wp post update \${POST_ID} --post_content='${ESCAPED_CONTENT}' --allow-root --path="\${WP_PATH}"
# 5. Set SSP meta fields
wp post meta update \${POST_ID} episode_type audio --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} audio_file "https://descomplicar.pt/${UPLOAD_PATH}/${AUDIO_BASENAME}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} duration "${DURATION_FMT}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} filesize "${FILESIZE}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} filesize_raw "${FILESIZE_RAW}" --allow-root --path="\${WP_PATH}"
# 6. Set Rank Math meta description
wp post meta update \${POST_ID} rank_math_description '${ESCAPED_META}' --allow-root --path="\${WP_PATH}"
# 7. Fix permissions
chown -R ealmeida:ealmeida "\${UPLOAD_DIR}"
echo "EP${EP_PAD} published as post \${POST_ID}, scheduled for ${SCHED_DATE} ${PUBLISH_TIME}"
CMDEOF
chmod +x "$CMDS_FILE"
# Update state
# NOTE(review): the status becomes "published" as soon as the command file is
# written; nothing in this script uploads the audio or runs the commands.
set_episode_field "$EP_NUM" "status" "published"
set_episode_field "$EP_NUM" "scheduled_date" "$SCHED_DATE"
log_info "EP${EP_PAD}: Publish commands saved to ${CMDS_FILE}"
log_info "EP${EP_PAD}: Upload audio via SFTP, then run commands via SSH"
+292
View File
@@ -0,0 +1,292 @@
#!/usr/bin/env bash
set -euo pipefail
# schedule-episode.sh — Schedules a complete episode in WordPress.
# Sends the MP3 and cover via SCP, imports the media and creates the post
# with all of its meta fields.
#
# Usage: ./scripts/schedule-episode.sh <ep_num> <YYYY-MM-DD> [--dry-run]
#
# Local requirements:
# - MP3 in Episodios/Audios/final/ep_NNN_*.mp3
# - Cover in the media bank: capas-geradas/podcast/podcast-epNNN-*.png
# - ffprobe (for the duration)
#
# Server requirements:
# - wp-cli with --allow-root
# - SSH on port 9443, key ~/.ssh/id_ed25519
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# SSH config
# NOTE(review): SSH_OPTS turns off host key checking and discards known_hosts,
# and the connection is made as root — confirm this is acceptable for the
# production server.
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
WP_PATH="/home/ealmeida/public_html"
WP_OWNER="ealmeida:ealmeida"
SITE_URL="https://descomplicar.pt"
SERIES_SLUG="podcast-descomplicar-digital"
# Media bank (cover images)
CAPAS_DIR="/media/ealmeida/Dados/Hub/06-Operacoes/Conteúdos/banco-media/capas-geradas/podcast"
# Set to true by the optional --dry-run third argument.
DRY_RUN=false
# Print usage and exit with status 1.
usage() {
echo "Usage: $0 <ep_num> <YYYY-MM-DD> [--dry-run]"
echo " Agenda episodio completo no WordPress (audio + capa + metas + SEO)"
exit 1
}
#######################################
# Run a command on the server over SSH with the configured key and port.
# The ssh-agent socket is blanked for the call.
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
# Arguments: the remote command and its arguments
#######################################
ssh_cmd() {
  local -a ssh_argv=(ssh -p "${SSH_PORT}" -i "${SSH_KEY}")
  # SSH_OPTS is one string holding several "-o key=value" options and is
  # word-split on purpose.
  # shellcheck disable=SC2206
  ssh_argv+=(${SSH_OPTS} "${SSH_USER}@${SSH_HOST}" "$@")
  SSH_AUTH_SOCK= "${ssh_argv[@]}"
}
#######################################
# Copy one local file to the server with scp.
# The ssh-agent socket is blanked for the call.
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
# Arguments: $1 - local source path, $2 - remote destination path
#######################################
scp_file() {
  local -a scp_argv=(scp -P "${SSH_PORT}" -i "${SSH_KEY}")
  # SSH_OPTS is one string holding several "-o key=value" options and is
  # word-split on purpose.
  # shellcheck disable=SC2206
  scp_argv+=(${SSH_OPTS} "$1" "${SSH_USER}@${SSH_HOST}:$2")
  SSH_AUTH_SOCK= "${scp_argv[@]}"
}
(( $# >= 2 )) || usage
EP_NUM="$1"
SCHED_DATE="$2"
if [[ "${3:-}" == "--dry-run" ]]; then
  DRY_RUN=true
fi
EP_PAD="$(pad_number "$EP_NUM")"

# === 1. Locate local files (first match of each kind wins) ===
AUDIO_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
  if [[ -f "$candidate" ]]; then
    AUDIO_FILE="$candidate"
    break
  fi
done

COVER_FILE=""
for candidate in "${CAPAS_DIR}/podcast-ep${EP_PAD}-"*.png; do
  if [[ -f "$candidate" ]]; then
    COVER_FILE="$candidate"
    break
  fi
done

GUIDE_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
  if [[ -f "$candidate" ]]; then
    GUIDE_FILE="$candidate"
    break
  fi
done

# Validate: audio and cover are mandatory, the script text is optional.
if [[ -z "$AUDIO_FILE" ]]; then
  log_error "EP${EP_PAD}: MP3 nao encontrado em Episodios/Audios/final/"
  exit 1
fi
if [[ -z "$COVER_FILE" ]]; then
  log_error "EP${EP_PAD}: Capa PNG nao encontrada em ${CAPAS_DIR}/"
  exit 1
fi
if [[ -z "$GUIDE_FILE" ]]; then
  log_warn "EP${EP_PAD}: Guiao .txt nao encontrado (conteudo WP ficara vazio)"
fi
# === 2. Extract audio metadata ===
DURATION_RAW=$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$AUDIO_FILE")
DUR_SEC_TOTAL=${DURATION_RAW%.*}
DUR_MIN=$((DUR_SEC_TOTAL / 60))
DUR_SEC=$((DUR_SEC_TOTAL % 60))
printf -v DURATION_FMT '%d:%02d' "$DUR_MIN" "$DUR_SEC"
FILESIZE_H="$(du -h "$AUDIO_FILE" | cut -f1)"
FILESIZE_RAW="$(stat -c%s "$AUDIO_FILE")"
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"

# Title: taken from the script file name when available (prefix removed,
# underscores turned into spaces); otherwise from the audio file name
# (prefix removed, dashes turned into spaces).
TITLE=""
[[ -z "$GUIDE_FILE" ]] || TITLE="$(basename "$GUIDE_FILE" .txt | sed 's/^Episodio_[0-9]*_//' | tr '_' ' ')"
if [[ -z "$TITLE" ]]; then
  TITLE="$(basename "$AUDIO_FILE" .mp3 | sed 's/^ep_[0-9]*_//' | tr '-' ' ')"
fi

# Upload paths, grouped by the year/month of the scheduled date.
YEAR="$(date -d "$SCHED_DATE" '+%Y')"
MONTH="$(date -d "$SCHED_DATE" '+%m')"
AUDIO_REL_DIR="wp-content/uploads/podcast/${YEAR}/${MONTH}"
AUDIO_REMOTE_DIR="${WP_PATH}/${AUDIO_REL_DIR}"
AUDIO_URL="${SITE_URL}/${AUDIO_REL_DIR}/${AUDIO_BASENAME}"
log_info "EP${EP_PAD}: '${TITLE}' | ${DURATION_FMT} | ${FILESIZE_H} | ${SCHED_DATE} 07:00"

# Dry run: report what would be used and stop before touching the server.
if [[ "$DRY_RUN" == true ]]; then
  log_info "[DRY-RUN] Audio: ${AUDIO_FILE}"
  log_info "[DRY-RUN] Capa: ${COVER_FILE}"
  log_info "[DRY-RUN] URL audio: ${AUDIO_URL}"
  log_info "[DRY-RUN] Titulo: ${TITLE}"
  exit 0
fi
# === 3. Send the MP3 to the server ===
# Creates the remote directory, copies the file and hands ownership to WP_OWNER.
log_info "EP${EP_PAD}: Enviar MP3..."
ssh_cmd "mkdir -p '${AUDIO_REMOTE_DIR}'"
scp_file "$AUDIO_FILE" "${AUDIO_REMOTE_DIR}/"
ssh_cmd "chown ${WP_OWNER} '${AUDIO_REMOTE_DIR}/${AUDIO_BASENAME}'"
log_info "EP${EP_PAD}: MP3 enviado -> ${AUDIO_REMOTE_DIR}/${AUDIO_BASENAME}"
# === 4. Send the cover and import it into WP ===
# The cover is staged under /tmp/podcast-upload on the server, imported with
# `wp media import` (whose --porcelain output is captured as the attachment
# id) and the staged copy is then removed.
log_info "EP${EP_PAD}: Enviar capa..."
COVER_BASENAME="$(basename "$COVER_FILE")"
ssh_cmd "mkdir -p /tmp/podcast-upload"
scp_file "$COVER_FILE" "/tmp/podcast-upload/${COVER_BASENAME}"
ssh_cmd "chown ${WP_OWNER} '/tmp/podcast-upload/${COVER_BASENAME}'"
ATTACH_ID=$(ssh_cmd "cd '${WP_PATH}' && wp media import '/tmp/podcast-upload/${COVER_BASENAME}' --title='${COVER_BASENAME%.png}' --porcelain --allow-root 2>/dev/null")
ssh_cmd "rm -f '/tmp/podcast-upload/${COVER_BASENAME}'"
if [[ -z "$ATTACH_ID" ]]; then
log_error "EP${EP_PAD}: Falha ao importar capa no WP"
exit 1
fi
log_info "EP${EP_PAD}: Capa importada (attach_id: ${ATTACH_ID})"
# === 5. Create the scheduled podcast post ===
# The title has its single quotes escaped because it is embedded inside a
# single-quoted argument of the remote command. The publish time is fixed at
# 07:00:00 here.
log_info "EP${EP_PAD}: Criar post..."
POST_ID=$(ssh_cmd "cd '${WP_PATH}' && wp post create \
--post_type=podcast \
--post_title='$(echo "$TITLE" | sed "s/'/'\\\\''/g")' \
--post_status=future \
--post_date='${SCHED_DATE} 07:00:00' \
--porcelain \
--allow-root 2>/dev/null")
if [[ -z "$POST_ID" ]]; then
log_error "EP${EP_PAD}: Falha ao criar post"
exit 1
fi
log_info "EP${EP_PAD}: Post criado (ID: ${POST_ID})"
# === 6. Attach the series and the featured image ===
ssh_cmd "cd '${WP_PATH}' && \
wp post term set ${POST_ID} series '${SERIES_SLUG}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} _thumbnail_id ${ATTACH_ID} --allow-root 2>/dev/null"
# === 7. SSP (Seriously Simple Podcasting) meta fields ===
# The updates are chained with &&, so the first failing one stops the rest.
ssh_cmd "cd '${WP_PATH}' && \
wp post meta update ${POST_ID} episode_type audio --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} audio_file '${AUDIO_URL}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} duration '${DURATION_FMT}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} filesize '${FILESIZE_H}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} filesize_raw '${FILESIZE_RAW}' --allow-root 2>/dev/null && \
wp post meta update ${POST_ID} date_recorded '${SCHED_DATE} 07:00:00' --allow-root 2>/dev/null"
# === 8. Apply WP content + Rank Math + tags (if a _wp.json exists) ===
WP_JSON=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
  [[ -f "$f" ]] && WP_JSON="$f" && break
done

if [[ -n "$WP_JSON" ]]; then
  # Auto-repair the JSON if needed. The path goes to Python through argv so
  # quotes in the file name cannot break the check.
  if ! python3 -c 'import json, sys; json.load(open(sys.argv[1], encoding="utf-8"))' "$WP_JSON" 2>/dev/null; then
    log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
    # Run the repair inside a condition: at this point the post already exists
    # on the server, so a failed repair must not abort the script through
    # `set -e`. The validity check that follows decides whether the content is
    # applied.
    if python3 - "$WP_JSON" << 'PYFIX'
import sys, re, json

filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()

# Locate the content_html value; it is assumed to end right before the
# "hashtags" (or, failing that, "wp_tags") key.
marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    sys.exit(1)
content_start = start + len(marker)
end_pattern = (re.search(r'",\s*\n\s*"hashtags"', raw[content_start:])
               or re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:]))
if not end_pattern:
    sys.exit(1)
content_end = content_start + end_pattern.start()

# Escape every double quote inside the value that is not already escaped.
fixed = (raw[:content_start]
         + re.sub(r'(?<!\\)"', '\\"', raw[content_start:content_end])
         + raw[content_end:])

# Raises (non-zero exit) when the repaired text still does not parse, in
# which case the file is left untouched.
json.loads(fixed)
with open(filepath, 'w', encoding='utf-8') as f:
    f.write(fixed)
PYFIX
    then
      log_info "EP${EP_PAD}: JSON reparado"
    else
      log_warn "EP${EP_PAD}: reparação automática do JSON falhou"
    fi
  fi
fi
# Apply the WP JSON fields to the post, but only when the file exists and
# parses as JSON. Each field is read with its own python3 call and applied
# with a separate remote wp-cli command; values embedded in single-quoted
# remote arguments have their single quotes escaped first.
if [[ -n "$WP_JSON" ]] && python3 -c "import json; json.load(open('$WP_JSON'))" 2>/dev/null; then
log_info "EP${EP_PAD}: Aplicar conteudo WP de $(basename "$WP_JSON")"
WP_CONTENT="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('content_html',''))" 2>/dev/null)"
WP_META="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('meta_description',''))" 2>/dev/null)"
WP_KEYWORD="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('keyword',''))" 2>/dev/null)"
WP_TAGS="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(','.join(d.get('wp_tags',[])))" 2>/dev/null)"
WP_HASHTAGS="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(' '.join(d.get('hashtags',[])))" 2>/dev/null)"
# Excerpt = meta description, a line break, then the hashtags. It is only
# built when both values are non-empty.
WP_EXCERPT=""
if [[ -n "$WP_META" && -n "$WP_HASHTAGS" ]]; then
WP_EXCERPT="${WP_META}
${WP_HASHTAGS}"
fi
# Apply the HTML content
if [[ -n "$WP_CONTENT" ]]; then
ESCAPED_CONTENT="$(echo "$WP_CONTENT" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_content='${ESCAPED_CONTENT}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: post_content aplicado"
fi
# Apply the excerpt
if [[ -n "$WP_EXCERPT" ]]; then
ESCAPED_EXCERPT="$(echo "$WP_EXCERPT" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_excerpt='${ESCAPED_EXCERPT}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: post_excerpt aplicado"
fi
# Apply the tags
# NOTE(review): WP_TAGS is a single comma-joined string passed unquoted, so a
# tag containing spaces is split into several arguments by the remote shell.
# Verify that `wp post term set` treats the comma-joined value as separate
# tags rather than as one term.
if [[ -n "$WP_TAGS" ]]; then
ssh_cmd "cd '${WP_PATH}' && wp post term set ${POST_ID} post_tag ${WP_TAGS} --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: tags aplicadas"
fi
# Rank Math: meta description + focus keyword
if [[ -n "$WP_META" ]]; then
ESCAPED_META="$(echo "$WP_META" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_description '${ESCAPED_META}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: rank_math_description aplicado"
fi
if [[ -n "$WP_KEYWORD" ]]; then
ESCAPED_KW="$(echo "$WP_KEYWORD" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_focus_keyword '${ESCAPED_KW}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: rank_math_focus_keyword aplicado"
fi
# Rank Math: SEO title (prefer seo_title from the JSON, fall back to title + suffix)
WP_SEO_TITLE="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('seo_title',''))" 2>/dev/null)"
if [[ -z "$WP_SEO_TITLE" ]]; then
WP_TITLE_FALLBACK="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('title',''))" 2>/dev/null)"
[[ -n "$WP_TITLE_FALLBACK" ]] && WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
fi
if [[ -n "$WP_SEO_TITLE" ]]; then
ESCAPED_SEO_TITLE="$(echo "$WP_SEO_TITLE" | sed "s/'/'\\\\''/g")"
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_title '${ESCAPED_SEO_TITLE}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: rank_math_title aplicado"
fi
# Optimised slug (taken from the JSON when present). Unlike the other values
# it is embedded in the remote command without quote escaping.
WP_SLUG="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('slug',''))" 2>/dev/null)"
if [[ -n "$WP_SLUG" ]]; then
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_name='${WP_SLUG}' --allow-root 2>/dev/null"
log_info "EP${EP_PAD}: slug actualizado para ${WP_SLUG}"
fi
else
# Reached both when no _wp.json exists and when it exists but does not parse.
log_info "EP${EP_PAD}: PENDENTE — WP JSON nao encontrado, gerar via generate-content.sh"
fi
# === 9. Fix ownership of the upload directories (best effort) ===
ssh_cmd "chown -R ${WP_OWNER} '${AUDIO_REMOTE_DIR}/' '${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}/' 2>/dev/null" || true
log_info "EP${EP_PAD}: Agendado para ${SCHED_DATE} 07:00 (post ${POST_ID})"

# Update pipeline-state.json.
# This used to write its own entry shape (num / audio / scheduled), which never
# matched the entries the lib.sh helpers create and look up by "number", so
# every run appended a duplicate entry that the rest of the pipeline could not
# see. The episode is now recorded through the lib.sh helpers, using the same
# field names as add_episode.
EP_TITLE=""
if [[ -n "$WP_JSON" ]]; then
  EP_TITLE="$(python3 -c 'import json, sys; print(json.load(open(sys.argv[1])).get("title", ""))' "$WP_JSON" 2>/dev/null || echo "")"
fi
# Fall back to the title derived from the file names earlier in this script.
if [[ -z "$EP_TITLE" ]]; then
  EP_TITLE="$TITLE"
fi

# Plain base-10 number: jq's --argjson rejects values such as "020".
EP_KEY="$EP_NUM"
if [[ "$EP_KEY" =~ ^[0-9]+$ ]]; then
  EP_KEY=$((10#$EP_KEY))
fi

ensure_state_file
EP_STATUS="$(get_episode_status "$EP_KEY")"
# An empty result is treated like "not_found" (the lookup prints nothing when
# no entry matches).
if [[ -z "$EP_STATUS" || "$EP_STATUS" == "not_found" ]]; then
  add_episode "$EP_KEY" "$EP_TITLE" ""
else
  set_episode_field "$EP_KEY" "title" "$EP_TITLE"
fi
set_episode_field "$EP_KEY" "status" "ready"
set_episode_field "$EP_KEY" "audio_final_path" "$(basename "$AUDIO_FILE")"
set_episode_field "$EP_KEY" "scheduled_date" "$SCHED_DATE"
set_episode_field "$EP_KEY" "wp_post_id" "$POST_ID"
log_info "EP${EP_PAD}: pipeline-state.json actualizado"

# Final line of output: the id of the created post.
echo "${POST_ID}"
+82
View File
@@ -0,0 +1,82 @@
#!/usr/bin/env bash
# split-script.sh — setup: validates arguments, loads the TTS limits and
# handles the case where the script is short enough to stay in one block.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"

usage() {
  echo "Usage: $0 <input_file> <output_dir>"
  echo " Splits a podcast script into blocks for TTS generation."
  echo " Splits at the FAQ transition marker or at word limit."
  echo " Creates part_1.txt, part_2.txt, etc. in output_dir."
  exit 1
}

(( $# >= 2 )) || usage

INPUT_FILE="$1"
OUTPUT_DIR="$2"
[[ -f "$INPUT_FILE" ]] || {
  log_error "Input file not found: ${INPUT_FILE}"
  exit 1
}
mkdir -p "$OUTPUT_DIR"

# Word limit per block and the regex that marks the preferred split point.
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
MAX_WORDS="$(jq -r '.tts_max_words_per_block' "$SETTINGS_FILE")"
SPLIT_MARKER="$(jq -r '.tts_split_marker' "$SETTINGS_FILE")"
TOTAL_WORDS="$(wc -w < "$INPUT_FILE")"
log_info "Total words: ${TOTAL_WORDS}, max per block: ${MAX_WORDS}"

# Short scripts are copied unchanged; the part count is printed last.
if (( TOTAL_WORDS <= MAX_WORDS )); then
  cp "$INPUT_FILE" "${OUTPUT_DIR}/part_1.txt"
  log_info "No split needed (${TOTAL_WORDS} words). Created part_1.txt"
  echo "1"
  exit 0
fi
# Try to split at the FAQ marker.
# grep exits 1 when nothing matches (and may be killed by SIGPIPE once head
# closes the pipe). Under `set -euo pipefail` either case used to abort the
# script here, so the paragraph fallback below could never run. "|| true"
# turns "no match" into an empty MARKER_LINE.
MARKER_LINE="$(grep -nE -e "$SPLIT_MARKER" -- "$INPUT_FILE" | head -1 | cut -d: -f1 || true)"

if [[ -n "$MARKER_LINE" && "$MARKER_LINE" -gt 1 ]]; then
  # Split at the marker line (FAQ section starts here): part 1 is everything
  # before the marker, part 2 starts with the marker line itself.
  head -n "$((MARKER_LINE - 1))" "$INPUT_FILE" > "${OUTPUT_DIR}/part_1.txt"
  tail -n "+${MARKER_LINE}" "$INPUT_FILE" > "${OUTPUT_DIR}/part_2.txt"
  WORDS_1="$(wc -w < "${OUTPUT_DIR}/part_1.txt")"
  WORDS_2="$(wc -w < "${OUTPUT_DIR}/part_2.txt")"
  log_info "Split at FAQ marker (line ${MARKER_LINE}): part_1=${WORDS_1} words, part_2=${WORDS_2} words"
  echo "2"
else
  # No marker found — split at approximate midpoint by paragraph
  TOTAL_LINES="$(wc -l < "$INPUT_FILE")"
  MID_LINE=$((TOTAL_LINES / 2))

  # A file with fewer than two lines cannot be split by line; keep it whole
  # rather than producing an empty part_1.txt.
  if (( MID_LINE < 1 )); then
    cp "$INPUT_FILE" "${OUTPUT_DIR}/part_1.txt"
    log_warn "Input has fewer than 2 lines; cannot split by line. Created part_1.txt only"
    echo "1"
    exit 0
  fi

  # Find nearest empty line (paragraph break) near midpoint, probing
  # outwards from it.
  SPLIT_LINE=""
  for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
    CHECK=$((MID_LINE + offset))
    if (( CHECK > 0 && CHECK < TOTAL_LINES )); then
      LINE_CONTENT="$(sed -n "${CHECK}p" "$INPUT_FILE")"
      if [[ -z "$LINE_CONTENT" || "$LINE_CONTENT" =~ ^[[:space:]]*$ ]]; then
        SPLIT_LINE="$CHECK"
        break
      fi
    fi
  done
  # No blank line near the middle: split at the midpoint line itself.
  if [[ -z "$SPLIT_LINE" ]]; then
    SPLIT_LINE="$MID_LINE"
  fi

  head -n "$SPLIT_LINE" "$INPUT_FILE" > "${OUTPUT_DIR}/part_1.txt"
  tail -n "+$((SPLIT_LINE + 1))" "$INPUT_FILE" > "${OUTPUT_DIR}/part_2.txt"
  WORDS_1="$(wc -w < "${OUTPUT_DIR}/part_1.txt")"
  WORDS_2="$(wc -w < "${OUTPUT_DIR}/part_2.txt")"
  log_info "Split at paragraph break (line ${SPLIT_LINE}): part_1=${WORDS_1} words, part_2=${WORDS_2} words"
  echo "2"
fi
+115
View File
@@ -0,0 +1,115 @@
#!/usr/bin/env bash
set -euo pipefail
# transfer-to-server.sh — Transfers files from the desktop to the CWP server via SCP
# Uses the SSH key in ~/.ssh/id_ed25519, port 9443
# Final ownership: ealmeida:ealmeida
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# Connection settings.
# NOTE(review): SSH_OPTS turns off host key checking and discards known_hosts,
# and the connection is made as root — confirm this is acceptable.
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
WP_PATH="/home/ealmeida/public_html"
# Print usage, including the three transfer types, and exit with status 1.
usage() {
echo "Usage: $0 <type> <local_file> [remote_subdir]"
echo ""
echo "Types:"
echo " cover <file.png> -> wp-content/uploads/YYYY/MM/"
echo " audio <file.mp3> [YYYY/MM] -> wp-content/uploads/podcast/YYYY/MM/"
echo " batch <dir_of_files> <type> -> envia todos os ficheiros do directorio"
echo ""
echo "Examples:"
echo " $0 cover /path/to/ep001.png"
echo " $0 audio /path/to/ep020.mp3 2026/04"
echo " $0 batch /path/to/capas/ cover"
exit 1
}
#######################################
# Copy one local file to the server with scp.
# The ssh-agent socket is blanked for the call.
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
# Arguments: $1 - local source path, $2 - remote destination path
#######################################
scp_file() {
  local src="$1" dst="$2"
  local remote="${SSH_USER}@${SSH_HOST}:${dst}"
  # SSH_OPTS is one string holding several "-o key=value" options and is
  # word-split on purpose.
  # shellcheck disable=SC2086
  SSH_AUTH_SOCK= scp -P "${SSH_PORT}" -i "${SSH_KEY}" ${SSH_OPTS} "$src" "$remote"
}
#######################################
# Run a command on the server over SSH with the configured key and port.
# The ssh-agent socket is blanked for the call.
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
# Arguments: the remote command and its arguments
#######################################
ssh_cmd() {
  local target="${SSH_USER}@${SSH_HOST}"
  # SSH_OPTS is one string holding several "-o key=value" options and is
  # word-split on purpose.
  # shellcheck disable=SC2086
  SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" ${SSH_OPTS} "$target" "$@"
}
(( $# >= 2 )) || usage
TYPE="$1"
shift

# Dispatch on the transfer type; the remaining arguments depend on the type.
case "$TYPE" in
  cover)
    # Single cover image -> uploads/<current year>/<current month>/
    LOCAL_FILE="$1"
    if [[ ! -f "$LOCAL_FILE" ]]; then
      log_error "Ficheiro nao encontrado: $LOCAL_FILE"
      exit 1
    fi
    YEAR="$(date '+%Y')"
    MONTH="$(date '+%m')"
    REMOTE_DIR="${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}"
    ssh_cmd "mkdir -p '${REMOTE_DIR}'"
    scp_file "$LOCAL_FILE" "${REMOTE_DIR}/"
    BASENAME="$(basename "$LOCAL_FILE")"
    ssh_cmd "chown ealmeida:ealmeida '${REMOTE_DIR}/${BASENAME}'"
    log_info "Cover enviada: ${BASENAME} -> ${REMOTE_DIR}/"
    ;;

  audio)
    # Single audio file -> uploads/podcast/<subdir>/, where subdir is the
    # optional second argument or the current year/month.
    LOCAL_FILE="$1"
    if [[ ! -f "$LOCAL_FILE" ]]; then
      log_error "Ficheiro nao encontrado: $LOCAL_FILE"
      exit 1
    fi
    if (( $# >= 2 )); then
      SUBDIR="$2"
    else
      YEAR="$(date '+%Y')"
      MONTH="$(date '+%m')"
      SUBDIR="${YEAR}/${MONTH}"
    fi
    REMOTE_DIR="${WP_PATH}/wp-content/uploads/podcast/${SUBDIR}"
    ssh_cmd "mkdir -p '${REMOTE_DIR}'"
    scp_file "$LOCAL_FILE" "${REMOTE_DIR}/"
    BASENAME="$(basename "$LOCAL_FILE")"
    ssh_cmd "chown ealmeida:ealmeida '${REMOTE_DIR}/${BASENAME}'"
    log_info "Audio enviado: ${BASENAME} -> ${REMOTE_DIR}/"
    ;;

  batch)
    # Every png/jpg/mp3/wav file in a directory -> the directory implied by
    # the batch type (cover by default), under the current year/month.
    LOCAL_DIR="$1"
    BATCH_TYPE="${2:-cover}"
    if [[ ! -d "$LOCAL_DIR" ]]; then
      log_error "Directorio nao encontrado: $LOCAL_DIR"
      exit 1
    fi
    YEAR="$(date '+%Y')"
    MONTH="$(date '+%m')"
    case "$BATCH_TYPE" in
      cover)
        REMOTE_DIR="${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}"
        ;;
      audio)
        REMOTE_DIR="${WP_PATH}/wp-content/uploads/podcast/${YEAR}/${MONTH}"
        ;;
      *)
        log_error "Tipo batch invalido: $BATCH_TYPE"
        exit 1
        ;;
    esac
    ssh_cmd "mkdir -p '${REMOTE_DIR}'"
    COUNT=0
    for f in "${LOCAL_DIR}"/*.{png,jpg,mp3,wav}; do
      # Patterns with no match stay literal and are skipped here.
      [[ -f "$f" ]] || continue
      scp_file "$f" "${REMOTE_DIR}/"
      COUNT=$((COUNT + 1))
    done
    ssh_cmd "chown -R ealmeida:ealmeida '${REMOTE_DIR}/'"
    log_info "Batch ${BATCH_TYPE}: ${COUNT} ficheiros enviados para ${REMOTE_DIR}/"
    ;;

  *)
    usage
    ;;
esac
+62
View File
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Generate TTS for a single text file via Gemini API."""
import sys, wave, os
def main():
    """Generate speech for one text file and save it as a WAV file.

    Command line: tts-single-part.py <input.txt> <output.wav>

    Reads the input text as UTF-8, sends it together with a fixed PT-PT style
    instruction to the Gemini TTS model, and writes the returned audio bytes
    as 16-bit mono PCM at 24 kHz.

    Exits with status 1 when arguments are missing, GEMINI_API_KEY is not set,
    or the response carries no audio data.
    """
    if len(sys.argv) < 3:
        print("Usage: tts-single-part.py <input.txt> <output.wav>", file=sys.stderr)
        sys.exit(1)
    input_file = sys.argv[1]
    output_file = sys.argv[2]

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("ERROR: GEMINI_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    # Imported here so the argument and API-key checks above still work when
    # the SDK is not installed.
    from google import genai
    from google.genai import types

    style = (
        "Lê este texto em português de Portugal (PT-PT), com um tom enérgico, "
        "confiante, educativo, inspirador e profissional. Mantém o ritmo natural "
        "e envolvente, como se estivesses a conversar diretamente com o ouvinte, "
        "transmitindo proximidade e autoridade. Faz pequenas pausas para dar ênfase "
        "às ideias-chave e assegura que cada transição entre temas é fluida. Evita "
        "soar robótico ou demasiado formal; o objetivo é informar, motivar e criar "
        "ligação com quem está a ouvir."
    )

    # The scripts contain accented Portuguese text; read them as UTF-8
    # explicitly rather than relying on the locale's default encoding.
    with open(input_file, "r", encoding="utf-8") as f:
        text = f.read()
    words = len(text.split())
    print(f"Generating TTS for {words} words...")

    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model="gemini-2.5-pro-preview-tts",
        contents=style + "\n\n" + text,
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Sulafat")
                )
            ),
        ),
    )

    # Guard every level of the response: an empty or blocked response would
    # otherwise surface as an unhelpful IndexError/AttributeError.
    candidates = getattr(response, "candidates", None) or []
    content = getattr(candidates[0], "content", None) if candidates else None
    parts = getattr(content, "parts", None) or []
    # Join the audio of every part that carries inline data, in order.
    # NOTE(review): assumes each part holds raw PCM bytes that can simply be
    # concatenated; with a single part this matches the previous behaviour.
    data = b"".join(
        part.inline_data.data
        for part in parts
        if getattr(part, "inline_data", None) is not None and part.inline_data.data
    )
    if not data:
        print("ERROR: TTS response contained no audio data", file=sys.stderr)
        sys.exit(1)

    with wave.open(output_file, "wb") as wf:
        wf.setnchannels(1)      # mono
        wf.setsampwidth(2)      # 2 bytes per sample (16-bit)
        wf.setframerate(24000)  # 24 kHz
        wf.writeframes(data)

    # Two bytes per sample at 24000 samples per second.
    duration = (len(data) // 2) / 24000
    print(f"OK: {duration:.0f}s -> {output_file}")
if __name__ == "__main__":
main()