feat: scripts de projectos vindos do Hub (podcast, alojadamaria, clip, ocr, etc.)
Movidos do vault Hub para centralizar scripts. Hub mantém symlinks. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Executable
+161
@@ -0,0 +1,161 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/scripts/lib.sh"
|
||||
|
||||
#######################################
# Print the command reference for the pipeline driver and terminate.
# Arguments: none ($0 supplies the program name shown in the help text)
# Outputs:   help text on stdout
# Returns:   never returns; exits the shell with status 1
#######################################
usage() {
  cat << EOF
Podcast Descomplicar Digital — Pipeline Automatizado

Usage: $0 <command> [args]

Commands:
  batch N          Process next N episodes (content generation)
  status           Show pipeline state
  retry NNN        Re-run failed stage for episode NNN
  csv [start] [N]  Generate Canva CSV (default: next 7 episodes)
  publish NNN D    Generate publish commands for episode NNN on date D (YYYY-MM-DD)
  init NNN         Add episode NNN to pipeline state
  produce NNN F    Post-produce episode NNN from raw audio file F

Examples:
  $0 batch 5
  $0 status
  $0 csv 20 10
  $0 produce 20 /path/to/raw.wav
  $0 publish 20 2026-04-14
EOF
  exit 1
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Command dispatch for the pipeline driver.
#
# ensure_state_file, the log_* helpers, pad_number, get_episode_status,
# next_weekday, add_episode and the STATE_FILE / LOG_DIR / PROJECT_ROOT
# variables are not defined in this file; presumably they come from the
# sourced scripts/lib.sh — verify there.
# ---------------------------------------------------------------------------

# A sub-command is mandatory; usage() prints the help and exits 1.
[[ $# -lt 1 ]] && usage

CMD="$1"
shift

ensure_state_file
mkdir -p "$LOG_DIR"

case "$CMD" in
  batch)
    # Process COUNT consecutive episodes starting at .next_episode.
    COUNT="${1:-5}"
    if [[ ! "$COUNT" =~ ^[1-9][0-9]*$ ]]; then
      log_error "batch: N must be a positive integer (got '${COUNT}')"
      exit 1
    fi
    START="$(jq -r '.next_episode' "$STATE_FILE")"
    if [[ ! "$START" =~ ^[0-9]+$ ]]; then
      log_error "batch: invalid .next_episode in state file: '${START}'"
      exit 1
    fi
    END=$((START + COUNT - 1))
    log_info "=== BATCH: Processing episodes ${START}-${END} ==="

    # Fall back to today when the state file carries no publish date yet.
    CURRENT_DATE="$(jq -r '.next_publish_date' "$STATE_FILE")"
    if [[ -z "$CURRENT_DATE" || "$CURRENT_DATE" == "null" ]]; then
      CURRENT_DATE="$(date '+%Y-%m-%d')"
    fi

    for ((ep = START; ep <= END; ep++)); do
      log_info "--- Episode ${ep} ---"

      STATUS="$(get_episode_status "$ep")"
      if [[ "$STATUS" == "not_found" || "$STATUS" == "pending" ]]; then
        "${SCRIPT_DIR}/scripts/generate-content.sh" "$ep" || {
          log_error "EP$(pad_number "$ep"): Content generation failed, stopping batch"
          exit 1
        }
      else
        log_info "EP$(pad_number "$ep"): Already at status '${STATUS}', skipping content generation"
      fi

      # Re-read the status: content generation may have advanced it.
      STATUS="$(get_episode_status "$ep")"
      if [[ "$STATUS" == "script_done" ]]; then
        log_warn "EP$(pad_number "$ep"): Audio TTS needed (manual step via AI Studio)"
        log_warn " -> Generate audio, save to Episodios/Audios/raw/ep_$(pad_number "$ep")_raw.wav"
        log_warn " -> Then run: $0 produce ${ep} <raw_audio_path>"
      fi

      CURRENT_DATE="$(next_weekday "$CURRENT_DATE")"
    done

    # Write the new cursor via a temp file. The jq status is checked
    # explicitly because a failure inside an `a && b` list does not trigger
    # errexit and would otherwise go unnoticed.
    if ! jq --argjson n "$((END + 1))" --arg d "$CURRENT_DATE" \
      '.next_episode = $n | .next_publish_date = $d' \
      "$STATE_FILE" > "${STATE_FILE}.tmp"; then
      rm -f -- "${STATE_FILE}.tmp"
      log_error "Failed to update ${STATE_FILE}"
      exit 1
    fi
    mv -- "${STATE_FILE}.tmp" "$STATE_FILE"

    log_info "=== BATCH COMPLETE. Next episode: $((END + 1)), next date: ${CURRENT_DATE} ==="
    ;;

  status)
    # NOTE(review): this arm reads `.number` from each episode entry —
    # confirm that matches the field name lib.sh writes.
    echo "=== Pipeline Status ==="
    echo "Next episode: $(jq -r '.next_episode' "$STATE_FILE")"
    echo "Next publish date: $(jq -r '.next_publish_date' "$STATE_FILE")"
    echo ""
    EPISODE_COUNT="$(jq '.episodes | length' "$STATE_FILE")"
    if [[ "$EPISODE_COUNT" -gt 0 ]]; then
      echo "Episodes in pipeline:"
      jq -r '.episodes[] | " EP\(.number | tostring | if length < 3 then "0" * (3 - length) + . else . end): \(.status) - \(.title)"' "$STATE_FILE"
      echo ""
      echo "Counts:"
      jq -r '.episodes | group_by(.status) | map({status: .[0].status, count: length}) | .[] | " \(.status): \(.count)"' "$STATE_FILE"
    else
      echo "No episodes in pipeline yet. Run 'batch' to start."
    fi
    ;;

  retry)
    # Re-run whichever stage corresponds to the episode's current status.
    EP="${1:?Episode number required}"
    STATUS="$(get_episode_status "$EP")"
    log_info "Retrying EP$(pad_number "$EP") (current status: ${STATUS})"

    case "$STATUS" in
      pending | not_found)
        "${SCRIPT_DIR}/scripts/generate-content.sh" "$EP"
        ;;
      script_done)
        log_warn "Audio TTS needed — manual step via AI Studio"
        ;;
      audio_done)
        # Use the first raw audio file recorded for this episode.
        RAW=""
        for f in "${PROJECT_ROOT}/Episodios/Audios/raw/ep_$(pad_number "$EP")_"*; do
          if [[ -f "$f" ]]; then
            RAW="$f"
            break
          fi
        done
        if [[ -n "$RAW" ]]; then
          "${SCRIPT_DIR}/scripts/post-produce.sh" "$EP" "$RAW"
        else
          log_error "Raw audio not found. Use: $0 produce ${EP} <path>"
          # The retry did not happen, so report failure to the caller.
          exit 1
        fi
        ;;
      produced)
        log_info "Ready to publish. Use: $0 publish ${EP} YYYY-MM-DD"
        ;;
      *)
        log_warn "Unknown status: ${STATUS}"
        ;;
    esac
    ;;

  csv)
    "${SCRIPT_DIR}/scripts/generate-csv.sh" "$@"
    ;;

  produce)
    EP="${1:?Episode number required}"
    RAW="${2:?Raw audio path required}"
    "${SCRIPT_DIR}/scripts/post-produce.sh" "$EP" "$RAW"
    ;;

  publish)
    EP="${1:?Episode number required}"
    # Date defaults to the pipeline's next publish date.
    SCHED_DATE="${2:-$(jq -r '.next_publish_date' "$STATE_FILE")}"
    if [[ -z "$SCHED_DATE" || "$SCHED_DATE" == "null" ]]; then
      log_error "publish: no date given and no next_publish_date in state file"
      exit 1
    fi
    "${SCRIPT_DIR}/scripts/publish-episode.sh" "$EP" "$SCHED_DATE"
    ;;

  init)
    # Register an episode from the episode→guide map into the state file.
    EP="${1:?Episode number required}"
    MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
    EP_DATA="$(jq -r --arg n "$EP" '.[$n] // empty' "$MAP_FILE")"
    if [[ -n "$EP_DATA" ]]; then
      TITLE="$(jq -r '.podcast_title' <<< "$EP_DATA")"
      URL="$(jq -r '.guide_url' <<< "$EP_DATA")"
      add_episode "$EP" "$TITLE" "$URL"
      log_info "Added EP$(pad_number "$EP"): ${TITLE}"
    else
      log_error "Episode $EP not found in episode-guide-map.json"
      exit 1
    fi
    ;;

  *)
    usage
    ;;
esac
|
||||
Executable
+224
@@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# apply-wp-content.sh — Aplica conteúdo _wp.json a posts já agendados no WordPress
|
||||
#
|
||||
# Uso: ./scripts/apply-wp-content.sh <ep_num> [post_id]
|
||||
# Se post_id não for fornecido, pesquisa por data na pipeline-state.json
|
||||
#
|
||||
# Útil para: episódios que foram agendados antes do _wp.json existir
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# SSH connection settings for the WordPress host.
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
# Kept as an array so each option reaches ssh as its own argument without
# relying on unquoted word-splitting.
# NOTE(review): StrictHostKeyChecking=no together with
# UserKnownHostsFile=/dev/null disables host-key verification for a root
# login; consider pinning the host key instead.
SSH_OPTS=(
  -o IdentitiesOnly=yes
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
  -o LogLevel=ERROR
)
WP_PATH="/home/ealmeida/public_html"

#######################################
# Run a command on the WordPress host over SSH.
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
# Arguments: the remote command line, passed to ssh unchanged
# Outputs:   whatever the remote command writes
# Returns:   the exit status of ssh
#######################################
ssh_cmd() {
  # SSH_AUTH_SOCK is emptied for this call so no agent is consulted; only
  # the key file given with -i is offered (IdentitiesOnly=yes).
  SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" "${SSH_OPTS[@]}" \
    "${SSH_USER}@${SSH_HOST}" "$@"
}
|
||||
|
||||
#######################################
# Print the one-line synopsis and terminate.
# Outputs: synopsis on stdout
# Returns: never returns; exits the shell with status 1
#######################################
usage() {
  printf '%s\n' "Usage: $0 <ep_num> [post_id]"
  exit 1
}
|
||||
|
||||
# Positional arguments: <ep_num> is mandatory, [post_id] is optional.
[[ $# -lt 1 ]] && usage
EP_NUM="$1"
MANUAL_POST_ID="${2:-}"
# EP_NUM is later handed to `jq --argjson` and used in a file-name pattern,
# so anything that is not a plain decimal number is rejected up front.
[[ "$EP_NUM" =~ ^[0-9]+$ ]] || usage
EP_PAD="$(pad_number "$EP_NUM")"

# Locate the episode's _wp.json. find is used (rather than a glob) to cope
# with accented file names; results are read NUL-delimited so unusual
# characters in paths cannot split an entry. The first regular file wins.
WP_JSON=""
while IFS= read -r -d '' f; do
  if [[ -f "$f" ]]; then
    WP_JSON="$f"
    break
  fi
done < <(find "${PROJECT_ROOT}" -maxdepth 3 -name "Episodio_${EP_PAD}_*_wp.json" -print0 2>/dev/null)

if [[ -z "$WP_JSON" ]]; then
  log_error "EP${EP_PAD}: _wp.json não encontrado"
  exit 1
fi
|
||||
|
||||
# Validate the JSON file and attempt an automatic repair when it is invalid.

# Succeeds when the file named by $1 parses as JSON. The path is passed as
# an argument (not pasted into the Python source) so quotes in the file
# name cannot break or alter the check.
json_is_valid() {
  python3 -c 'import json, sys; json.load(open(sys.argv[1], encoding="utf-8"))' "$1" 2>/dev/null
}

if ! json_is_valid "$WP_JSON"; then
  log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
  # The repair escapes bare double quotes inside the "content_html" value,
  # which is taken to end at the `",` that precedes "hashtags" or "wp_tags".
  # A failed repair must not abort the script here (errexit): the
  # re-validation below reports the error.
  python3 - "$WP_JSON" << 'PYFIX' || true
import sys, re, json

filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()

marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    sys.exit(1)
content_start = start + len(marker)

end_pattern = re.search(r'",\s*\n\s*"hashtags"', raw[content_start:]) or re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:])
if not end_pattern:
    sys.exit(1)
content_end = content_start + end_pattern.start()

# Escape every double quote in the value that is not already escaped.
fixed = raw[:content_start] + re.sub(r'(?<!\\)"', '\\"', raw[content_start:content_end]) + raw[content_end:]

# Only overwrite the file when the result actually parses.
json.loads(fixed)
with open(filepath, 'w', encoding='utf-8') as f:
    f.write(fixed)
print("reparado")
PYFIX
  if ! json_is_valid "$WP_JSON"; then
    log_error "EP${EP_PAD}: JSON inválido após reparação"
    exit 1
  fi
  log_info "EP${EP_PAD}: JSON reparado automaticamente"
fi
log_info "EP${EP_PAD}: Usando $(basename "$WP_JSON")"
|
||||
|
||||
# Resolve the WordPress post id: use the one given on the command line, or
# look up the scheduled ("future") podcast post by the episode's date.
POST_ID="$MANUAL_POST_ID"
if [[ -z "$POST_ID" ]]; then
  # Scheduled date as recorded in pipeline-state.json.
  SCHED_DATE="$(jq -r --argjson n "$EP_NUM" '.episodes[] | select(.num == $n) | .scheduled // empty' "$STATE_FILE")"
  if [[ -z "$SCHED_DATE" ]]; then
    log_error "EP${EP_PAD}: Não encontrado em pipeline-state.json, especifique post_id manualmente"
    exit 1
  fi
  # The date is embedded in a remote shell command and an SQL literal, so
  # only a strict YYYY-MM-DD value is accepted.
  if [[ ! "$SCHED_DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    log_error "EP${EP_PAD}: data agendada inválida em pipeline-state.json: '${SCHED_DATE}'"
    exit 1
  fi
  log_info "EP${EP_PAD}: A pesquisar post por data ${SCHED_DATE}..."
  # `|| true`: a failed lookup falls through to the "not found" error
  # below instead of aborting silently under errexit.
  POST_ID="$(ssh_cmd "cd '${WP_PATH}' && wp db query \
    \"SELECT ID FROM wpah_posts WHERE post_type='podcast' AND post_status='future' AND DATE(post_date)='${SCHED_DATE}' LIMIT 1\" \
    --skip-column-names \
    --allow-root 2>/dev/null" || true)"
  # Drop any whitespace around the returned id.
  POST_ID="${POST_ID//[[:space:]]/}"
fi

if [[ -z "$POST_ID" ]]; then
  log_error "EP${EP_PAD}: Post não encontrado no WordPress"
  exit 1
fi
# The id is interpolated into remote commands later on; insist on digits.
if [[ ! "$POST_ID" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: post_id inválido: '${POST_ID}'"
  exit 1
fi
log_info "EP${EP_PAD}: Post ID = ${POST_ID}"
|
||||
|
||||
# Extract fields from the JSON via Python (chosen over jq by the original
# author for HTML content with accented characters).

#######################################
# Print one top-level field of the _wp.json file.
# Globals:   WP_JSON (read)
# Arguments: $1 - key name
#            $2 - optional separator; when given the value is treated as a
#                 list and its items are joined with it
# Outputs:   the value on stdout (empty when the key is missing)
#######################################
wp_json_get() {
  # File path and key travel as arguments, never as Python source text.
  PYTHONIOENCODING=utf-8 python3 - "$WP_JSON" "$1" "${2:-}" << 'PY'
import json, sys

path, key, sep = sys.argv[1:4]
with open(path, encoding="utf-8") as fh:
    data = json.load(fh)
if sep:
    print(sep.join(data.get(key, [])))
else:
    print(data.get(key, ''))
PY
}

WP_CONTENT="$(wp_json_get content_html)"
WP_META="$(wp_json_get meta_description)"
WP_KEYWORD="$(wp_json_get keyword)"
WP_TAGS="$(wp_json_get wp_tags ',')"
WP_HASHTAGS="$(wp_json_get hashtags ' ')"
WP_SEO_TITLE="$(wp_json_get seo_title)"
WP_SLUG="$(wp_json_get slug)"
|
||||
|
||||
# RankMath auto-fix: rebuild the slug when it lacks the focus keyword, and
# append the current year to an SEO title that contains no digit.
# The Python helper prints two lines: the (possibly fixed) slug, then the
# (possibly fixed) SEO title.
_AUTOFIX="$(python3 - "$WP_JSON" "$WP_SLUG" "$WP_SEO_TITLE" <<'PYFIX'
import json, sys, re, unicodedata, time


def slugify(text):
    # Lower-case, strip combining accents, keep [a-z0-9], join with '-'.
    text = unicodedata.normalize("NFD", text.lower())
    text = "".join(c for c in text if unicodedata.category(c) != "Mn")
    text = re.sub(r'[^a-z0-9\s-]', '', text)
    return re.sub(r'[-\s]+', '-', text.strip()).rstrip('-')


STOPWORDS = {"a","o","as","os","de","da","do","das","dos","e","em","no","na","nos","nas","para","por","pelo","pela"}


def strip_sw(s):
    # Slugified words of s with the stopwords removed, space-separated.
    return " ".join(t for t in slugify(s).replace("-", " ").split() if t not in STOPWORDS)


with open(sys.argv[1], encoding="utf-8") as fh:
    d = json.load(fh)
slug = sys.argv[2]
seo_title = sys.argv[3]
kw = d.get('keyword', '')

# Fix 1: slug does not contain the keyword -> keyword slug plus up to two
# distinctive words (longer than 3 chars) from the original slug, capped at
# 75 characters.
slug_fixed = slug
if kw and slug and strip_sw(kw) not in strip_sw(slug):
    fk_slug = slugify(kw)
    orig_words = [w for w in slug.split('-') if w not in fk_slug.split('-') and len(w) > 3][:2]
    slug_fixed = (fk_slug + ('-' + '-'.join(orig_words) if orig_words else ''))[:75].rstrip('-')

# Fix 2: SEO title without any digit -> add the current year.
seo_fixed = seo_title
if seo_title and not re.search(r'\d', seo_title):
    year = time.strftime('%Y')
    # Put the year right before the " | " separator when there is one,
    # otherwise at the end (cutting titles over 55 chars at a word boundary).
    if ' | ' in seo_title:
        parts = seo_title.split(' | ', 1)
        seo_fixed = f"{parts[0].rstrip()} {year} | {parts[1]}"
    else:
        t = seo_title.rstrip()
        seo_fixed = (t[:55].rsplit(' ', 1)[0] + f' {year}') if len(t) > 55 else f'{t} {year}'

print(slug_fixed)
print(seo_fixed)
PYFIX
)"

# Apply the corrected values (line 1 = slug, line 2 = SEO title).
WP_SLUG_NEW="$(sed -n '1p' <<< "$_AUTOFIX")"
WP_SEO_TITLE_NEW="$(sed -n '2p' <<< "$_AUTOFIX")"

if [[ -n "$WP_SLUG_NEW" && "$WP_SLUG_NEW" != "$WP_SLUG" ]]; then
  log_info "EP${EP_PAD}: slug auto-corrigido: '${WP_SLUG}' → '${WP_SLUG_NEW}'"
  WP_SLUG="$WP_SLUG_NEW"
fi
if [[ -n "$WP_SEO_TITLE_NEW" && "$WP_SEO_TITLE_NEW" != "$WP_SEO_TITLE" ]]; then
  log_info "EP${EP_PAD}: seo_title auto-corrigido: ano adicionado → '${WP_SEO_TITLE_NEW}'"
  WP_SEO_TITLE="$WP_SEO_TITLE_NEW"
fi
|
||||
|
||||
#######################################
# Escape a string for use inside single quotes in a (remote) shell command:
# every ' becomes '\''.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
sq_escape() {
  local sq="'"
  local repl="'\\''"
  printf '%s' "${1//$sq/$repl}"
}

# The post id is wrapped in single quotes in every remote command below.
POST_ID_Q="$(sq_escape "${POST_ID:-}")"

# Excerpt = meta description, blank line, hashtags (only when both exist).
WP_EXCERPT=""
if [[ -n "${WP_META:-}" && -n "${WP_HASHTAGS:-}" ]]; then
  WP_EXCERPT="${WP_META}

${WP_HASHTAGS}"
fi

# post_content
if [[ -n "${WP_CONTENT:-}" ]]; then
  ESCAPED_CONTENT="$(sq_escape "$WP_CONTENT")"
  ssh_cmd "cd '${WP_PATH}' && wp post update '${POST_ID_Q}' --post_content='${ESCAPED_CONTENT}' --allow-root 2>/dev/null"
  log_info "EP${EP_PAD}: post_content aplicado"
fi

# post_excerpt
if [[ -n "$WP_EXCERPT" ]]; then
  ESCAPED_EXCERPT="$(sq_escape "$WP_EXCERPT")"
  ssh_cmd "cd '${WP_PATH}' && wp post update '${POST_ID_Q}' --post_excerpt='${ESCAPED_EXCERPT}' --allow-root 2>/dev/null"
  log_info "EP${EP_PAD}: post_excerpt aplicado"
fi

# Slug (escaped like every other value sent to the remote shell).
if [[ -n "${WP_SLUG:-}" ]]; then
  ESCAPED_SLUG="$(sq_escape "$WP_SLUG")"
  ssh_cmd "cd '${WP_PATH}' && wp post update '${POST_ID_Q}' --post_name='${ESCAPED_SLUG}' --allow-root 2>/dev/null"
  log_info "EP${EP_PAD}: slug actualizado -> ${WP_SLUG}"
fi

# Tags
# NOTE(review): WP_TAGS is deliberately left unquoted, as before, so the
# remote shell splits it on whitespace; confirm how `wp post term set`
# should receive multi-word tags before quoting it.
if [[ -n "${WP_TAGS:-}" ]]; then
  ssh_cmd "cd '${WP_PATH}' && wp post term set '${POST_ID_Q}' post_tag ${WP_TAGS} --allow-root 2>/dev/null"
  log_info "EP${EP_PAD}: tags aplicadas"
fi

# Rank Math: description
if [[ -n "${WP_META:-}" ]]; then
  ESCAPED_META="$(sq_escape "$WP_META")"
  ssh_cmd "cd '${WP_PATH}' && wp post meta update '${POST_ID_Q}' rank_math_description '${ESCAPED_META}' --allow-root 2>/dev/null"
  log_info "EP${EP_PAD}: rank_math_description aplicado"
fi

# Rank Math: focus keyword
if [[ -n "${WP_KEYWORD:-}" ]]; then
  ESCAPED_KW="$(sq_escape "$WP_KEYWORD")"
  ssh_cmd "cd '${WP_PATH}' && wp post meta update '${POST_ID_Q}' rank_math_focus_keyword '${ESCAPED_KW}' --allow-root 2>/dev/null"
  log_info "EP${EP_PAD}: rank_math_focus_keyword aplicado"
fi

# Rank Math: SEO title, falling back to "<title> | Podcast Descomplicar
# Digital" when the JSON carries no seo_title.
if [[ -z "${WP_SEO_TITLE:-}" ]]; then
  WP_TITLE_FALLBACK="$(jq -r '.title // empty' "${WP_JSON:-}")"
  if [[ -n "$WP_TITLE_FALLBACK" ]]; then
    WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
  fi
fi
if [[ -n "${WP_SEO_TITLE:-}" ]]; then
  ESCAPED_SEO_TITLE="$(sq_escape "$WP_SEO_TITLE")"
  ssh_cmd "cd '${WP_PATH}' && wp post meta update '${POST_ID_Q}' rank_math_title '${ESCAPED_SEO_TITLE}' --allow-root 2>/dev/null"
  log_info "EP${EP_PAD}: rank_math_title aplicado"
fi

log_info "EP${EP_PAD:-}: Conteúdo WP aplicado com sucesso (post ${POST_ID:-})"
# The post id is the script's only stdout result.
echo "${POST_ID:-}"
|
||||
Executable
+177
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env bash
|
||||
# batch-prepare.sh — Pipeline completo para lote de episódios
|
||||
# Uso: ./scripts/batch-prepare.sh [--dry-run]
|
||||
# Para cada episódio: TTS → pós-produção → agendar no WordPress
|
||||
# Retomável: salta episódios já prontos
|
||||
|
||||
set -uo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# --dry-run only logs the commands that would be executed.
DRY_RUN=false
[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true

# The Gemini key is needed for the TTS step only; a dry run never invokes
# generate-audio.sh, so it may proceed without the key.
if [[ "$DRY_RUN" == false && -z "${GEMINI_API_KEY:-}" ]]; then
  log_error "GEMINI_API_KEY não definida. Exporta primeiro: export GEMINI_API_KEY=..."
  exit 1
fi

# Batch: episodes in calendar order, and each episode's publication date.
EP_ORDER=(65 132 137 66 67 134 139 69 78 79 80 81 82 83 84)

declare -A EP_DATES=(
  [65]="2026-07-02"
  [132]="2026-07-06"
  [137]="2026-07-07"
  [66]="2026-07-08"
  [67]="2026-07-09"
  [134]="2026-07-13"
  [139]="2026-07-14"
  [69]="2026-07-15"
  [78]="2026-07-16"
  [79]="2026-07-20"
  [80]="2026-07-21"
  [81]="2026-07-22"
  [82]="2026-07-23"
  [83]="2026-07-27"
  [84]="2026-07-28"
)

# One timestamped log file per run.
BATCH_LOG="${LOG_DIR}/batch-$(date +%Y%m%d-%H%M%S).log"
mkdir -p "${LOG_DIR}"

# Per-episode outcome (ok / skip / erro_*), keyed by episode number.
declare -A RESULTS=()
|
||||
|
||||
#######################################
# Write a timestamped line to stdout and append it to the batch log.
# Globals:   BATCH_LOG (read) - path of the log file
# Arguments: message words, joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and in BATCH_LOG
#######################################
_batch_log() {
  local msg="$*"
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${msg}" | tee -a "${BATCH_LOG}"
}
|
||||
|
||||
#######################################
# Print the path of the first final MP3 for an episode, or nothing.
# Globals:   PROJECT_ROOT (read)
# Arguments: $1 - zero-padded episode number
#######################################
_find_final_mp3() {
  local candidate
  for candidate in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${1}_"*.mp3; do
    if [[ -f "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}

_batch_log "=== BATCH START — $(date) ==="
_batch_log "Episódios: ${EP_ORDER[*]}"
[[ "$DRY_RUN" == true ]] && _batch_log "MODO DRY-RUN activado"

# Per episode: TTS -> post-production -> schedule in WordPress. A failure
# in any step records an error code in RESULTS and moves on to the next
# episode, so the run can be resumed later.
for EP_NUM in "${EP_ORDER[@]}"; do
  EP_PAD="$(pad_number "$EP_NUM")"
  SCHED_DATE="${EP_DATES[$EP_NUM]}"

  _batch_log "--- EP${EP_PAD} (${SCHED_DATE}) ---"

  # Skip episodes already scheduled (status "ready" in the state file).
  ALREADY_STATUS=$(jq -r --argjson n "$EP_NUM" \
    '.episodes[] | select(.num == $n) | .status // ""' \
    "${STATE_FILE}" 2>/dev/null || echo "")

  if [[ "$ALREADY_STATUS" == "ready" ]]; then
    _batch_log "EP${EP_PAD}: já agendado — a saltar"
    RESULTS[$EP_NUM]="skip"
    continue
  fi

  # --- Step 1: TTS (skipped together with step 2 when a final MP3 exists) ---
  FINAL_MP3="$(_find_final_mp3 "$EP_PAD")"

  if [[ -z "$FINAL_MP3" ]]; then
    _batch_log "EP${EP_PAD}: [1/3] A gerar áudio TTS..."
    if [[ "$DRY_RUN" == false ]]; then
      if ! bash "${SCRIPT_DIR}/generate-audio.sh" "$EP_NUM" >> "${BATCH_LOG}" 2>&1; then
        _batch_log "EP${EP_PAD}: ERRO no TTS — a saltar episódio"
        RESULTS[$EP_NUM]="erro_tts"
        continue
      fi
    else
      _batch_log "EP${EP_PAD}: [DRY-RUN] generate-audio.sh ${EP_NUM}"
    fi

    # --- Step 2: post-production over the raw parts just generated ---
    RAW_FILES=()
    for f in "${PROJECT_ROOT}/Episodios/Audios/raw/ep_${EP_PAD}_p"*.wav; do
      [[ -f "$f" ]] && RAW_FILES+=("$f")
    done

    if [[ ${#RAW_FILES[@]} -eq 0 && "$DRY_RUN" == false ]]; then
      _batch_log "EP${EP_PAD}: ERRO — sem ficheiros raw após TTS — a saltar episódio"
      RESULTS[$EP_NUM]="erro_sem_raw"
      continue
    fi

    _batch_log "EP${EP_PAD}: [2/3] Pós-produção (${#RAW_FILES[@]} partes)..."
    if [[ "$DRY_RUN" == false ]]; then
      if ! bash "${SCRIPT_DIR}/post-produce.sh" "$EP_NUM" "${RAW_FILES[@]}" >> "${BATCH_LOG}" 2>&1; then
        _batch_log "EP${EP_PAD}: ERRO na pós-produção — a saltar episódio"
        RESULTS[$EP_NUM]="erro_postprod"
        continue
      fi
    else
      _batch_log "EP${EP_PAD}: [DRY-RUN] post-produce.sh ${EP_NUM} <raw_files>"
    fi

    # Post-production is expected to have produced the final MP3.
    FINAL_MP3="$(_find_final_mp3 "$EP_PAD")"

    if [[ -z "$FINAL_MP3" && "$DRY_RUN" == false ]]; then
      _batch_log "EP${EP_PAD}: ERRO — MP3 final não encontrado após pós-produção"
      RESULTS[$EP_NUM]="erro_sem_mp3"
      continue
    fi
  else
    _batch_log "EP${EP_PAD}: [1/3] Áudio já existe: $(basename "${FINAL_MP3}") — a saltar TTS+pós-prod"
  fi

  # --- Step 3: schedule in WordPress ---
  _batch_log "EP${EP_PAD}: [3/3] A agendar no WordPress para ${SCHED_DATE}..."
  if [[ "$DRY_RUN" == false ]]; then
    if ! bash "${SCRIPT_DIR}/schedule-episode.sh" "$EP_NUM" "$SCHED_DATE" >> "${BATCH_LOG}" 2>&1; then
      _batch_log "EP${EP_PAD}: ERRO no agendamento"
      RESULTS[$EP_NUM]="erro_schedule"
      continue
    fi
  else
    _batch_log "EP${EP_PAD}: [DRY-RUN] schedule-episode.sh ${EP_NUM} ${SCHED_DATE}"
  fi

  RESULTS[$EP_NUM]="ok"
  _batch_log "EP${EP_PAD}: ✓ COMPLETO"
done
|
||||
|
||||
# Final summary: one line per episode, then totals. The script exits 1 when
# at least one episode ended in error, 0 otherwise.
_batch_log ""
_batch_log "=== RESUMO ==="

OK_COUNT=0
ERR_COUNT=0
SKIP_COUNT=0

for EP_NUM in "${EP_ORDER[@]}"; do
  EP_PAD="$(pad_number "$EP_NUM")"
  # Episodes with no recorded result count as errors ("desconhecido").
  STATUS="${RESULTS[$EP_NUM]:-desconhecido}"
  case "$STATUS" in
    ok)
      _batch_log "✓ EP${EP_PAD} — completo"
      OK_COUNT=$((OK_COUNT + 1))
      ;;
    skip)
      _batch_log "⏭ EP${EP_PAD} — já agendado"
      SKIP_COUNT=$((SKIP_COUNT + 1))
      ;;
    *)
      _batch_log "✗ EP${EP_PAD} — ERRO: ${STATUS}"
      ERR_COUNT=$((ERR_COUNT + 1))
      ;;
  esac
done

_batch_log ""
_batch_log "Total: ${OK_COUNT} ok | ${SKIP_COUNT} saltados | ${ERR_COUNT} erros"
_batch_log "Log completo: ${BATCH_LOG}"
_batch_log "=== BATCH END — $(date) ==="

if [[ "$ERR_COUNT" -gt 0 ]]; then
  exit 1
fi
exit 0
|
||||
Executable
+68
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Episodes to produce (calendar order).
EPISODES=(136 37 38 39 40 41 42 44 135 46 47 48 49 50 51)

TOTAL=${#EPISODES[@]}
DONE=0
FAILED=0
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"

log_info "=== BATCH PRODUCE: ${TOTAL} episodios ==="

# Per episode: generate the raw audio parts, then post-produce them. A
# failing episode is counted and skipped; the batch keeps going.
for EP in "${EPISODES[@]}"; do
  EP_PAD="$(pad_number "$EP")"
  DONE=$((DONE + 1))
  log_info "--- [${DONE}/${TOTAL}] EP${EP_PAD} ---"

  # Step 1: generate audio (split + TTS).
  log_info "EP${EP_PAD}: Generating audio..."
  if bash "${SCRIPT_DIR}/generate-audio.sh" "$EP" 2>&1; then
    log_info "EP${EP_PAD}: Audio generation OK"
  else
    log_error "EP${EP_PAD}: Audio generation FAILED, skipping"
    FAILED=$((FAILED + 1))
    continue
  fi

  # Step 2: collect the raw audio parts.
  # NOTE(review): the previous code retried "an alternate path with accent"
  # using the very same directory, which could never find anything new. The
  # no-op retry was dropped; add the accented path here if one exists.
  RAW_FILES=()
  for p in "${RAW_DIR}/ep_${EP_PAD}_p"*.wav; do
    [[ -f "$p" ]] && RAW_FILES+=("$p")
  done

  if [[ ${#RAW_FILES[@]} -eq 0 ]]; then
    log_error "EP${EP_PAD}: No raw audio files found, skipping post-production"
    FAILED=$((FAILED + 1))
    continue
  fi

  # Sort the parts numerically (p2 before p10). Reading line by line keeps
  # paths containing spaces or glob characters intact.
  RAW_FILES_SORTED=()
  while IFS= read -r p; do
    RAW_FILES_SORTED+=("$p")
  done < <(printf '%s\n' "${RAW_FILES[@]}" | sort -V)

  # Step 3: post-produce.
  log_info "EP${EP_PAD}: Post-producing ${#RAW_FILES_SORTED[@]} parts..."
  if bash "${SCRIPT_DIR}/post-produce.sh" "$EP" "${RAW_FILES_SORTED[@]}" 2>&1; then
    log_info "EP${EP_PAD}: Post-production OK"
  else
    log_error "EP${EP_PAD}: Post-production FAILED"
    FAILED=$((FAILED + 1))
    continue
  fi

  log_info "EP${EP_PAD}: DONE"
done

log_info "=== BATCH COMPLETE: ${DONE} processed, $((DONE - FAILED)) OK, ${FAILED} failed ==="
|
||||
Executable
+134
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Episodes handled by this batch, in processing order.
EPISODES=(88 89 55 56 133 57 58 59 60 61 62 63 138 140 64)
# Regex (grep -E) marking where a script's trailing section starts.
SPLIT_MARKER="$(jq -r '.tts_split_marker' "${PROJECT_ROOT}/config/audio-settings.json")"
# Word threshold above which the main body is cut into two parts.
MAX_WORDS=1000
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
TOTAL=${#EPISODES[@]}
DONE=0
FAILED=0

mkdir -p "$RAW_DIR"
|
||||
|
||||
#######################################
# Split one episode script into parts, run TTS on each part and then
# post-produce the result.
#
# The caller invokes this inside `if`, which switches errexit off for the
# whole function body, so every step that can fail is checked explicitly
# and turned into a non-zero return.
#
# Globals:   PROJECT_ROOT, SCRIPT_DIR, RAW_DIR, SPLIT_MARKER, MAX_WORDS (read)
# Arguments: $1 - episode number
# Returns:   0 on success or when a final MP3 already exists;
#            non-zero when no script is found or any step fails
#######################################
split_and_generate() {
  local ep_num="$1"
  local ep_pad
  ep_pad="$(pad_number "$ep_num")" || return 1

  # Find the episode script.
  local script_file="" candidate
  for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${ep_pad}_"*.txt; do
    if [[ -f "$candidate" ]]; then
      script_file="$candidate"
      break
    fi
  done
  if [[ -z "$script_file" ]]; then
    log_error "EP${ep_pad}: No script found"
    return 1
  fi

  local total_words
  total_words="$(wc -w < "$script_file")"
  log_info "EP${ep_pad}: ${total_words} words from $(basename "$script_file")"

  # Nothing to do when a final MP3 already exists.
  for candidate in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${ep_pad}_"*.mp3; do
    if [[ -e "$candidate" ]]; then
      log_info "EP${ep_pad}: Final MP3 already exists, SKIPPING"
      return 0
    fi
  done

  # Work in a scratch directory that is removed on every exit path.
  local tmp_dir rc=0
  tmp_dir="$(mktemp -d)" || return 1
  _split_and_generate_parts "$ep_num" "$ep_pad" "$script_file" "$tmp_dir" || rc=$?
  rm -rf -- "$tmp_dir"
  return "$rc"
}

# Split, TTS and post-production for split_and_generate, working inside
# the scratch directory $4 (arguments: ep_num ep_pad script_file tmp_dir).
_split_and_generate_parts() {
  local ep_num="$1" ep_pad="$2" script_file="$3" tmp_dir="$4"

  # First cut: everything from the first SPLIT_MARKER line onwards becomes
  # faq.txt, everything before it corpo.txt.
  local marker_line
  marker_line="$(grep -nE -- "$SPLIT_MARKER" "$script_file" | head -1 | cut -d: -f1 || true)"

  if [[ -n "$marker_line" && "$marker_line" -gt 1 ]]; then
    head -n "$((marker_line - 1))" "$script_file" > "${tmp_dir}/corpo.txt" || return 1
    tail -n "+${marker_line}" "$script_file" > "${tmp_dir}/faq.txt" || return 1
  else
    cp -- "$script_file" "${tmp_dir}/corpo.txt" || return 1
    : > "${tmp_dir}/faq.txt"
  fi

  local corpo_words
  corpo_words="$(wc -w < "${tmp_dir}/corpo.txt")"

  local num_parts=0
  if [[ "$corpo_words" -gt "$MAX_WORDS" ]]; then
    # Second cut: halve the body at the blank line nearest its middle
    # (probing a few offsets around it), or at the middle line itself.
    local corpo_lines mid check line split_line offset
    corpo_lines="$(wc -l < "${tmp_dir}/corpo.txt")"
    mid=$((corpo_lines / 2))
    split_line="$mid"
    for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
      check=$((mid + offset))
      if [[ "$check" -gt 0 && "$check" -lt "$corpo_lines" ]]; then
        line="$(sed -n "${check}p" "${tmp_dir}/corpo.txt")"
        if [[ -z "$line" || "$line" =~ ^[[:space:]]*$ ]]; then
          split_line="$check"
          break
        fi
      fi
    done
    head -n "$split_line" "${tmp_dir}/corpo.txt" > "${tmp_dir}/part_1.txt" || return 1
    tail -n "+$((split_line + 1))" "${tmp_dir}/corpo.txt" > "${tmp_dir}/part_2.txt" || return 1
    if [[ -s "${tmp_dir}/faq.txt" ]]; then
      cp -- "${tmp_dir}/faq.txt" "${tmp_dir}/part_3.txt" || return 1
      num_parts=3
    else
      num_parts=2
    fi
  else
    cp -- "${tmp_dir}/corpo.txt" "${tmp_dir}/part_1.txt" || return 1
    if [[ -s "${tmp_dir}/faq.txt" ]]; then
      cp -- "${tmp_dir}/faq.txt" "${tmp_dir}/part_2.txt" || return 1
      num_parts=2
    else
      num_parts=1
    fi
  fi

  log_info "EP${ep_pad}: Split into ${num_parts} parts"

  # TTS for each part; a part whose raw WAV already exists is reused.
  local raw_files=()
  local i part_file output part_words
  for ((i = 1; i <= num_parts; i++)); do
    part_file="${tmp_dir}/part_${i}.txt"
    output="${RAW_DIR}/ep_${ep_pad}_p${i}.wav"
    part_words="$(wc -w < "$part_file")"

    if [[ -f "$output" ]]; then
      log_info "EP${ep_pad}: Part ${i}/${num_parts} already exists (${part_words} words), skipping"
    else
      log_info "EP${ep_pad}: Generating part ${i}/${num_parts} (${part_words} words)..."
      if ! python3 "${SCRIPT_DIR}/tts-single-part.py" "$part_file" "$output"; then
        log_error "EP${ep_pad}: TTS failed for part ${i}/${num_parts}"
        # Remove any partial output so a later run regenerates this part
        # instead of treating it as "already exists".
        rm -f -- "$output"
        return 1
      fi
      log_info "EP${ep_pad}: Part ${i} done"
    fi
    raw_files+=("$output")
  done

  # Post-produce all parts into the final episode audio.
  log_info "EP${ep_pad}: Post-producing..."
  if ! bash "${SCRIPT_DIR}/post-produce.sh" "$ep_num" "${raw_files[@]}"; then
    log_error "EP${ep_pad}: Post-production failed"
    return 1
  fi
  log_info "EP${ep_pad}: COMPLETE"
}
|
||||
|
||||
log_info "=== BATCH TTS: ${TOTAL} episodios ==="

# Process every episode; a failure is counted and the batch continues.
for EP in "${EPISODES[@]}"; do
  DONE=$((DONE + 1))
  log_info "--- [${DONE}/${TOTAL}] EP$(pad_number "$EP") ---"
  if split_and_generate "$EP"; then
    log_info "EP$(pad_number "$EP"): OK"
  else
    log_error "EP$(pad_number "$EP"): FAILED"
    FAILED=$((FAILED + 1))
  fi
done

log_info "=== BATCH COMPLETE: ${TOTAL} processed, $((TOTAL - FAILED)) OK, ${FAILED} failed ==="
|
||||
Executable
+168
@@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
#######################################
# Print the help text for the TTS generator and terminate.
# Outputs: help text on stdout
# Returns: never returns; exits the shell with status 1
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 <episode_number>" \
    " Generates TTS audio via Gemini API for the given episode." \
    " Requires GEMINI_API_KEY environment variable." \
    " Splits text into blocks if needed (max ~1000 words per block)."
  exit 1
}
|
||||
|
||||
[[ $# -lt 1 ]] && usage
EP_NUM="$1"
EP_PAD="$(pad_number "$EP_NUM")"

ensure_state_file

# The API key is required for the TTS calls further down.
if [[ -z "${GEMINI_API_KEY:-}" ]]; then
  log_error "GEMINI_API_KEY not set. Export it first."
  exit 1
fi

# Load settings.
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
if [[ ! -r "$SETTINGS_FILE" ]]; then
  log_error "Settings file not readable: ${SETTINGS_FILE}"
  exit 1
fi
TTS_MODEL="$(jq -r '.tts_model' "$SETTINGS_FILE")"
TTS_VOICE="$(jq -r '.tts_voice' "$SETTINGS_FILE")"
MAX_WORDS="$(jq -r '.tts_max_words_per_block' "$SETTINGS_FILE")"
SPLIT_MARKER="$(jq -r '.tts_split_marker' "$SETTINGS_FILE")"
# MAX_WORDS is used in numeric comparisons; a missing key would yield the
# string "null" and fail there with a confusing message.
if [[ ! "$MAX_WORDS" =~ ^[0-9]+$ ]]; then
  log_error "Invalid tts_max_words_per_block in ${SETTINGS_FILE}: '${MAX_WORDS}'"
  exit 1
fi

# Find the episode script (first matching .txt).
SCRIPT_FILE=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
  if [[ -f "$f" ]]; then
    SCRIPT_FILE="$f"
    break
  fi
done

if [[ -z "$SCRIPT_FILE" ]]; then
  log_error "EP${EP_PAD}: No script .txt found"
  exit 1
fi

TOTAL_WORDS="$(wc -w < "$SCRIPT_FILE")"
log_info "EP${EP_PAD}: Script has ${TOTAL_WORDS} words, max per block: ${MAX_WORDS}"

# Output directory for the raw WAV parts.
RAW_DIR="${PROJECT_ROOT}/Episodios/Audios/raw"
mkdir -p "$RAW_DIR"

# Scratch directory for the text parts; removed on any exit.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT
|
||||
|
||||
# Split the script into at most three text parts under TMP_DIR:
#   - short scripts (<= MAX_WORDS words) stay in one part;
#   - otherwise the text is cut at the first SPLIT_MARKER line (body vs.
#     trailing section) and a body still over MAX_WORDS is halved at the
#     blank line nearest its middle.
# NOTE(review): a body longer than 2 * MAX_WORDS still yields halves above
# the limit; only one halving is performed.
if [[ "$TOTAL_WORDS" -le "$MAX_WORDS" ]]; then
  cp -- "$SCRIPT_FILE" "${TMP_DIR}/part_1.txt"
  NUM_PARTS=1
else
  # `|| true`: no marker match makes the pipeline fail under pipefail.
  MARKER_LINE="$(grep -nE -- "$SPLIT_MARKER" "$SCRIPT_FILE" | head -1 | cut -d: -f1 || true)"

  if [[ -n "$MARKER_LINE" && "$MARKER_LINE" -gt 1 ]]; then
    head -n "$((MARKER_LINE - 1))" "$SCRIPT_FILE" > "${TMP_DIR}/corpo.txt"
    tail -n "+${MARKER_LINE}" "$SCRIPT_FILE" > "${TMP_DIR}/faq.txt"
  else
    cp -- "$SCRIPT_FILE" "${TMP_DIR}/corpo.txt"
    : > "${TMP_DIR}/faq.txt"
  fi

  # Subdivide the body if it is still too long.
  CORPO_WORDS="$(wc -w < "${TMP_DIR}/corpo.txt")"
  if [[ "$CORPO_WORDS" -gt "$MAX_WORDS" ]]; then
    CORPO_LINES="$(wc -l < "${TMP_DIR}/corpo.txt")"
    MID=$((CORPO_LINES / 2))
    # Look for the nearest paragraph break (blank line) around the middle;
    # fall back to the middle line when none is found.
    SPLIT_LINE="$MID"
    for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
      CHECK=$((MID + offset))
      if [[ "$CHECK" -gt 0 && "$CHECK" -lt "$CORPO_LINES" ]]; then
        LINE="$(sed -n "${CHECK}p" "${TMP_DIR}/corpo.txt")"
        if [[ -z "$LINE" || "$LINE" =~ ^[[:space:]]*$ ]]; then
          SPLIT_LINE="$CHECK"
          break
        fi
      fi
    done
    head -n "$SPLIT_LINE" "${TMP_DIR}/corpo.txt" > "${TMP_DIR}/part_1.txt"
    tail -n "+$((SPLIT_LINE + 1))" "${TMP_DIR}/corpo.txt" > "${TMP_DIR}/part_2.txt"
    if [[ -s "${TMP_DIR}/faq.txt" ]]; then
      cp -- "${TMP_DIR}/faq.txt" "${TMP_DIR}/part_3.txt"
      NUM_PARTS=3
    else
      NUM_PARTS=2
    fi
  else
    cp -- "${TMP_DIR}/corpo.txt" "${TMP_DIR}/part_1.txt"
    if [[ -s "${TMP_DIR}/faq.txt" ]]; then
      cp -- "${TMP_DIR}/faq.txt" "${TMP_DIR}/part_2.txt"
      NUM_PARTS=2
    else
      NUM_PARTS=1
    fi
  fi
fi

log_info "EP${EP_PAD}: Split into ${NUM_PARTS} parts"
|
||||
|
||||
# TTS style prompt, prepended to every part's text.
STYLE="Lê este texto em português de Portugal (PT-PT), com um tom enérgico, confiante, educativo, inspirador e profissional. Mantém o ritmo natural e envolvente, como se estivesses a conversar diretamente com o ouvinte, transmitindo proximidade e autoridade. Faz pequenas pausas para dar ênfase às ideias-chave e assegura que cada transição entre temas é fluida. Evita soar robótico ou demasiado formal; o objetivo é informar, motivar e criar ligação com quem está a ouvir."

# Generate one WAV per text part.
AUDIO_FILES=()
for ((i = 1; i <= NUM_PARTS; i++)); do
  PART_FILE="${TMP_DIR}/part_${i}.txt"
  PART_WORDS="$(wc -w < "$PART_FILE")"
  OUTPUT="${RAW_DIR}/ep_${EP_PAD}_p${i}.wav"

  log_info "EP${EP_PAD}: Generating part ${i}/${NUM_PARTS} (${PART_WORDS} words)..."

  # The API key, model, voice and style reach Python through the
  # environment and the file paths through argv. Nothing is pasted into the
  # Python source, so the key does not show up in the process argument list
  # and quotes in any value cannot break the program text.
  GEMINI_API_KEY="${GEMINI_API_KEY}" \
    TTS_MODEL="${TTS_MODEL}" \
    TTS_VOICE="${TTS_VOICE}" \
    TTS_STYLE="${STYLE}" \
    python3 - "$PART_FILE" "$OUTPUT" << 'PYTTS'
import os, sys, wave
from google import genai
from google.genai import types

part_path, out_path = sys.argv[1], sys.argv[2]

client = genai.Client(api_key=os.environ['GEMINI_API_KEY'])
with open(part_path, 'r') as f:
    text = f.read()

response = client.models.generate_content(
    model=os.environ['TTS_MODEL'],
    contents=os.environ['TTS_STYLE'] + '\n\n' + text,
    config=types.GenerateContentConfig(
        response_modalities=['AUDIO'],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=os.environ['TTS_VOICE'])
            )
        ),
    ),
)

# The audio bytes are written as 16-bit mono WAV at 24 kHz.
data = response.candidates[0].content.parts[0].inline_data.data
with wave.open(out_path, 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(24000)
    wf.writeframes(data)

duration = (len(data) // 2) / 24000
print(f'{duration:.0f}')
PYTTS

  DURATION="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTPUT")"
  log_info "EP${EP_PAD}: Part ${i} done: ${DURATION%.*}s"
  AUDIO_FILES+=("$OUTPUT")
done

# Update state.
set_episode_field "$EP_NUM" "status" "audio_done"

# Print the follow-up command (with the generated files) for post-produce.sh.
log_info "EP${EP_PAD}: All ${NUM_PARTS} parts generated. Run post-production:"
echo "bash scripts/post-produce.sh ${EP_NUM} ${AUDIO_FILES[*]}"
|
||||
Executable
+160
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the command-line help for generate-content.sh and exit with status 1.
usage() {
  cat << EOF
Usage: $0 <episode_number>
 Generates podcast script and WP description for the given episode.
 Reads episode data from config/episode-guide-map.json.
 Skips script generation if .txt already exists.
EOF
  exit 1
}
|
||||
|
||||
# An episode number is required; usage prints help and exits 1.
if (( $# < 1 )); then
  usage
fi
EP_NUM="$1"
EP_PAD="$(pad_number "$EP_NUM")"

ensure_state_file

# Look the episode up in the guide map (keyed by the number as a string).
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
EP_DATA="$(jq -r --arg n "$EP_NUM" '.[$n] // empty' "$MAP_FILE")"
if [[ -z "$EP_DATA" ]]; then
  log_error "Episode $EP_NUM not found in episode-guide-map.json"
  exit 1
fi

GUIDE_TITLE="$(jq -r '.guide_title' <<< "$EP_DATA")"
PODCAST_TITLE="$(jq -r '.podcast_title' <<< "$EP_DATA")"
GUIDE_URL="$(jq -r '.guide_url' <<< "$EP_DATA")"

# File-name-safe title: every character outside [a-zA-Z0-9] becomes "_",
# runs of "_" collapse to one, and a trailing "_" is dropped.
SAFE_TITLE="$(sed -e 's/[^a-zA-Z0-9]/_/g' -e 's/__*/_/g' -e 's/_$//' <<< "$PODCAST_TITLE")"

SCRIPT_FILE="${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_${SAFE_TITLE}.txt"
WP_FILE="${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_${SAFE_TITLE}_wp.json"

# Reuse any .txt already present for this episode number, whatever its title.
EXISTING_SCRIPT=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt; do
  # An unmatched glob stays literal, hence the -f test.
  [[ -f "$candidate" ]] || continue
  EXISTING_SCRIPT="$candidate"
  SCRIPT_FILE="$candidate"
  break
done

# Prompt material shared by both generation steps.
SERVICES="$(< "${PROJECT_ROOT}/config/service-links.json")"
SCRIPT_PROMPT="$(< "${PROJECT_ROOT}/prompts/generate-script.md")"
WP_PROMPT="$(< "${PROJECT_ROOT}/prompts/generate-wp-description.md")"
|
||||
|
||||
# Step 1: Generate script (or skip if exists)
if [[ -n "$EXISTING_SCRIPT" ]]; then
  log_info "EP${EP_PAD}: Script already exists at ${EXISTING_SCRIPT}, skipping generation"
else
  log_info "EP${EP_PAD}: Generating podcast script for '${PODCAST_TITLE}'"

  FULL_PROMPT="${SCRIPT_PROMPT}

---

## Dados do episodio
- Numero: ${EP_NUM}
- Titulo: ${PODCAST_TITLE}
- Guia de referencia: ${GUIDE_TITLE}
- URL do guia: ${GUIDE_URL}

## Links de servicos Descomplicar (usar quando contextual)
${SERVICES}

## Instrucao
Gera o guiao completo seguindo a estrutura obrigatoria acima. Output apenas o texto limpo."

  # Generate into a side file and move it into place only on success.
  # Writing straight to the .txt would leave an empty or truncated file behind
  # when claude fails, and the next run would then treat it as an existing
  # script and skip generation. The ".partial" suffix keeps the side file out
  # of the "Episodio_NNN_*.txt" glob used to detect existing scripts.
  SCRIPT_PARTIAL="${SCRIPT_FILE}.partial"
  if ! printf '%s\n' "$FULL_PROMPT" | claude --print > "$SCRIPT_PARTIAL" \
    || [[ ! -s "$SCRIPT_PARTIAL" ]]; then
    rm -f -- "$SCRIPT_PARTIAL"
    log_error "EP${EP_PAD}: Script generation failed, no script file written"
    exit 1
  fi
  mv -- "$SCRIPT_PARTIAL" "$SCRIPT_FILE"
  log_info "EP${EP_PAD}: Script saved to ${SCRIPT_FILE}"
fi
|
||||
|
||||
# Step 2: Generate WP description
log_info "EP${EP_PAD}: Generating WordPress description"

SCRIPT_CONTENT="$(cat "$SCRIPT_FILE")"
WP_FULL_PROMPT="${WP_PROMPT}

---

## Dados do episodio
- Numero: ${EP_NUM}
- Titulo: ${PODCAST_TITLE}
- URL do guia relacionado: ${GUIDE_URL}

## Links de servicos Descomplicar (USAR APENAS ESTES — nunca inventar)
${SERVICES}

## Guiao do episodio (base para a descricao)
${SCRIPT_CONTENT}

## Instrucao
Gera o JSON com a descricao WordPress completa. Output APENAS JSON valido."

# A failed run must not leave a truncated _wp.json behind for later stages.
if ! printf '%s\n' "$WP_FULL_PROMPT" | claude --print > "$WP_FILE"; then
  rm -f -- "$WP_FILE"
  log_error "EP${EP_PAD}: WP description generation failed"
  exit 1
fi

# Succeeds when the file given as $1 parses as JSON. The path goes through
# argv rather than being spliced into the Python source.
wp_json_is_valid() {
  python3 -c 'import json, sys; json.load(open(sys.argv[1], encoding="utf-8"))' "$1" 2>/dev/null
}

# Validate and auto-repair JSON output
if ! wp_json_is_valid "$WP_FILE"; then
  log_warn "EP${EP_PAD}: WP JSON inválido — a tentar reparação automática..."
  # "|| true": the repair helper exits 1 when it cannot fix the file. Under
  # "set -e" that would abort here and the error branch below would never
  # run; the re-validation after the heredoc decides the outcome instead.
  python3 - "$WP_FILE" << 'PYFIX' || true
import sys, re, json

filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()

# Find the content_html value and escape the unescaped quotes inside it
marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    print("SKIP: content_html não encontrado")
    sys.exit(1)

content_start = start + len(marker)
end_pattern = re.search(r'",\s*\n\s*"hashtags"', raw[content_start:])
if not end_pattern:
    end_pattern = re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:])

if not end_pattern:
    print("SKIP: fim do content_html não encontrado")
    sys.exit(1)

content_end = content_start + end_pattern.start()
raw_content = raw[content_start:content_end]
fixed_content = re.sub(r'(?<!\\)"', '\\"', raw_content)
fixed_raw = raw[:content_start] + fixed_content + raw[content_end:]

try:
    json.loads(fixed_raw)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_raw)
    print("JSON reparado com sucesso")
except Exception as e:
    print(f"Reparação falhou: {e}")
    sys.exit(1)
PYFIX

  if wp_json_is_valid "$WP_FILE"; then
    log_info "EP${EP_PAD}: JSON reparado automaticamente"
  else
    log_error "EP${EP_PAD}: JSON inválido após reparação — verificação manual necessária"
    # Stop before the state update so the episode is not recorded as done.
    exit 1
  fi
else
  log_info "EP${EP_PAD}: WP description saved to ${WP_FILE}"
fi
|
||||
|
||||
# Update state: register the episode when the state file does not know it
# yet, then record the new status and the two generated file names.
STATUS="$(get_episode_status "$EP_NUM")"
case "$STATUS" in
  not_found)
    add_episode "$EP_NUM" "$PODCAST_TITLE" "$GUIDE_URL"
    ;;
esac

SCRIPT_NAME="$(basename "$SCRIPT_FILE")"
WP_NAME="$(basename "$WP_FILE")"
set_episode_field "$EP_NUM" "status" "script_done"
set_episode_field "$EP_NUM" "script_path" "$SCRIPT_NAME"
set_episode_field "$EP_NUM" "wp_data_path" "$WP_NAME"

log_info "EP${EP_PAD}: Content generation complete"
|
||||
Executable
+48
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the command-line help for generate-csv.sh and exit with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 [start_episode] [count]" \
    " Generates canva-bulk.csv for cover image generation." \
    " Default: next 7 episodes from pipeline state."
  exit 1
}
|
||||
|
||||
ensure_state_file

# Range of episodes: start defaults to .next_episode from the state file,
# count defaults to 7.
START="${1:-$(jq -r '.next_episode' "$STATE_FILE")}"
COUNT="${2:-7}"
END=$((START + COUNT - 1))

MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
CSV_FILE="${PROJECT_ROOT}/canva-bulk.csv"

# Emit $1 as one quoted CSV field: wrapped in double quotes, with embedded
# double quotes doubled so a title containing '"' cannot break the row.
csv_field() {
  local dq='"'
  printf '"%s"' "${1//$dq/$dq$dq}"
}

echo "number,title,keyword" > "$CSV_FILE"

for ((ep=START; ep<=END; ep++)); do
  EP_DATA="$(jq -r --arg n "$ep" '.[$n] // empty' "$MAP_FILE")"
  if [[ -z "$EP_DATA" ]]; then
    log_warn "Episode $ep not found in map, skipping"
    continue
  fi
  TITLE="$(echo "$EP_DATA" | jq -r '.podcast_title')"

  # Try to get keyword from WP JSON if it exists
  EP_PAD="$(pad_number "$ep")"
  KEYWORD=""
  for wp_file in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
    if [[ -f "$wp_file" ]]; then
      # An unreadable or invalid WP file simply yields no keyword.
      KEYWORD="$(jq -r '.keyword // ""' "$wp_file" 2>/dev/null || echo "")"
      break
    fi
  done
  # Fall back to the title when no keyword is available.
  if [[ -z "$KEYWORD" ]]; then
    KEYWORD="$TITLE"
  fi

  printf '%s,%s,%s\n' "$ep" "$(csv_field "$TITLE")" "$(csv_field "$KEYWORD")" >> "$CSV_FILE"
done

log_info "Generated Canva CSV: ${CSV_FILE} (episodes ${START}-${END})"
echo "CSV saved to: ${CSV_FILE}"
|
||||
Executable
+71
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Root of the podcast project. Defaults to the Hub vault location and can be
# pointed elsewhere (tests, another checkout) via PODCAST_PROJECT_ROOT.
PROJECT_ROOT="${PODCAST_PROJECT_ROOT:-/media/ealmeida/Dados/Hub/05-Projectos/Podcast-Descomplicar-Digital}"
STATE_FILE="${PROJECT_ROOT}/pipeline-state.json"
LOG_DIR="${PROJECT_ROOT}/logs"

# Auto-load .env if present. "set -a" marks every variable assigned while
# sourcing for export, so child processes see them too.
if [[ -f "${PROJECT_ROOT}/.env" ]]; then
  set -a
  source "${PROJECT_ROOT}/.env"
  set +a
fi
|
||||
|
||||
# Write one timestamped log line to stdout and append it to today's log file.
# Arguments: $1 - level label (e.g. INFO); remaining args - message words
# Globals:   LOG_DIR (read; created if missing)
log() {
  local severity="$1"
  shift
  local stamp logfile
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  logfile="${LOG_DIR}/pipeline-$(date '+%Y-%m-%d').log"
  mkdir -p "${LOG_DIR}"
  printf '[%s] [%s] %s\n' "$stamp" "$severity" "$*" | tee -a "$logfile"
}
|
||||
|
||||
# Log the given message words at INFO level.
log_info() {
  log INFO "$@"
}
|
||||
# Log the given message words at WARN level.
log_warn() {
  log WARN "$@"
}
|
||||
# Log the given message words at ERROR level.
log_error() {
  log ERROR "$@"
}
|
||||
|
||||
# Create pipeline-state.json with default contents when it does not exist.
# Globals: STATE_FILE (read; file created)
# Returns: 0 when the file exists afterwards, 1 when it could not be created
ensure_state_file() {
  if [[ -f "${STATE_FILE}" ]]; then
    return 0
  fi

  # Build the file beside its final location and rename it only after jq
  # succeeded. Redirecting straight into STATE_FILE would leave an empty file
  # behind when jq fails, and that empty file would pass the -f check above
  # on every later run.
  local tmp="${STATE_FILE}.tmp.$$"
  if ! echo '{"last_updated":"","next_episode":20,"next_publish_date":"","publish_time":"07:00","publish_days":["mon","tue","wed","thu","fri"],"episodes":[]}' | jq '.' > "$tmp"; then
    rm -f -- "$tmp"
    return 1
  fi
  mv -- "$tmp" "${STATE_FILE}"
  log_info "Created new pipeline-state.json"
}
|
||||
|
||||
# Print the status of an episode, or "not_found".
# Arguments: $1 - episode number (must be a valid JSON number for --argjson)
# Globals:   STATE_FILE (read)
# Outputs:   the first matching episode's .status; "not_found" when no
#            episode matches, when its status is null, or when jq fails
get_episode_status() {
  local ep_num="$1"
  # The matches are collected into an array first. Applying '// "not_found"'
  # per element prints nothing at all when no episode matches, so callers
  # comparing against "not_found" would never see it.
  jq -r --argjson n "$ep_num" \
    '[.episodes[] | select(.number == $n) | .status][0] // "not_found"' \
    "${STATE_FILE}" 2>/dev/null || echo "not_found"
}
|
||||
|
||||
# Set one field of an episode in the state file and refresh .last_updated.
# Arguments: $1 - episode number, $2 - field name, $3 - value (stored as a string)
# Globals:   STATE_FILE (read and replaced)
# Returns:   0 on success; 1 when the temp file or the jq rewrite fails, in
#            which case STATE_FILE is left untouched
# Note: when no episode has that number, only .last_updated changes.
set_episode_field() {
  local ep_num="$1" field="$2" value="$3"
  local tmp
  # The temp file sits next to STATE_FILE so the final mv is a rename on the
  # same filesystem rather than a copy from the system temp directory.
  tmp="$(mktemp "${STATE_FILE}.XXXXXX")" || return 1
  if jq --argjson n "$ep_num" --arg f "$field" --arg v "$value" \
    '(.episodes[] | select(.number == $n))[$f] = $v | .last_updated = (now | todate)' \
    "${STATE_FILE}" > "$tmp"; then
    mv -- "$tmp" "${STATE_FILE}"
  else
    # Do not leave the temp file behind when jq fails.
    rm -f -- "$tmp"
    return 1
  fi
}
|
||||
|
||||
# Append a new episode record (status "pending", every path field null) to
# the state file and refresh .last_updated.
# Arguments: $1 - episode number, $2 - title, $3 - source guide URL
# Globals:   STATE_FILE (read and replaced)
# Returns:   0 on success; 1 when the temp file or the jq rewrite fails, in
#            which case STATE_FILE is left untouched
# Note: no duplicate check is made; calling twice adds two records.
add_episode() {
  local ep_num="$1" title="$2" guide_url="$3"
  local tmp
  # The temp file sits next to STATE_FILE so the final mv is a rename on the
  # same filesystem rather than a copy from the system temp directory.
  tmp="$(mktemp "${STATE_FILE}.XXXXXX")" || return 1
  if jq --argjson n "$ep_num" --arg t "$title" --arg g "$guide_url" \
    '.episodes += [{"number":$n,"title":$t,"source_guide_url":$g,"status":"pending","script_path":null,"wp_data_path":null,"audio_raw_path":null,"audio_final_path":null,"cover_path":null,"wp_post_id":null,"scheduled_date":null}] | .last_updated = (now | todate)' \
    "${STATE_FILE}" > "$tmp"; then
    mv -- "$tmp" "${STATE_FILE}"
  else
    # Do not leave the temp file behind when jq fails.
    rm -f -- "$tmp"
    return 1
  fi
}
|
||||
|
||||
# Print the first Monday-to-Friday date strictly after the given date.
# Arguments: $1 - base date in a form "date -d" accepts (e.g. YYYY-MM-DD)
# Outputs:   the resulting date as YYYY-MM-DD
# Uses the "date -d" option to parse and offset the date.
next_weekday() {
  local base_date="$1"
  local dow
  dow="$(date -d "${base_date}" '+%u')"   # 1 = Monday ... 7 = Sunday
  case "$dow" in
    5|6|7)
      # Friday, Saturday and Sunday all roll forward to the next Monday.
      date -d "${base_date} + $((8 - dow)) days" '+%Y-%m-%d'
      ;;
    *)
      date -d "${base_date} + 1 day" '+%Y-%m-%d'
      ;;
  esac
}
|
||||
|
||||
# Print a number zero-padded to at least three digits.
# Arguments: $1 - episode number; leading zeros are allowed ("7", "020", "08")
# Outputs:   the padded number, without a trailing newline
pad_number() {
  local n="$1"
  # Strip leading zeros first: printf's %d reads "020" as octal (16) and
  # rejects "08" outright.
  n="${n#"${n%%[!0]*}"}"
  printf '%03d' "${n:-0}"
}
|
||||
Executable
+154
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the command-line help for post-produce.sh and exit with status 1.
usage() {
  cat << EOF
Usage: $0 <episode_number> <raw_audio_path> [raw_audio_path_2 ...]
 Applies intro, outro, and loudness normalization to raw TTS audio.
 Accepts multiple audio parts that will be concatenated in order.
EOF
  exit 1
}
|
||||
|
||||
# Arguments: episode number followed by one or more raw audio files.
if (( $# < 2 )); then
  usage
fi
EP_NUM="$1"
shift
RAW_PARTS=("$@")
EP_PAD="$(pad_number "$EP_NUM")"

ensure_state_file

# Every raw part must exist before any processing starts.
for raw_part in "${RAW_PARTS[@]}"; do
  if ! [[ -f "$raw_part" ]]; then
    log_error "EP${EP_PAD}: Raw audio file not found: ${raw_part}"
    exit 1
  fi
done

# Load audio settings
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"

# Print the raw value of top-level key $1 from the audio settings file.
audio_setting() {
  jq -r ".$1" "$SETTINGS_FILE"
}

INTRO_FILE="${PROJECT_ROOT}/$(audio_setting intro_file)"
OUTRO_FILE="${PROJECT_ROOT}/$(audio_setting outro_file)"
FADE_IN="$(audio_setting fade_in_duration)"
BG_DUR="$(audio_setting background_duration)"
BG_VOL="$(audio_setting background_volume_db)"
FADE_OUT="$(audio_setting fade_out_duration)"
LUFS="$(audio_setting loudness_target_lufs)"
BITRATE="$(audio_setting export_bitrate)"
SAMPLE_RATE="$(audio_setting export_sample_rate)"
MIN_DUR="$(audio_setting min_duration_minutes)"
MAX_DUR="$(audio_setting max_duration_minutes)"

# Verify input files
for required_audio in "$INTRO_FILE" "$OUTRO_FILE"; do
  if ! [[ -f "$required_audio" ]]; then
    log_error "EP${EP_PAD}: Required audio file not found: ${required_audio}"
    exit 1
  fi
done

# Create output directory
OUTPUT_DIR="${PROJECT_ROOT}/Episodios/Audios/final"
mkdir -p "$OUTPUT_DIR"

# Get podcast title for filename: characters outside [a-zA-Z0-9] become "-",
# runs of "-" collapse, a trailing "-" is dropped, and the result is lowercased.
MAP_FILE="${PROJECT_ROOT}/config/episode-guide-map.json"
PODCAST_TITLE="$(jq -r --arg n "$EP_NUM" '.[$n].podcast_title // "episodio"' "$MAP_FILE")"
SAFE_TITLE="$(sed -e 's/[^a-zA-Z0-9]/-/g' -e 's/--*/-/g' -e 's/-$//' <<< "$PODCAST_TITLE" | tr '[:upper:]' '[:lower:]')"
OUTPUT_FILE="${OUTPUT_DIR}/ep_${EP_PAD}_${SAFE_TITLE}.mp3"

# Temp directory, removed on any exit path.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "EP${EP_PAD}: Starting post-production"
|
||||
|
||||
# Concatenate raw audio parts if multiple
if [[ "${#RAW_PARTS[@]}" -gt 1 ]]; then
  log_info "EP${EP_PAD}: Concatenating ${#RAW_PARTS[@]} audio parts"
  CONCAT_LIST="${TMP_DIR}/concat_list.txt"
  : > "$CONCAT_LIST"
  PART_INDEX=0
  for part in "${RAW_PARTS[@]}"; do
    # Normalize each part to the same format first.
    # Intermediate files are named by position, not by the input's basename:
    # two parts with the same file name in different directories would
    # otherwise overwrite each other, and a "'" in a name would break the
    # quoted entry in the concat list.
    PART_INDEX=$((PART_INDEX + 1))
    PART_NORM="${TMP_DIR}/part_$(printf '%03d' "$PART_INDEX")_norm.wav"
    # NOTE(review): this highshelf cut is applied again to the combined file
    # in the "Prepare raw audio" step, so multi-part episodes get it twice
    # while single-part episodes get it once - confirm that is intended.
    ffmpeg -y -v quiet -i "$part" -af "highshelf=f=4000:g=-6" -ar "$SAMPLE_RATE" -ac 2 "$PART_NORM"
    printf "file '%s'\n" "$PART_NORM" >> "$CONCAT_LIST"
  done
  ffmpeg -y -v quiet -f concat -safe 0 -i "$CONCAT_LIST" -c copy "${TMP_DIR}/raw_combined.wav"
  RAW_COMBINED="${TMP_DIR}/raw_combined.wav"
else
  RAW_COMBINED="${RAW_PARTS[0]}"
fi
|
||||
|
||||
# Duration of the voice track; the whole-second value feeds apad below.
RAW_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$RAW_COMBINED")"
RAW_DUR_INT="${RAW_DUR%.*}"
log_info "EP${EP_PAD}: Raw audio duration: ${RAW_DUR_INT}s"

# Options shared by every ffmpeg call in this section.
FFMPEG=(ffmpeg -y -v quiet)
OUT_FORMAT=(-ar "$SAMPLE_RATE" -ac 2)

# Intro background: fade the music in, lower it to the background level and
# fade it out over the last 3 seconds of the background window.
INTRO_BG_FILTER="afade=t=in:st=0:d=${FADE_IN},volume=${BG_VOL}dB,afade=t=out:st=$((BG_DUR - 3)):d=3"
"${FFMPEG[@]}" \
  -i "$INTRO_FILE" \
  -af "$INTRO_BG_FILTER" \
  -t "$BG_DUR" \
  "${OUT_FORMAT[@]}" \
  "${TMP_DIR}/intro_bg.wav"

# Voice track: stereo, target sample rate, high-shelf cut (de-ess).
"${FFMPEG[@]}" \
  -i "$RAW_COMBINED" \
  -af "highshelf=f=4000:g=-6" \
  "${OUT_FORMAT[@]}" \
  "${TMP_DIR}/tts_stereo.wav"

# Lay the intro background under the start of the voice track. The background
# is padded to the voice length and the mix ends with the voice (first input).
INTRO_MIX_FILTER="[1]apad=whole_dur=${RAW_DUR_INT}[bg];[0][bg]amix=inputs=2:duration=first:dropout_transition=3[mixed]"
"${FFMPEG[@]}" \
  -i "${TMP_DIR}/tts_stereo.wav" \
  -i "${TMP_DIR}/intro_bg.wav" \
  -filter_complex "$INTRO_MIX_FILTER" \
  -map "[mixed]" \
  "${OUT_FORMAT[@]}" \
  "${TMP_DIR}/tts_with_intro.wav"

# Outro length in whole seconds.
OUTRO_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTRO_FILE")"
OUTRO_DUR_INT="${OUTRO_DUR%.*}"

# Ending: outro audio with the intro music underneath, faded out over the
# last FADE_OUT seconds and trimmed to the outro length.
OUTRO_MIX_FILTER="[1]volume=${BG_VOL}dB,afade=t=out:st=$((OUTRO_DUR_INT - FADE_OUT)):d=${FADE_OUT},atrim=0:${OUTRO_DUR_INT}[music];[0][music]amix=inputs=2:duration=first[out]"
"${FFMPEG[@]}" \
  -i "$OUTRO_FILE" \
  -i "$INTRO_FILE" \
  -filter_complex "$OUTRO_MIX_FILTER" \
  -map "[out]" \
  "${OUT_FORMAT[@]}" \
  "${TMP_DIR}/outro_mixed.wav"

# Join voice (with intro) and outro back to back.
"${FFMPEG[@]}" \
  -i "${TMP_DIR}/tts_with_intro.wav" \
  -i "${TMP_DIR}/outro_mixed.wav" \
  -filter_complex "[0][1]concat=n=2:v=0:a=1[out]" \
  -map "[out]" \
  "${OUT_FORMAT[@]}" \
  "${TMP_DIR}/full_episode.wav"

# Normalize loudness to the configured target and export as MP3.
"${FFMPEG[@]}" \
  -i "${TMP_DIR}/full_episode.wav" \
  -af "loudnorm=I=${LUFS}:TP=-1.5:LRA=11" \
  "${OUT_FORMAT[@]}" \
  -b:a "$BITRATE" \
  "$OUTPUT_FILE"
|
||||
|
||||
# Validate duration
FINAL_DUR="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUTPUT_FILE")"
FINAL_SEC="${FINAL_DUR%.*}"
# Fail with a clear message when ffprobe did not return a number.
if [[ ! "$FINAL_SEC" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: Could not read duration of ${OUTPUT_FILE}"
  exit 1
fi
# Whole minutes via shell arithmetic (no external bc needed); 10# keeps a
# leading zero from being read as octal.
FINAL_MIN=$((10#$FINAL_SEC / 60))
log_info "EP${EP_PAD}: Final duration: ${FINAL_MIN} minutes (${FINAL_SEC}s)"

# Out-of-range durations only produce a warning; the episode is still recorded.
if [[ "$FINAL_MIN" -lt "$MIN_DUR" ]] || [[ "$FINAL_MIN" -gt "$MAX_DUR" ]]; then
  log_warn "EP${EP_PAD}: Duration ${FINAL_MIN}min is outside expected range (${MIN_DUR}-${MAX_DUR}min)"
fi

# Update state
set_episode_field "$EP_NUM" "status" "produced"
set_episode_field "$EP_NUM" "audio_final_path" "$(basename "$OUTPUT_FILE")"

log_info "EP${EP_PAD}: Post-production complete -> ${OUTPUT_FILE}"
|
||||
Executable
+140
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the command-line help for publish-episode.sh and exit with status 1.
usage() {
  cat << EOF
Usage: $0 <episode_number> <scheduled_date>
 Publishes episode to WordPress via wp-cli over SSH.
 scheduled_date format: YYYY-MM-DD
 Requires: final audio, WP JSON, cover image (optional).
EOF
  exit 1
}
|
||||
|
||||
# Arguments: episode number and scheduled date (YYYY-MM-DD).
if [[ $# -lt 2 ]]; then
  usage
fi
EP_NUM="$1"
SCHED_DATE="$2"
# The date is later embedded in generated shell commands, so accept only the
# documented YYYY-MM-DD form.
if [[ ! "$SCHED_DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
  log_error "Invalid scheduled_date '${SCHED_DATE}' (expected YYYY-MM-DD)"
  usage
fi
EP_PAD="$(pad_number "$EP_NUM")"

# The state file must exist before it is read: on a fresh setup jq would
# otherwise fail on the missing file and abort the script under "set -e".
ensure_state_file
# Fall back to 07:00 (the default written by ensure_state_file) when the
# state file has no publish_time.
PUBLISH_TIME="$(jq -r '.publish_time // "07:00"' "$STATE_FILE")"
|
||||
|
||||
# Locate the episode's files. An unmatched glob stays literal, hence the -f tests.
AUDIO_FILE=""
for candidate in "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3; do
  if [[ -f "$candidate" ]]; then
    AUDIO_FILE="$candidate"
    break
  fi
done

WP_JSON=""
for candidate in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
  if [[ -f "$candidate" ]]; then
    WP_JSON="$candidate"
    break
  fi
done

# Cover: first .jpg/.png that find reports for this episode. read fails when
# find prints nothing, which leaves COVER_FILE empty.
COVER_FILE=""
IFS= read -r -d '' COVER_FILE < <(find "${PROJECT_ROOT}/Episodios/Capas_PodCast" -name "ep_${EP_PAD}_*" \( -name "*.jpg" -o -name "*.png" \) -print0 2>/dev/null) || true

# Audio and WP JSON are mandatory; the cover is optional.
if ! [[ -n "$AUDIO_FILE" && -f "$AUDIO_FILE" ]]; then
  log_error "EP${EP_PAD}: Audio file not found in Audios/final/"
  exit 1
fi

if ! [[ -n "$WP_JSON" && -f "$WP_JSON" ]]; then
  log_error "EP${EP_PAD}: WP JSON file not found"
  exit 1
fi

if [[ -z "$COVER_FILE" ]]; then
  log_warn "EP${EP_PAD}: Cover image not found, publishing without featured image"
fi
|
||||
|
||||
# Read WP data
TITLE="$(jq -r '.title' "$WP_JSON")"
CONTENT="$(jq -r '.content_html' "$WP_JSON")"
META_DESC="$(jq -r '.meta_description' "$WP_JSON")"
TAGS="$(jq -r '.wp_tags | join(",")' "$WP_JSON")"

# Get audio metadata
DURATION="$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$AUDIO_FILE")"
DUR_TOTAL="${DURATION%.*}"
DUR_MIN=$((DUR_TOTAL / 60))
DUR_SEC=$((DUR_TOTAL % 60))
DURATION_FMT="$(printf '%d:%02d' "$DUR_MIN" "$DUR_SEC")"
FILESIZE="$(du -h "$AUDIO_FILE" | cut -f1)"
FILESIZE_RAW="$(stat -c%s "$AUDIO_FILE")"

log_info "EP${EP_PAD}: Publishing '${TITLE}' scheduled for ${SCHED_DATE} ${PUBLISH_TIME}"

# Generate wp-cli commands for SSH execution
CMDS_FILE="${PROJECT_ROOT}/logs/publish_${EP_PAD}_commands.sh"
mkdir -p "${PROJECT_ROOT}/logs"

AUDIO_BASENAME="$(basename "$AUDIO_FILE")"
YEAR="$(date -d "$SCHED_DATE" '+%Y')"
MONTH="$(date -d "$SCHED_DATE" '+%m')"
UPLOAD_PATH="wp-content/uploads/podcast/${YEAR}/${MONTH}"

# Make a value safe inside a single-quoted shell string by turning every
# ' into '\'' (close the quote, escaped quote, reopen).
sq_escape() {
  printf '%s\n' "$1" | sed "s/'/'\\\\''/g"
}

# Every value placed between single quotes in the generated script is
# escaped, including the tag list (a tag with an apostrophe would otherwise
# end the quoted string early).
ESCAPED_CONTENT="$(sq_escape "$CONTENT")"
ESCAPED_META="$(sq_escape "$META_DESC")"
ESCAPED_TITLE="$(sq_escape "$TITLE")"
ESCAPED_TAGS="$(sq_escape "$TAGS")"

# Unquoted heredoc: ${...} is expanded now, \${...} is left for the generated
# script to expand when it runs on the server.
cat > "$CMDS_FILE" << CMDEOF
#!/usr/bin/env bash
# Auto-generated publish commands for EP${EP_PAD}
# Run via SSH MCP on server (user: ealmeida, path: /home/ealmeida/public_html)
set -euo pipefail

WP_PATH="/home/ealmeida/public_html"
UPLOAD_DIR="\${WP_PATH}/${UPLOAD_PATH}"

# 1. Create upload directory
mkdir -p "\${UPLOAD_DIR}"

# 2. Audio file must be uploaded to server first (via sftp MCP)
# Source: ${AUDIO_FILE}
# Target: \${UPLOAD_DIR}/${AUDIO_BASENAME}

# 3. Create podcast post
POST_ID=\$(wp post create \\
  --post_type=podcast \\
  --post_title='${ESCAPED_TITLE}' \\
  --post_status=future \\
  --post_date='${SCHED_DATE} ${PUBLISH_TIME}:00' \\
  --tags_input='${ESCAPED_TAGS}' \\
  --porcelain \\
  --allow-root \\
  --path="\${WP_PATH}")

echo "Created post: \${POST_ID}"

# 4. Add content (separate to avoid shell escaping issues)
wp post update \${POST_ID} --post_content='${ESCAPED_CONTENT}' --allow-root --path="\${WP_PATH}"

# 5. Set SSP meta fields
wp post meta update \${POST_ID} episode_type audio --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} audio_file "https://descomplicar.pt/${UPLOAD_PATH}/${AUDIO_BASENAME}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} duration "${DURATION_FMT}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} filesize "${FILESIZE}" --allow-root --path="\${WP_PATH}"
wp post meta update \${POST_ID} filesize_raw "${FILESIZE_RAW}" --allow-root --path="\${WP_PATH}"

# 6. Set Rank Math meta description
wp post meta update \${POST_ID} rank_math_description '${ESCAPED_META}' --allow-root --path="\${WP_PATH}"

# 7. Fix permissions
chown -R ealmeida:ealmeida "\${UPLOAD_DIR}"

echo "EP${EP_PAD} published as post \${POST_ID}, scheduled for ${SCHED_DATE} ${PUBLISH_TIME}"
CMDEOF

chmod +x "$CMDS_FILE"

# Update state
# NOTE(review): the status becomes "published" as soon as the command file is
# written, before anything has run on the server - confirm that is intended.
set_episode_field "$EP_NUM" "status" "published"
set_episode_field "$EP_NUM" "scheduled_date" "$SCHED_DATE"

log_info "EP${EP_PAD}: Publish commands saved to ${CMDS_FILE}"
log_info "EP${EP_PAD}: Upload audio via SFTP, then run commands via SSH"
|
||||
Executable
+292
@@ -0,0 +1,292 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# schedule-episode.sh — Agenda episódio completo no WordPress
|
||||
# Envia MP3 + capa via SCP, importa media, cria post com todos os metas
|
||||
#
|
||||
# Uso: ./scripts/schedule-episode.sh <ep_num> <YYYY-MM-DD> [--dry-run]
|
||||
#
|
||||
# Requisitos locais:
|
||||
# - MP3 em Episodios/Audios/final/ep_NNN_*.mp3
|
||||
# - Capa em banco-media: capas-geradas/podcast/podcast-epNNN-*.png
|
||||
# - ffprobe (para duração)
|
||||
#
|
||||
# Requisitos servidor:
|
||||
# - wp-cli com --allow-root
|
||||
# - SSH porta 9443, chave ~/.ssh/id_ed25519
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# --- Remote access -----------------------------------------------------------
SSH_USER="root"
SSH_HOST="server.descomplicar.pt"
SSH_PORT="9443"
SSH_KEY="${HOME}/.ssh/id_ed25519"
# Expanded unquoted by ssh_cmd/scp_file, so it must stay a plain
# space-separated option string.
# NOTE(review): host key checking is switched off and no known_hosts file is
# kept, for a root login - confirm this is acceptable for this server.
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"

# --- WordPress site ----------------------------------------------------------
SITE_URL="https://descomplicar.pt"
WP_PATH="/home/ealmeida/public_html"
WP_OWNER="ealmeida:ealmeida"
SERIES_SLUG="podcast-descomplicar-digital"

# --- Media bank (cover images) -----------------------------------------------
CAPAS_DIR="/media/ealmeida/Dados/Hub/06-Operacoes/Conteúdos/banco-media/capas-geradas/podcast"

# Set to true by the optional --dry-run argument.
DRY_RUN="false"
|
||||
|
||||
# Print the command-line help for schedule-episode.sh and exit with status 1.
usage() {
  cat << EOF
Usage: $0 <ep_num> <YYYY-MM-DD> [--dry-run]
 Agenda episodio completo no WordPress (audio + capa + metas + SEO)
EOF
  exit 1
}
|
||||
|
||||
# Run a command on the server over SSH.
# Arguments: passed through to ssh after the destination
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
ssh_cmd() {
  local destination="${SSH_USER}@${SSH_HOST}"
  # SSH_AUTH_SOCK is emptied for this call only; SSH_OPTS is left unquoted on
  # purpose so it splits into separate options.
  SSH_AUTH_SOCK= ssh \
    -p "${SSH_PORT}" \
    -i "${SSH_KEY}" \
    ${SSH_OPTS} \
    "$destination" \
    "$@"
}
|
||||
|
||||
# Copy a local file to the server with scp.
# Arguments: $1 - local path, $2 - remote path
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
scp_file() {
  local remote_target="${SSH_USER}@${SSH_HOST}:$2"
  # SSH_AUTH_SOCK is emptied for this call only; SSH_OPTS is left unquoted on
  # purpose so it splits into separate options.
  SSH_AUTH_SOCK= scp \
    -P "${SSH_PORT}" \
    -i "${SSH_KEY}" \
    ${SSH_OPTS} \
    "$1" \
    "$remote_target"
}
|
||||
|
||||
# Arguments: episode number, scheduled date, optional --dry-run.
if (( $# < 2 )); then
  usage
fi
EP_NUM="$1"
SCHED_DATE="$2"
if [[ "${3:-}" == "--dry-run" ]]; then
  DRY_RUN=true
fi
EP_PAD="$(pad_number "$EP_NUM")"

# === 1. Find local files ===

# Print the first argument that is a regular file (nothing when none is).
# Called with a glob, so an unmatched pattern arrives as a literal string
# and is rejected by the -f test.
first_existing() {
  local candidate
  for candidate in "$@"; do
    if [[ -f "$candidate" ]]; then
      printf '%s' "$candidate"
      return 0
    fi
  done
  return 0
}

AUDIO_FILE="$(first_existing "${PROJECT_ROOT}/Episodios/Audios/final/ep_${EP_PAD}_"*.mp3)"
COVER_FILE="$(first_existing "${CAPAS_DIR}/podcast-ep${EP_PAD}-"*.png)"
GUIDE_FILE="$(first_existing "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*.txt)"

# Validate: audio and cover are mandatory, the script text is optional.
if [[ -z "$AUDIO_FILE" ]]; then
  log_error "EP${EP_PAD}: MP3 nao encontrado em Episodios/Audios/final/"
  exit 1
fi
if [[ -z "$COVER_FILE" ]]; then
  log_error "EP${EP_PAD}: Capa PNG nao encontrada em ${CAPAS_DIR}/"
  exit 1
fi
if [[ -z "$GUIDE_FILE" ]]; then
  log_warn "EP${EP_PAD}: Guiao .txt nao encontrado (conteudo WP ficara vazio)"
fi

# === 2. Extract audio metadata ===

DURATION_RAW=$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$AUDIO_FILE")
DUR_SEC_TOTAL=${DURATION_RAW%.*}
DUR_MIN=$((DUR_SEC_TOTAL / 60))
DUR_SEC=$((DUR_SEC_TOTAL % 60))
DURATION_FMT="$(printf '%d:%02d' "$DUR_MIN" "$DUR_SEC")"
FILESIZE_H="$(du -h "$AUDIO_FILE" | cut -f1)"
FILESIZE_RAW="$(stat -c%s "$AUDIO_FILE")"
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"

# Title: taken from the script file name (prefix removed, "_" to spaces),
# otherwise from the audio file name (prefix removed, "-" to spaces).
TITLE=""
if [[ -n "$GUIDE_FILE" ]]; then
  TITLE="$(basename "$GUIDE_FILE" .txt | sed 's/^Episodio_[0-9]*_//' | tr '_' ' ')"
fi
if [[ -z "$TITLE" ]]; then
  TITLE="$(basename "$AUDIO_FILE" .mp3 | sed 's/^ep_[0-9]*_//' | tr '-' ' ')"
fi

# Upload paths, grouped by the year and month of the scheduled date.
YEAR="$(date -d "$SCHED_DATE" '+%Y')"
MONTH="$(date -d "$SCHED_DATE" '+%m')"
AUDIO_REMOTE_DIR="${WP_PATH}/wp-content/uploads/podcast/${YEAR}/${MONTH}"
AUDIO_URL="${SITE_URL}/wp-content/uploads/podcast/${YEAR}/${MONTH}/${AUDIO_BASENAME}"

log_info "EP${EP_PAD}: '${TITLE}' | ${DURATION_FMT} | ${FILESIZE_H} | ${SCHED_DATE} 07:00"

# Dry run: report what would be used and stop before touching the server.
if [[ "$DRY_RUN" == true ]]; then
  log_info "[DRY-RUN] Audio: ${AUDIO_FILE}"
  log_info "[DRY-RUN] Capa: ${COVER_FILE}"
  log_info "[DRY-RUN] URL audio: ${AUDIO_URL}"
  log_info "[DRY-RUN] Titulo: ${TITLE}"
  exit 0
fi
|
||||
|
||||
# === 3. Send the MP3 to the server ===

log_info "EP${EP_PAD}: Enviar MP3..."
ssh_cmd "mkdir -p '${AUDIO_REMOTE_DIR}'"
scp_file "$AUDIO_FILE" "${AUDIO_REMOTE_DIR}/"
ssh_cmd "chown ${WP_OWNER} '${AUDIO_REMOTE_DIR}/${AUDIO_BASENAME}'"
log_info "EP${EP_PAD}: MP3 enviado -> ${AUDIO_REMOTE_DIR}/${AUDIO_BASENAME}"

# === 4. Send the cover and import it into WP ===

log_info "EP${EP_PAD}: Enviar capa..."
COVER_BASENAME="$(basename "$COVER_FILE")"
ssh_cmd "mkdir -p /tmp/podcast-upload"
scp_file "$COVER_FILE" "/tmp/podcast-upload/${COVER_BASENAME}"
ssh_cmd "chown ${WP_OWNER} '/tmp/podcast-upload/${COVER_BASENAME}'"

# "|| ATTACH_ID=''": under "set -e" a failing import would abort the script
# right here, skipping both the remote cleanup and the error message below.
ATTACH_ID="$(ssh_cmd "cd '${WP_PATH}' && wp media import '/tmp/podcast-upload/${COVER_BASENAME}' --title='${COVER_BASENAME%.png}' --porcelain --allow-root 2>/dev/null")" || ATTACH_ID=""
ssh_cmd "rm -f '/tmp/podcast-upload/${COVER_BASENAME}'"

# The ID is reused in later remote commands, so anything that is not a plain
# number counts as a failed import.
if [[ ! "$ATTACH_ID" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: Falha ao importar capa no WP"
  exit 1
fi
log_info "EP${EP_PAD}: Capa importada (attach_id: ${ATTACH_ID})"
|
||||
|
||||
# === 5. Create the scheduled podcast post ===

log_info "EP${EP_PAD}: Criar post..."
# Escape ' as '\'' so the title survives the single-quoted remote argument.
ESCAPED_TITLE="$(echo "$TITLE" | sed "s/'/'\\\\''/g")"
# "|| POST_ID=''": under "set -e" a failing wp-cli call would abort the
# script here and the error message below would never be logged.
POST_ID="$(ssh_cmd "cd '${WP_PATH}' && wp post create \
  --post_type=podcast \
  --post_title='${ESCAPED_TITLE}' \
  --post_status=future \
  --post_date='${SCHED_DATE} 07:00:00' \
  --porcelain \
  --allow-root 2>/dev/null")" || POST_ID=""

# The ID is interpolated into every remote command below, so anything that
# is not a plain number counts as a failed creation.
if [[ ! "$POST_ID" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: Falha ao criar post"
  exit 1
fi
log_info "EP${EP_PAD}: Post criado (ID: ${POST_ID})"

# === 6. Attach the series and the featured image ===

ssh_cmd "cd '${WP_PATH}' && \
  wp post term set ${POST_ID} series '${SERIES_SLUG}' --allow-root 2>/dev/null && \
  wp post meta update ${POST_ID} _thumbnail_id ${ATTACH_ID} --allow-root 2>/dev/null"

# === 7. SSP (Seriously Simple Podcasting) metas ===

ssh_cmd "cd '${WP_PATH}' && \
  wp post meta update ${POST_ID} episode_type audio --allow-root 2>/dev/null && \
  wp post meta update ${POST_ID} audio_file '${AUDIO_URL}' --allow-root 2>/dev/null && \
  wp post meta update ${POST_ID} duration '${DURATION_FMT}' --allow-root 2>/dev/null && \
  wp post meta update ${POST_ID} filesize '${FILESIZE_H}' --allow-root 2>/dev/null && \
  wp post meta update ${POST_ID} filesize_raw '${FILESIZE_RAW}' --allow-root 2>/dev/null && \
  wp post meta update ${POST_ID} date_recorded '${SCHED_DATE} 07:00:00' --allow-root 2>/dev/null"
|
||||
|
||||
# === 8. Apply WP content + Rank Math + tags (if a _wp.json exists) ===

WP_JSON=""
for f in "${PROJECT_ROOT}/Episodios/Episodio_${EP_PAD}_"*_wp.json; do
  if [[ -f "$f" ]]; then
    WP_JSON="$f"
    break
  fi
done

if [[ -n "$WP_JSON" ]]; then
  # Auto-repair the JSON if needed
  if ! python3 -c "import json; json.load(open('$WP_JSON'))" 2>/dev/null; then
    log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
    # The repair runs as an "if" condition: under "set -e" a plain call would
    # abort the whole script when the repair fails, after the post has
    # already been created. Success is logged only when it really happened.
    if python3 - "$WP_JSON" << 'PYFIX'
import sys, re, json
filepath = sys.argv[1]
with open(filepath, 'r') as f:
    raw = f.read()
marker = '"content_html": "'
start = raw.find(marker)
if start == -1: sys.exit(1)
content_start = start + len(marker)
end_pattern = re.search(r'",\s*\n\s*"hashtags"', raw[content_start:]) or re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:])
if not end_pattern: sys.exit(1)
content_end = content_start + end_pattern.start()
fixed = raw[:content_start] + re.sub(r'(?<!\\)"', '\\"', raw[content_start:content_end]) + raw[content_end:]
json.loads(fixed)
with open(filepath, 'w') as f: f.write(fixed)
PYFIX
    then
      log_info "EP${EP_PAD}: JSON reparado"
    else
      log_warn "EP${EP_PAD}: Reparação automática do JSON falhou"
    fi
  fi
fi
|
||||
|
||||
if [[ -n "$WP_JSON" ]] && python3 -c "import json; json.load(open('$WP_JSON'))" 2>/dev/null; then
|
||||
log_info "EP${EP_PAD}: Aplicar conteudo WP de $(basename "$WP_JSON")"
|
||||
|
||||
WP_CONTENT="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('content_html',''))" 2>/dev/null)"
|
||||
WP_META="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('meta_description',''))" 2>/dev/null)"
|
||||
WP_KEYWORD="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('keyword',''))" 2>/dev/null)"
|
||||
WP_TAGS="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(','.join(d.get('wp_tags',[])))" 2>/dev/null)"
|
||||
WP_HASHTAGS="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(' '.join(d.get('hashtags',[])))" 2>/dev/null)"
|
||||
|
||||
# Excerpt = primeira linha do meta + hashtags
|
||||
WP_EXCERPT=""
|
||||
if [[ -n "$WP_META" && -n "$WP_HASHTAGS" ]]; then
|
||||
WP_EXCERPT="${WP_META}
|
||||
|
||||
${WP_HASHTAGS}"
|
||||
fi
|
||||
|
||||
# Aplicar conteudo HTML
|
||||
if [[ -n "$WP_CONTENT" ]]; then
|
||||
ESCAPED_CONTENT="$(echo "$WP_CONTENT" | sed "s/'/'\\\\''/g")"
|
||||
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_content='${ESCAPED_CONTENT}' --allow-root 2>/dev/null"
|
||||
log_info "EP${EP_PAD}: post_content aplicado"
|
||||
fi
|
||||
|
||||
# Aplicar excerpt
|
||||
if [[ -n "$WP_EXCERPT" ]]; then
|
||||
ESCAPED_EXCERPT="$(echo "$WP_EXCERPT" | sed "s/'/'\\\\''/g")"
|
||||
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_excerpt='${ESCAPED_EXCERPT}' --allow-root 2>/dev/null"
|
||||
log_info "EP${EP_PAD}: post_excerpt aplicado"
|
||||
fi
|
||||
|
||||
# Apply tags.
# WP_TAGS is a comma-separated list, but `wp post term set` expects every term
# as its own argument. Each tag is therefore split out and single-quoted for
# the remote shell, so tags containing spaces or apostrophes arrive intact
# instead of being word-split (or merged into one comma-laden term).

# Print " 'tag1' 'tag2' ..." for the comma-separated list given in $1.
wp_tag_args() {
  local -a tags=()
  local tag out=""
  local sq="'" esc="'\\''"

  IFS=',' read -r -a tags <<< "$1"
  for tag in ${tags[@]+"${tags[@]}"}; do
    [[ -n "$tag" ]] || continue
    # Close the quote, emit an escaped apostrophe, reopen the quote.
    out+=" '${tag//$sq/$esc}'"
  done
  printf '%s' "$out"
}

if [[ -n "$WP_TAGS" ]]; then
  ssh_cmd "cd '${WP_PATH}' && wp post term set ${POST_ID} post_tag$(wp_tag_args "$WP_TAGS") --allow-root 2>/dev/null"
  log_info "EP${EP_PAD}: tags aplicadas"
fi
|
||||
|
||||
# Rank Math: meta description + focus keyword
|
||||
if [[ -n "$WP_META" ]]; then
|
||||
ESCAPED_META="$(echo "$WP_META" | sed "s/'/'\\\\''/g")"
|
||||
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_description '${ESCAPED_META}' --allow-root 2>/dev/null"
|
||||
log_info "EP${EP_PAD}: rank_math_description aplicado"
|
||||
fi
|
||||
|
||||
if [[ -n "$WP_KEYWORD" ]]; then
|
||||
ESCAPED_KW="$(echo "$WP_KEYWORD" | sed "s/'/'\\\\''/g")"
|
||||
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_focus_keyword '${ESCAPED_KW}' --allow-root 2>/dev/null"
|
||||
log_info "EP${EP_PAD}: rank_math_focus_keyword aplicado"
|
||||
fi
|
||||
|
||||
# Rank Math: SEO title (preferir seo_title do JSON, fallback para title + sufixo)
|
||||
WP_SEO_TITLE="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('seo_title',''))" 2>/dev/null)"
|
||||
if [[ -z "$WP_SEO_TITLE" ]]; then
|
||||
WP_TITLE_FALLBACK="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('title',''))" 2>/dev/null)"
|
||||
[[ -n "$WP_TITLE_FALLBACK" ]] && WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
|
||||
fi
|
||||
if [[ -n "$WP_SEO_TITLE" ]]; then
|
||||
ESCAPED_SEO_TITLE="$(echo "$WP_SEO_TITLE" | sed "s/'/'\\\\''/g")"
|
||||
ssh_cmd "cd '${WP_PATH}' && wp post meta update ${POST_ID} rank_math_title '${ESCAPED_SEO_TITLE}' --allow-root 2>/dev/null"
|
||||
log_info "EP${EP_PAD}: rank_math_title aplicado"
|
||||
fi
|
||||
|
||||
# Slug optimizado (preferir slug do JSON)
|
||||
WP_SLUG="$(python3 -c "import json; d=json.load(open('$WP_JSON')); print(d.get('slug',''))" 2>/dev/null)"
|
||||
if [[ -n "$WP_SLUG" ]]; then
|
||||
ssh_cmd "cd '${WP_PATH}' && wp post update ${POST_ID} --post_name='${WP_SLUG}' --allow-root 2>/dev/null"
|
||||
log_info "EP${EP_PAD}: slug actualizado para ${WP_SLUG}"
|
||||
fi
|
||||
else
|
||||
log_info "EP${EP_PAD}: PENDENTE — WP JSON nao encontrado, gerar via generate-content.sh"
|
||||
fi
|
||||
|
||||
# === 9. Corrigir permissoes uploads ===
|
||||
|
||||
ssh_cmd "chown -R ${WP_OWNER} '${AUDIO_REMOTE_DIR}/' '${WP_PATH}/wp-content/uploads/${YEAR}/${MONTH}/' 2>/dev/null" || true
|
||||
|
||||
log_info "EP${EP_PAD}: Agendado para ${SCHED_DATE} 07:00 (post ${POST_ID})"
|
||||
|
||||
# Update pipeline-state.json: mark the episode as "ready" and record its
# title, audio path and scheduled date.

# Read the title from the WP JSON when one was found. The path is passed as an
# argument instead of being spliced into the Python source, so a quote in the
# file name cannot break the snippet.
EP_TITLE=""
if [[ -n "$WP_JSON" ]]; then
  EP_TITLE="$(python3 -c 'import json, sys; d = json.load(open(sys.argv[1])); print(d.get("title", ""))' "$WP_JSON" 2>/dev/null || echo "")"
fi
AUDIO_BASENAME="$(basename "$AUDIO_FILE")"

# An empty EP_TITLE must not wipe a title already stored for the episode, so
# the title is only merged into an existing entry when there is one to write.
# The jq result is checked explicitly: chained with `&&` a failure would be
# invisible to `set -e` and success would be logged regardless.
if jq --argjson n "$EP_NUM" --arg t "$EP_TITLE" --arg a "Episodios/Audios/final/${AUDIO_BASENAME}" --arg s "$SCHED_DATE" \
  '(if $t != "" then {title: $t} else {} end) as $title
   | if [.episodes[] | select(.num == $n)] | length > 0
     then (.episodes[] | select(.num == $n)) |= . + {status: "ready", audio: $a, scheduled: $s} + $title
     else .episodes += [{num: $n, title: $t, audio: $a, scheduled: $s, status: "ready"}]
     end
   | .last_updated = (now | todate)' \
  "${STATE_FILE}" > "${STATE_FILE}.tmp"; then
  mv "${STATE_FILE}.tmp" "${STATE_FILE}"
  log_info "EP${EP_PAD}: pipeline-state.json actualizado"
else
  # Best effort: the post already exists, so report the problem and carry on
  # to let the script still print the post ID.
  rm -f "${STATE_FILE}.tmp"
  log_error "EP${EP_PAD}: Falha ao actualizar pipeline-state.json"
fi
|
||||
|
||||
echo "${POST_ID}"
|
||||
Executable
+82
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
# Print the command-line help on stdout and terminate with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <input_file> <output_dir>" \
    " Splits a podcast script into blocks for TTS generation." \
    " Splits at the FAQ transition marker or at word limit." \
    " Creates part_1.txt, part_2.txt, etc. in output_dir."
  exit 1
}
|
||||
|
||||
# Both the script to split and the destination directory are mandatory.
if (( $# < 2 )); then
  usage
fi

INPUT_FILE="$1"
OUTPUT_DIR="$2"

# Refuse to continue without a readable source script.
[[ -f "$INPUT_FILE" ]] || {
  log_error "Input file not found: ${INPUT_FILE}"
  exit 1
}

# The destination is created on demand (including parents).
mkdir -p "$OUTPUT_DIR"
|
||||
|
||||
# Load the splitting parameters from the project's audio settings.
SETTINGS_FILE="${PROJECT_ROOT}/config/audio-settings.json"
if [[ ! -f "$SETTINGS_FILE" ]]; then
  log_error "Settings file not found: ${SETTINGS_FILE}"
  exit 1
fi

MAX_WORDS="$(jq -r '.tts_max_words_per_block' "$SETTINGS_FILE")"
SPLIT_MARKER="$(jq -r '.tts_split_marker' "$SETTINGS_FILE")"

# `jq -r` prints the literal string "null" for a missing key. Reject anything
# that is not a positive integer here, with a clear message, rather than
# failing obscurely in the numeric comparison below.
if [[ ! "$MAX_WORDS" =~ ^[1-9][0-9]*$ ]]; then
  log_error "Invalid tts_max_words_per_block in ${SETTINGS_FILE}: ${MAX_WORDS}"
  exit 1
fi

# A missing marker means "no marker", not a search for the word "null".
if [[ "$SPLIT_MARKER" == "null" ]]; then
  SPLIT_MARKER=""
fi

# Arithmetic expansion strips the padding some `wc` implementations emit.
TOTAL_WORDS=$(( $(wc -w < "$INPUT_FILE") ))
log_info "Total words: ${TOTAL_WORDS}, max per block: ${MAX_WORDS}"

if (( TOTAL_WORDS <= MAX_WORDS )); then
  # Short enough for a single block: copy the script through unchanged and
  # report one part on stdout.
  cp "$INPUT_FILE" "${OUTPUT_DIR}/part_1.txt"
  log_info "No split needed (${TOTAL_WORDS} words). Created part_1.txt"
  echo "1"
  exit 0
fi
|
||||
|
||||
# Try to split at the FAQ marker.
# grep exits 1 when nothing matches; under `set -euo pipefail` that failure
# would abort the script inside the assignment, before the midpoint fallback
# below could ever run. The failure is tolerated and treated as "no marker".
# `-m 1` stops at the first hit (no `head`, hence no SIGPIPE under pipefail)
# and `-e` keeps a marker starting with "-" from being parsed as an option.
MARKER_LINE=""
if [[ -n "$SPLIT_MARKER" && "$SPLIT_MARKER" != "null" ]]; then
  MARKER_LINE="$(grep -nE -m 1 -e "$SPLIT_MARKER" "$INPUT_FILE" | cut -d: -f1)" || MARKER_LINE=""
fi

# SPLIT_AFTER is the last line that goes into part 1.
if [[ -n "$MARKER_LINE" && "$MARKER_LINE" -gt 1 ]]; then
  # The marker line opens part 2 (the FAQ section starts there).
  SPLIT_AFTER=$((MARKER_LINE - 1))
  SPLIT_DESC="FAQ marker (line ${MARKER_LINE})"
else
  # No usable marker — split at the approximate midpoint, by paragraph.
  TOTAL_LINES=$(( $(wc -l < "$INPUT_FILE") ))
  MID_LINE=$((TOTAL_LINES / 2))

  # Look for the nearest blank line (paragraph break) around the midpoint.
  SPLIT_LINE=""
  for offset in 0 1 -1 2 -2 3 -3 5 -5 10 -10; do
    CHECK=$((MID_LINE + offset))
    if (( CHECK > 0 && CHECK < TOTAL_LINES )); then
      LINE_CONTENT="$(sed -n "${CHECK}p" "$INPUT_FILE")"
      if [[ "$LINE_CONTENT" =~ ^[[:space:]]*$ ]]; then
        SPLIT_LINE="$CHECK"
        break
      fi
    fi
  done

  # No paragraph break nearby: cut at the midpoint line itself.
  if [[ -z "$SPLIT_LINE" ]]; then
    SPLIT_LINE="$MID_LINE"
  fi
  SPLIT_AFTER="$SPLIT_LINE"
  SPLIT_DESC="paragraph break (line ${SPLIT_LINE})"
fi

head -n "$SPLIT_AFTER" "$INPUT_FILE" > "${OUTPUT_DIR}/part_1.txt"
tail -n "+$((SPLIT_AFTER + 1))" "$INPUT_FILE" > "${OUTPUT_DIR}/part_2.txt"

WORDS_1=$(( $(wc -w < "${OUTPUT_DIR}/part_1.txt") ))
WORDS_2=$(( $(wc -w < "${OUTPUT_DIR}/part_2.txt") ))

if (( WORDS_1 == 0 || WORDS_2 == 0 )); then
  # Degenerate split (e.g. the whole script is one long line): never hand an
  # empty block to the caller; fall back to a single part instead.
  cp "$INPUT_FILE" "${OUTPUT_DIR}/part_1.txt"
  rm -f "${OUTPUT_DIR}/part_2.txt"
  log_info "Split at ${SPLIT_DESC} would leave an empty block; keeping a single part. Created part_1.txt"
  echo "1"
else
  log_info "Split at ${SPLIT_DESC}: part_1=${WORDS_1} words, part_2=${WORDS_2} words"
  echo "2"
fi
|
||||
Executable
+115
@@ -0,0 +1,115 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# transfer-to-server.sh — Transfere ficheiros do desktop para o CWP server via SCP
|
||||
# Utiliza a chave SSH em ~/.ssh/id_ed25519, porta 9443
|
||||
# Permissoes finais: ealmeida:ealmeida
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "${SCRIPT_DIR}/lib.sh"
|
||||
|
||||
SSH_KEY="${HOME}/.ssh/id_ed25519"
|
||||
SSH_PORT=9443
|
||||
SSH_HOST="server.descomplicar.pt"
|
||||
SSH_USER="root"
|
||||
# Extra options passed to every ssh/scp invocation in this script.
# NOTE(review): StrictHostKeyChecking=no combined with
# UserKnownHostsFile=/dev/null switches off SSH host-key verification, so a
# root session cannot detect an impersonated server. Consider pinning the host
# key instead (e.g. StrictHostKeyChecking=accept-new with a real known_hosts).
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
|
||||
WP_PATH="/home/ealmeida/public_html"
|
||||
|
||||
# Print the command-line help on stdout and terminate with status 1.
usage() {
  cat <<EOF
Usage: $0 <type> <local_file> [remote_subdir]

Types:
 cover <file.png> -> wp-content/uploads/YYYY/MM/
 audio <file.mp3> [YYYY/MM] -> wp-content/uploads/podcast/YYYY/MM/
 batch <dir_of_files> <type> -> envia todos os ficheiros do directorio

Examples:
 $0 cover /path/to/ep001.png
 $0 audio /path/to/ep020.mp3 2026/04
 $0 batch /path/to/capas/ cover
EOF
  exit 1
}
|
||||
|
||||
#######################################
# Copy one local file to the server over scp.
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
# Arguments: $1 - local source path
#            $2 - remote destination path
# Returns:   scp's exit status
#######################################
scp_file() {
  local src="$1" dst="$2"
  local -a opts=()

  # Split SSH_OPTS into words explicitly: an unquoted ${SSH_OPTS} would also
  # undergo pathname expansion and depends on the caller's IFS.
  IFS=$' \t\n' read -r -a opts <<< "${SSH_OPTS}"

  # SSH_AUTH_SOCK is blanked so only the key given with -i is offered.
  SSH_AUTH_SOCK= scp -P "${SSH_PORT}" -i "${SSH_KEY}" ${opts[@]+"${opts[@]}"} "$src" "${SSH_USER}@${SSH_HOST}:${dst}"
}
|
||||
|
||||
#######################################
# Run a command on the server over ssh.
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
# Arguments: the remote command line, passed through to ssh unchanged
# Returns:   ssh's exit status
#######################################
ssh_cmd() {
  local -a opts=()

  # Split SSH_OPTS into words explicitly: an unquoted ${SSH_OPTS} would also
  # undergo pathname expansion and depends on the caller's IFS.
  IFS=$' \t\n' read -r -a opts <<< "${SSH_OPTS}"

  # SSH_AUTH_SOCK is blanked so only the key given with -i is offered.
  SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" ${opts[@]+"${opts[@]}"} "${SSH_USER}@${SSH_HOST}" "$@"
}
|
||||
|
||||
# A transfer type plus at least one path are required.
if (( $# < 2 )); then
  usage
fi

TYPE="$1"
shift

# Owner given to everything uploaded by this script.
REMOTE_OWNER="ealmeida:ealmeida"

# Single-quote $1 for the remote shell, escaping embedded apostrophes, so file
# names such as "d'ouro.mp3" cannot break (or inject into) the remote command.
remote_quote() {
  local sq="'" esc="'\\''"
  printf "'%s'" "${1//$sq/$esc}"
}

#######################################
# Upload one file and hand it over to REMOTE_OWNER.
# Arguments: $1 - local file
#            $2 - remote directory (created if missing)
#            $3 - label used in the success log line
# Exits with status 1 when the local file does not exist.
#######################################
upload_file() {
  local local_file="$1" remote_dir="$2" label="$3"
  local base

  if [[ ! -f "$local_file" ]]; then
    log_error "Ficheiro nao encontrado: $local_file"
    exit 1
  fi

  base="$(basename -- "$local_file")"
  ssh_cmd "mkdir -p $(remote_quote "$remote_dir")"
  scp_file "$local_file" "${remote_dir}/"
  ssh_cmd "chown ${REMOTE_OWNER} $(remote_quote "${remote_dir}/${base}")"

  log_info "${label}: ${base} -> ${remote_dir}/"
}

case "$TYPE" in
  cover)
    # Covers go to the uploads folder of the current month.
    upload_file "$1" "${WP_PATH}/wp-content/uploads/$(date '+%Y/%m')" "Cover enviada"
    ;;

  audio)
    # An optional second argument overrides the YYYY/MM sub-directory.
    SUBDIR="${2-$(date '+%Y/%m')}"
    upload_file "$1" "${WP_PATH}/wp-content/uploads/podcast/${SUBDIR}" "Audio enviado"
    ;;

  batch)
    LOCAL_DIR="$1"
    BATCH_TYPE="${2:-cover}"
    if [[ ! -d "$LOCAL_DIR" ]]; then
      log_error "Directorio nao encontrado: $LOCAL_DIR"
      exit 1
    fi

    case "$BATCH_TYPE" in
      cover) REMOTE_DIR="${WP_PATH}/wp-content/uploads/$(date '+%Y/%m')" ;;
      audio) REMOTE_DIR="${WP_PATH}/wp-content/uploads/podcast/$(date '+%Y/%m')" ;;
      *)
        log_error "Tipo batch invalido: $BATCH_TYPE"
        exit 1
        ;;
    esac

    ssh_cmd "mkdir -p $(remote_quote "$REMOTE_DIR")"

    COUNT=0
    for f in "${LOCAL_DIR}"/*.{png,jpg,mp3,wav}; do
      # An extension without matches leaves the literal pattern behind.
      [[ -f "$f" ]] || continue
      scp_file "$f" "${REMOTE_DIR}/"
      COUNT=$((COUNT + 1))
    done

    ssh_cmd "chown -R ${REMOTE_OWNER} $(remote_quote "${REMOTE_DIR}/")"
    log_info "Batch ${BATCH_TYPE}: ${COUNT} ficheiros enviados para ${REMOTE_DIR}/"
    ;;

  *)
    usage
    ;;
esac
|
||||
Executable
+62
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate TTS for a single text file via Gemini API."""
|
||||
import sys, wave, os
|
||||
|
||||
# WAV container parameters used to wrap the raw audio bytes of the response:
# mono, 2 bytes per sample, 24000 samples per second.
_CHANNELS = 1
_SAMPLE_WIDTH_BYTES = 2
_SAMPLE_RATE_HZ = 24000


def main():
    """Synthesise one text file with Gemini TTS and save the result as WAV.

    Command line: ``tts-single-part.py <input.txt> <output.wav>``.
    The API key is read from the ``GEMINI_API_KEY`` environment variable.

    Exits with status 1, with a message on stderr, when arguments are
    missing, the API key is not set, the input contains no words, or the
    API response carries no audio data.
    """
    if len(sys.argv) < 3:
        print("Usage: tts-single-part.py <input.txt> <output.wav>", file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("ERROR: GEMINI_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    # Imported here so argument and environment errors are reported even when
    # the SDK is not installed.
    from google import genai
    from google.genai import types

    style = (
        "Lê este texto em português de Portugal (PT-PT), com um tom enérgico, "
        "confiante, educativo, inspirador e profissional. Mantém o ritmo natural "
        "e envolvente, como se estivesses a conversar diretamente com o ouvinte, "
        "transmitindo proximidade e autoridade. Faz pequenas pausas para dar ênfase "
        "às ideias-chave e assegura que cada transição entre temas é fluida. Evita "
        "soar robótico ou demasiado formal; o objetivo é informar, motivar e criar "
        "ligação com quem está a ouvir."
    )

    # The scripts are Portuguese text; read them as UTF-8 regardless of the
    # locale the pipeline happens to run under.
    with open(input_file, "r", encoding="utf-8") as f:
        text = f.read()

    words = len(text.split())
    if words == 0:
        print(f"ERROR: input file has no text: {input_file}", file=sys.stderr)
        sys.exit(1)
    print(f"Generating TTS for {words} words...")

    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model="gemini-2.5-pro-preview-tts",
        contents=style + "\n\n" + text,
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Sulafat")
                )
            ),
        ),
    )

    # A blocked or empty response has no candidates/parts/inline data; report
    # that as a clean failure instead of an attribute/index traceback.
    try:
        data = response.candidates[0].content.parts[0].inline_data.data
    except (AttributeError, IndexError, TypeError):
        data = None
    if not data:
        print("ERROR: Gemini response contained no audio data", file=sys.stderr)
        sys.exit(1)

    with wave.open(output_file, "wb") as wf:
        wf.setnchannels(_CHANNELS)
        wf.setsampwidth(_SAMPLE_WIDTH_BYTES)
        wf.setframerate(_SAMPLE_RATE_HZ)
        wf.writeframes(data)

    duration = (len(data) // _SAMPLE_WIDTH_BYTES) / _SAMPLE_RATE_HZ
    print(f"OK: {duration:.0f}s -> {output_file}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user