Files
ealmeida 6035542b67 feat: scripts de projectos vindos do Hub (podcast, alojadamaria, clip, ocr, etc.)
Movidos do vault Hub para centralizar scripts. Hub mantem symlinks.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 20:53:29 +01:00

225 lines
8.7 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail
# apply-wp-content.sh — Applies the content of a _wp.json file to posts already scheduled in WordPress
#
# Usage: ./scripts/apply-wp-content.sh <ep_num> [post_id]
# If post_id is not given, the post is looked up by its scheduled date taken from pipeline-state.json
#
# Useful for: episodes that were scheduled before their _wp.json existed
# Absolute directory of this script; lib.sh is sourced from that same directory.
# NOTE(review): pad_number, log_info/log_warn/log_error, PROJECT_ROOT and STATE_FILE are used
# further down but never defined in this file — presumably they come from lib.sh; confirm.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib.sh"
# SSH config
# NOTE(review): SSH_OPTS turns off host-key verification (StrictHostKeyChecking=no with
# UserKnownHostsFile=/dev/null) for a login as root — confirm this is an accepted risk.
SSH_KEY="${HOME}/.ssh/id_ed25519"
SSH_PORT=9443
SSH_HOST="server.descomplicar.pt"
SSH_USER="root"
SSH_OPTS="-o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
# Directory on the server that the remote commands cd into before calling wp.
WP_PATH="/home/ealmeida/public_html"
#######################################
# Runs a command on the remote host over SSH.
# Globals:   SSH_PORT, SSH_KEY, SSH_OPTS, SSH_USER, SSH_HOST (read)
# Arguments: "$@" - forwarded to ssh as the remote command
# Outputs:   whatever ssh / the remote command writes
# Returns:   exit status of ssh
#######################################
ssh_cmd() {
  local -a ssh_opts=()
  # SSH_OPTS is a whitespace-separated string. Split it explicitly into an
  # array so its words are never subject to pathname expansion, which an
  # unquoted ${SSH_OPTS} would be.
  IFS=$' \t' read -r -a ssh_opts <<< "${SSH_OPTS}"
  # SSH_AUTH_SOCK is emptied for this single call, so only the key given with
  # -i is offered (together with IdentitiesOnly from SSH_OPTS).
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older bash.
  SSH_AUTH_SOCK= ssh -p "${SSH_PORT}" -i "${SSH_KEY}" \
    ${ssh_opts[@]+"${ssh_opts[@]}"} \
    "${SSH_USER}@${SSH_HOST}" "$@"
}
#######################################
# Prints the command-line synopsis and terminates the script.
# Outputs:   usage line on stderr (stdout is reserved for the post ID the
#            script prints on success)
# Returns:   never returns; exits with status 1
#######################################
usage() {
  printf 'Usage: %s <ep_num> [post_id]\n' "$0" >&2
  exit 1
}
# The episode number is mandatory; the post ID is optional.
if (( $# < 1 )); then
  usage
fi
EP_NUM="$1"
MANUAL_POST_ID="${2:-}"
EP_PAD="$(pad_number "$EP_NUM")"

# Locate the episode's _wp.json (find is used so that file names containing
# accented characters are handled). The first existing match wins.
WP_JSON=""
while IFS= read -r candidate; do
  if [[ -f "$candidate" ]]; then
    WP_JSON="$candidate"
    break
  fi
done < <(find "${PROJECT_ROOT}" -maxdepth 3 -name "Episodio_${EP_PAD}_*_wp.json" 2>/dev/null)

[[ -n "$WP_JSON" ]] || {
  log_error "EP${EP_PAD}: _wp.json não encontrado"
  exit 1
}
# Validate the JSON and repair it automatically when needed.

#######################################
# Checks that a file parses as JSON.
# Arguments: $1 - path to the JSON file
# Returns:   0 when the file parses, non-zero otherwise
#######################################
wp_json_is_valid() {
  # The path travels as an argument, never inside the Python source, so a
  # quote character in the file name cannot break the snippet.
  python3 -c 'import json, sys
with open(sys.argv[1], encoding="utf-8") as fh:
    json.load(fh)' "$1" 2>/dev/null
}

#######################################
# Tries to repair a _wp.json whose "content_html" value contains unescaped
# double quotes. The value is taken to end right before the "hashtags" (or
# "wp_tags") key; the file is rewritten in place only if the result parses.
# Arguments: $1 - path to the JSON file
# Outputs:   "reparado" on stderr after a successful rewrite (stdout stays
#            reserved for the post ID printed at the end of the script)
# Returns:   0 when the file was repaired, non-zero otherwise
#######################################
wp_json_repair() {
  python3 - "$1" >&2 <<'PYFIX'
import sys, re, json

filepath = sys.argv[1]
with open(filepath, 'r', encoding='utf-8') as f:
    raw = f.read()

marker = '"content_html": "'
start = raw.find(marker)
if start == -1:
    sys.exit(1)
content_start = start + len(marker)

end_pattern = (re.search(r'",\s*\n\s*"hashtags"', raw[content_start:])
               or re.search(r'",\s*\n\s*"wp_tags"', raw[content_start:]))
if not end_pattern:
    sys.exit(1)
content_end = content_start + end_pattern.start()

# Escape every double quote inside the value that is not already escaped.
fixed = raw[:content_start] + re.sub(r'(?<!\\)"', '\\"', raw[content_start:content_end]) + raw[content_end:]
json.loads(fixed)  # raises (non-zero exit) if the repaired text still does not parse
with open(filepath, 'w', encoding='utf-8') as f:
    f.write(fixed)
print("reparado")
PYFIX
}

if ! wp_json_is_valid "$WP_JSON"; then
  log_warn "EP${EP_PAD}: JSON inválido — a tentar reparação automática..."
  # The repair runs inside the `if` condition: under `set -e` a bare failing
  # command would abort the script before the error below could be logged.
  if ! wp_json_repair "$WP_JSON" || ! wp_json_is_valid "$WP_JSON"; then
    log_error "EP${EP_PAD}: JSON inválido após reparação"
    exit 1
  fi
  log_info "EP${EP_PAD}: JSON reparado automaticamente"
fi
log_info "EP${EP_PAD}: Usando $(basename "$WP_JSON")"
# Resolve the WordPress post ID: the optional command-line argument wins;
# otherwise the post is looked up by the episode's scheduled date.
POST_ID="$MANUAL_POST_ID"
if [[ -z "$POST_ID" ]]; then
  # Scheduled date of this episode according to the pipeline state file.
  SCHED_DATE="$(jq -r --argjson n "$EP_NUM" '.episodes[] | select(.num == $n) | .scheduled // empty' "$STATE_FILE")"
  if [[ -z "$SCHED_DATE" ]]; then
    log_error "EP${EP_PAD}: Não encontrado em pipeline-state.json, especifique post_id manualmente"
    exit 1
  fi
  # SCHED_DATE is spliced into SQL that a root shell on the server executes,
  # so only date-like text is accepted (no quotes, $, backticks, ;).
  sched_date_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}([T ][0-9:.+Z-]*)?$'
  if [[ ! "$SCHED_DATE" =~ $sched_date_re ]]; then
    log_error "EP${EP_PAD}: data agendada inválida em pipeline-state.json: '${SCHED_DATE}'"
    exit 1
  fi
  log_info "EP${EP_PAD}: A pesquisar post por data ${SCHED_DATE}..."
  lookup_sql="SELECT ID FROM wpah_posts WHERE post_type='podcast' AND post_status='future' AND DATE(post_date)='${SCHED_DATE}' LIMIT 1"
  # Tested explicitly: under `set -e` a failing ssh inside a plain assignment
  # would end the script without any message.
  if ! POST_ID="$(ssh_cmd "cd '${WP_PATH}' && wp db query \"${lookup_sql}\" --skip-column-names --allow-root 2>/dev/null")"; then
    log_error "EP${EP_PAD}: falha ao consultar o WordPress via SSH"
    exit 1
  fi
  # Drop any whitespace or line ending surrounding the returned ID.
  POST_ID="${POST_ID//[[:space:]]/}"
fi
if [[ -z "$POST_ID" ]]; then
  log_error "EP${EP_PAD}: Post não encontrado no WordPress"
  exit 1
fi
# The ID is interpolated into every remote command below; refuse anything
# that is not a plain positive integer.
if [[ ! "$POST_ID" =~ ^[0-9]+$ ]]; then
  log_error "EP${EP_PAD}: post_id inválido: '${POST_ID}'"
  exit 1
fi
log_info "EP${EP_PAD}: Post ID = ${POST_ID}"
# Extract the fields from the JSON via Python (more robust than jq for HTML
# with accented characters).

#######################################
# Prints one top-level field of the _wp.json file.
# Globals:   WP_JSON (read)
# Arguments: $1 - key to read
#            $2 - optional separator; when given the value is treated as a
#                 list of strings and joined with it (missing key -> empty)
# Outputs:   the value on stdout (empty line when the key is missing)
# Returns:   exit status of python3; its stderr is discarded
#######################################
wp_json_get() {
  # Path and key are passed as arguments instead of being pasted into the
  # Python source, so quotes in the file name cannot break the snippet.
  python3 - "$WP_JSON" "$1" "${2:-}" <<'PYGET' 2>/dev/null
import json, sys

path, key, sep = sys.argv[1:4]
with open(path, encoding="utf-8") as fh:
    data = json.load(fh)
if sep:
    print(sep.join(data.get(key, [])))
else:
    print(data.get(key, ''))
PYGET
}

WP_CONTENT="$(wp_json_get content_html)"
WP_META="$(wp_json_get meta_description)"
WP_KEYWORD="$(wp_json_get keyword)"
WP_TAGS="$(wp_json_get wp_tags ',')"
WP_HASHTAGS="$(wp_json_get hashtags ' ')"
WP_SEO_TITLE="$(wp_json_get seo_title)"
WP_SLUG="$(wp_json_get slug)"
# Rank Math auto-fix: a slug that lacks the keyword is rebuilt; an SEO title
# without any digit gets the current year added.

#######################################
# Computes the corrected slug and SEO title.
# Arguments: $1 - path to the _wp.json file (its "keyword" field is read)
#            $2 - current slug
#            $3 - current SEO title
# Outputs:   two lines on stdout: the slug, then the SEO title (each one
#            unchanged when no fix applies)
# Returns:   exit status of python3
#######################################
rankmath_autofix() {
  python3 - "$1" "$2" "$3" <<'PYFIX'
import json, sys, re, unicodedata, time


def slugify(text):
    # Lower-case, strip combining accents, keep only [a-z0-9 -], hyphenate.
    text = unicodedata.normalize("NFD", text.lower())
    text = "".join(c for c in text if unicodedata.category(c) != "Mn")
    text = re.sub(r'[^a-z0-9\s-]', '', text)
    return re.sub(r'[-\s]+', '-', text.strip()).rstrip('-')


STOPWORDS = {"a","o","as","os","de","da","do","das","dos","e","em","no","na","nos","nas","para","por","pelo","pela"}


def strip_sw(s):
    # Slugified words of s, minus stopwords, joined by single spaces.
    return " ".join(t for t in slugify(s).replace("-", " ").split() if t not in STOPWORDS)


with open(sys.argv[1], encoding="utf-8") as fh:
    d = json.load(fh)
slug = sys.argv[2]
seo_title = sys.argv[3]
kw = d.get('keyword', '')

# Fix 1: slug without the keyword -> keyword slug plus up to two of the
# original words (longer than 3 chars, not already in the keyword), max 75 chars.
slug_fixed = slug
if kw and slug and strip_sw(kw) not in strip_sw(slug):
    fk_slug = slugify(kw)
    orig_words = [w for w in slug.split('-') if w not in fk_slug.split('-') and len(w) > 3][:2]
    slug_fixed = (fk_slug + ('-' + '-'.join(orig_words) if orig_words else ''))[:75].rstrip('-')

# Fix 2: SEO title without a number -> add the current year.
seo_fixed = seo_title
if seo_title and not re.search(r'\d', seo_title):
    year = time.strftime('%Y')
    # Insert the year right before the first " | " separator when there is
    # one (the main text is not truncated); otherwise append it at the end.
    if ' | ' in seo_title:
        parts = seo_title.split(' | ', 1)
        seo_fixed = f"{parts[0].rstrip()} {year} | {parts[1]}"
    else:
        t = seo_title.rstrip()
        seo_fixed = (t[:55].rsplit(' ', 1)[0] + f' {year}') if len(t) > 55 else f'{t} {year}'

print(slug_fixed)
print(seo_fixed)
PYFIX
}

_AUTOFIX="$(rankmath_autofix "$WP_JSON" "$WP_SLUG" "$WP_SEO_TITLE")"

# Apply the corrected values: line 1 is the slug, line 2 the SEO title.
# `|| true` keeps a missing line from tripping `set -e`; the variable is then empty.
{
  IFS= read -r WP_SLUG_NEW || true
  IFS= read -r WP_SEO_TITLE_NEW || true
} <<< "$_AUTOFIX"

if [[ -n "$WP_SLUG_NEW" && "$WP_SLUG_NEW" != "$WP_SLUG" ]]; then
  log_info "EP${EP_PAD}: slug auto-corrigido: '${WP_SLUG}' → '${WP_SLUG_NEW}'"
  WP_SLUG="$WP_SLUG_NEW"
fi
if [[ -n "$WP_SEO_TITLE_NEW" && "$WP_SEO_TITLE_NEW" != "$WP_SEO_TITLE" ]]; then
  log_info "EP${EP_PAD}: seo_title auto-corrigido: ano adicionado → '${WP_SEO_TITLE_NEW}'"
  WP_SEO_TITLE="$WP_SEO_TITLE_NEW"
fi
# Excerpt: meta description, a line break, then the hashtags. It stays empty
# unless both parts are present.
WP_EXCERPT=""
[[ -z "$WP_META" || -z "$WP_HASHTAGS" ]] ||
  printf -v WP_EXCERPT '%s\n%s' "$WP_META" "$WP_HASHTAGS"
# ---------------------------------------------------------------------------
# Push every extracted field to the post through WP-CLI on the server.
# ---------------------------------------------------------------------------

#######################################
# Quotes one value for a POSIX shell: wraps it in single quotes and rewrites
# each embedded ' as '\''.
# Arguments: $1 - raw value
# Outputs:   the quoted word on stdout (no trailing newline)
#######################################
sh_quote() {
  local apostrophe="'" escaped="'\\''"
  printf "'%s'" "${1//${apostrophe}/${escaped}}"
}

#######################################
# Runs one WP-CLI command in WP_PATH on the server.
# Every argument is shell-quoted individually, because ssh hands the command
# line to a shell on the remote side (running as SSH_USER).
# Globals:   WP_PATH (read)
# Arguments: "$@" - WP-CLI words, e.g.: post update 12 --post_name=foo
# Returns:   exit status of ssh_cmd
#######################################
wp_remote() {
  local remote_cmd word
  remote_cmd="cd $(sh_quote "${WP_PATH}") && wp"
  for word in "$@"; do
    remote_cmd+=" $(sh_quote "${word}")"
  done
  # Remote stderr is discarded, as before; failures are reported by the caller.
  ssh_cmd "${remote_cmd} --allow-root 2>/dev/null"
}

#######################################
# Runs a WP-CLI command and logs the outcome.
# Globals:   EP_PAD (read)
# Arguments: $1 - text logged on success; remaining args - WP-CLI words
# Returns:   0 on success; exits the script with status 1 on failure, so a
#            failed step is reported instead of silently tripping `set -e`
#######################################
run_wp_step() {
  local ok_msg=$1
  shift
  if ! wp_remote "$@"; then
    log_error "EP${EP_PAD}: comando WP-CLI falhou: wp ${1:-} ${2:-} ${3:-}"
    exit 1
  fi
  log_info "EP${EP_PAD}: ${ok_msg}"
}

# post_content
if [[ -n "$WP_CONTENT" ]]; then
  run_wp_step "post_content aplicado" post update "${POST_ID}" "--post_content=${WP_CONTENT}"
fi

# Excerpt
if [[ -n "$WP_EXCERPT" ]]; then
  run_wp_step "post_excerpt aplicado" post update "${POST_ID}" "--post_excerpt=${WP_EXCERPT}"
fi

# Slug
if [[ -n "$WP_SLUG" ]]; then
  run_wp_step "slug actualizado -> ${WP_SLUG}" post update "${POST_ID}" "--post_name=${WP_SLUG}"
fi

# Tags: WP_TAGS is comma-joined. `wp post term set` takes each term as its
# own argument, so split on commas and pass every tag as one quoted word
# (a tag such as "marketing digital" must not be split on its space).
if [[ -n "$WP_TAGS" ]]; then
  tag_split=()
  tag_args=()
  IFS=',' read -r -a tag_split <<< "$WP_TAGS"
  for tag in ${tag_split[@]+"${tag_split[@]}"}; do
    if [[ -n "$tag" ]]; then
      tag_args+=("$tag")
    fi
  done
  if (( ${#tag_args[@]} > 0 )); then
    run_wp_step "tags aplicadas" post term set "${POST_ID}" post_tag "${tag_args[@]}"
  fi
fi

# Rank Math: description
if [[ -n "$WP_META" ]]; then
  run_wp_step "rank_math_description aplicado" post meta update "${POST_ID}" rank_math_description "${WP_META}"
fi

# Rank Math: focus keyword
if [[ -n "$WP_KEYWORD" ]]; then
  run_wp_step "rank_math_focus_keyword aplicado" post meta update "${POST_ID}" rank_math_focus_keyword "${WP_KEYWORD}"
fi

# Rank Math: SEO title — falls back to "<title> | Podcast Descomplicar Digital"
# when the JSON has no seo_title.
if [[ -z "$WP_SEO_TITLE" ]]; then
  WP_TITLE_FALLBACK="$(jq -r '.title // empty' "$WP_JSON")"
  if [[ -n "$WP_TITLE_FALLBACK" ]]; then
    WP_SEO_TITLE="${WP_TITLE_FALLBACK} | Podcast Descomplicar Digital"
  fi
fi
if [[ -n "$WP_SEO_TITLE" ]]; then
  run_wp_step "rank_math_title aplicado" post meta update "${POST_ID}" rank_math_title "${WP_SEO_TITLE}"
fi

log_info "EP${EP_PAD}: Conteúdo WP aplicado com sucesso (post ${POST_ID})"
# The post ID is the script's only stdout output.
printf '%s\n' "${POST_ID}"