feat(v1.1.0): validação de base de dados SQLite

- Verificação de integridade ao conectar (PRAGMA integrity_check)
- Validação de esquema completo (4 tabelas, todas as colunas)
- Migração automática de colunas em falta
- Tabela translation_backups para guardar originais antes de traduzir
- CLI --verify-db e --restore-backup
- WAL mode para melhor concorrência

Tarefa #419, Discussão #33, Projecto #65
This commit is contained in:
2026-03-12 14:43:09 +00:00
parent 6ac6f4be2a
commit 44e2250eb7
2 changed files with 266 additions and 5 deletions

View File

@@ -1,5 +1,28 @@
# Changelog — wp-translate-ptpt.py
## [1.1.0] - 2026-03-12
### Added
- Validação de base de dados: verificação de integridade SQLite (PRAGMA integrity_check) ao conectar
- Validação de esquema: verifica que todas as tabelas e colunas esperadas existem
- Migração automática: adiciona colunas em falta sem perder dados existentes
- Tabela `translation_backups`: guarda string original antes de cada tradução/correcção
- Método `verify_health()`: diagnóstico completo da BD (integridade, tabelas, contagens)
- CLI `--verify-db`: verifica saúde da BD e reporta estado de cada tabela
- CLI `--restore-backup HASH`: consulta backups de traduções por hash msgid
- Método `backup_translation()`: regista original antes de sobrescrever
- WAL mode activado para melhor concorrência em SQLite
- Tratamento de erros de BD no arranque do TranslationProcessor com mensagem de sugestão
### Changed
- CacheManager agora valida esquema completo após _init_db (4 tabelas, todas as colunas)
- TranslationProcessor verifica saúde da BD antes de iniciar processamento
- _process_entry guarda backup da string original antes de aplicar correcções de marcas e PT-BR
- Criação do processor envolvida em try/except com mensagem orientativa
### Fixed
- BDs existentes de v1.0.0 são migradas automaticamente (nova tabela translation_backups adicionada)
## [1.0.0] - 2026-02-23
### Added

View File

@@ -5,7 +5,7 @@ Sistema eficiente de traduções WordPress PT-PT.
Author: Descomplicar® Crescimento Digital
Date: 2026-02-23
Version: 1.0.0
Version: 1.1.0
"""
import os
@@ -25,7 +25,7 @@ from urllib.request import Request, urlopen
from urllib.error import URLError
# Version
__version__ = "1.0.0"
__version__ = "1.1.0"
# =============================================================================
@@ -199,10 +199,44 @@ SEED_BRANDS = [
class CacheManager:
"""Manages SQLite cache for translations and brands."""
# Expected schema: maps each table name to the list of columns it must contain.
# Read by _validate_schema() and verify_health() to detect missing tables/columns.
EXPECTED_SCHEMA = {
"brands": ["id", "name", "variations", "auto_detected", "confidence_score",
"last_seen", "plugin_slug"],
"translations": ["msgid_hash", "msgid", "msgstr", "plugin_name", "validated",
"timestamp"],
"corrections": ["id", "original", "corrected", "rule_applied", "plugin_name",
"timestamp"],
"translation_backups": ["id", "msgid_hash", "msgid", "original_msgstr",
"new_msgstr", "plugin_name", "po_file", "timestamp"],
}
def __init__(self, db_path: str):
    """Open the cache database, create its tables and validate the schema.

    The connection is obtained only through ``_safe_connect`` so that the
    integrity check always runs and no second, unchecked connection is
    left open.

    Args:
        db_path: Path of the SQLite database file.

    Raises:
        sqlite3.DatabaseError: Propagated from ``_safe_connect`` or
            ``_validate_schema`` when the database cannot be used.
    """
    self.db_path = db_path
    self.conn = self._safe_connect(db_path)
    self._init_db()
    self._validate_schema()
def _safe_connect(self, db_path: str) -> sqlite3.Connection:
    """Open ``db_path`` and return the connection only if it passes the integrity check.

    Runs ``PRAGMA integrity_check`` on the fresh connection and, when the
    check passes, switches the journal to WAL mode.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        An open connection to a database that passed the integrity check.

    Raises:
        sqlite3.DatabaseError: If the database cannot be opened or queried,
            or if the integrity check reports anything other than ``"ok"``.
            The connection is closed before the error propagates, and the
            underlying SQLite error (if any) is attached as ``__cause__``.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        # Check database integrity.
        status = conn.execute("PRAGMA integrity_check").fetchone()[0]
        if status == "ok":
            # Enable WAL for better concurrency.
            conn.execute("PRAGMA journal_mode=WAL")
    except sqlite3.DatabaseError as e:
        # Do not leak the half-initialised connection.
        if conn is not None:
            conn.close()
        raise sqlite3.DatabaseError(
            f"Erro ao conectar a base de dados ({db_path}): {e}"
        ) from e

    # Raised outside the try block so this error is never re-wrapped by the
    # handler above (no need to recognise it by its message text).
    if status != "ok":
        conn.close()
        raise sqlite3.DatabaseError(
            f"Base de dados corrompida ({db_path}): {status}"
        )
    return conn
def _init_db(self):
"""Create database schema."""
@@ -243,8 +277,102 @@ class CacheManager:
)
""")
# Backups de traducoes (nova tabela v1.1.0)
self.conn.execute("""
CREATE TABLE IF NOT EXISTS translation_backups (
id INTEGER PRIMARY KEY,
msgid_hash TEXT NOT NULL,
msgid TEXT,
original_msgstr TEXT,
new_msgstr TEXT,
plugin_name TEXT,
po_file TEXT,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
self.conn.commit()
def _validate_schema(self):
    """Check every table in ``EXPECTED_SCHEMA`` and add any missing columns.

    Missing columns are added with ``ALTER TABLE ... ADD COLUMN`` in the
    order they are declared in ``EXPECTED_SCHEMA``, so a migrated table
    always ends up with the same column order.

    Raises:
        sqlite3.DatabaseError: If an expected table does not exist, or if
            adding a missing column fails (the SQLite error is attached
            as ``__cause__``).
    """
    for table_name, expected_cols in self.EXPECTED_SCHEMA.items():
        if not self._table_exists(table_name):
            raise sqlite3.DatabaseError(
                f"Tabela '{table_name}' nao existe apos init_db. "
                f"BD possivelmente corrompida: {self.db_path}"
            )

        present = set(self._get_columns(table_name))
        # Keep declaration order: iterating a set difference would add the
        # columns (and list them in the error) in a run-dependent order.
        missing = [col for col in expected_cols if col not in present]
        if not missing:
            continue

        # Try to add the missing columns (migration).
        # NOTE(review): every migrated column is created as TEXT, whatever
        # type the CREATE TABLE statement declares for it -- confirm this is
        # acceptable for non-text columns.
        for col in missing:
            try:
                self.conn.execute(
                    f"ALTER TABLE {table_name} ADD COLUMN {col} TEXT"
                )
            except sqlite3.OperationalError as e:
                raise sqlite3.DatabaseError(
                    f"Coluna(s) em falta na tabela '{table_name}': "
                    f"{', '.join(missing)}. Migracao falhou."
                ) from e
        self.conn.commit()
def _table_exists(self, table_name: str) -> bool:
    """Return True when ``sqlite_master`` lists a table named ``table_name``."""
    query = "SELECT name FROM sqlite_master WHERE type='table' AND name=?"
    match = self.conn.execute(query, (table_name,)).fetchone()
    if match is None:
        return False
    return True
def _get_columns(self, table_name: str) -> List[str]:
    """List the column names that ``PRAGMA table_info`` reports for ``table_name``."""
    info_rows = self.conn.execute(f"PRAGMA table_info({table_name})")
    # The column name is the second field of each table_info row.
    return [col_name for _cid, col_name, *_rest in info_rows]
def verify_health(self) -> Dict[str, object]:
    """Run a full health check on the database and return a status report.

    Returns:
        A dict with these keys:

        * ``healthy`` -- False as soon as any check below fails.
        * ``integrity`` -- first row returned by ``PRAGMA integrity_check``.
        * ``tables`` -- one entry per table in ``EXPECTED_SCHEMA`` with
          ``exists``, ``rows``, ``columns_ok`` and, when columns are
          missing, ``missing_columns`` (in schema declaration order).
        * ``errors`` -- human-readable message for every failed check, so
          an unhealthy report always says why.
    """
    report = {"healthy": True, "tables": {}, "integrity": "ok", "errors": []}

    # Integrity check. Any failure is recorded in the report rather than
    # raised, so the remaining checks still run.
    try:
        status = self.conn.execute("PRAGMA integrity_check").fetchone()[0]
    except Exception as e:
        report["healthy"] = False
        report["errors"].append(f"Erro integrity_check: {e}")
    else:
        report["integrity"] = status
        if status != "ok":
            report["healthy"] = False
            report["errors"].append(f"Integridade: {status}")

    # Tables, row counts and columns.
    for table_name, expected_cols in self.EXPECTED_SCHEMA.items():
        table_info = {"exists": False, "rows": 0, "columns_ok": False}
        report["tables"][table_name] = table_info

        if not self._table_exists(table_name):
            report["healthy"] = False
            report["errors"].append(f"Tabela '{table_name}' nao existe")
            continue

        table_info["exists"] = True
        try:
            table_info["rows"] = self.conn.execute(
                f"SELECT COUNT(*) FROM {table_name}"
            ).fetchone()[0]
        except Exception as e:
            report["errors"].append(f"Erro contagem {table_name}: {e}")
            report["healthy"] = False

        present = set(self._get_columns(table_name))
        # Declaration order keeps the report deterministic between runs.
        missing = [col for col in expected_cols if col not in present]
        table_info["columns_ok"] = not missing
        if missing:
            table_info["missing_columns"] = missing
            report["healthy"] = False
            # Without this entry callers that only print ``errors`` would
            # report an unhealthy database with no explanation.
            report["errors"].append(
                f"Coluna(s) em falta na tabela '{table_name}': "
                f"{', '.join(missing)}"
            )

    return report
def get_cached_translation(self, msgid: str) -> Optional[str]:
"""Retrieve cached translation for msgid."""
msgid_hash = hashlib.md5(msgid.encode()).hexdigest()
@@ -277,6 +405,20 @@ class CacheManager:
)
self.conn.commit()
def backup_translation(self, msgid: str, original_msgstr: str,
                       new_msgstr: str, plugin_name: str, po_file: str):
    """Record the previous translation of ``msgid`` before it is overwritten.

    A row is written to ``translation_backups`` whenever the translation
    actually changes. An empty ``original_msgstr`` is recorded too, so a
    fresh translation of a previously untranslated entry leaves a trace.

    Args:
        msgid: Source string; its MD5 hex digest is stored as ``msgid_hash``.
        original_msgstr: Translation being replaced (may be empty).
        new_msgstr: Translation that replaces it.
        plugin_name: Stored in the ``plugin_name`` column.
        po_file: Stored in the ``po_file`` column.
    """
    # Nothing to record when no original was supplied or nothing changed.
    if original_msgstr is None or original_msgstr == new_msgstr:
        return

    msgid_hash = hashlib.md5(msgid.encode()).hexdigest()
    self.conn.execute(
        """INSERT INTO translation_backups
           (msgid_hash, msgid, original_msgstr, new_msgstr, plugin_name, po_file, timestamp)
           VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)""",
        (msgid_hash, msgid, original_msgstr, new_msgstr, plugin_name, po_file)
    )
    self.conn.commit()
def close(self):
    """Release the SQLite connection held by this manager."""
    connection = self.conn
    connection.close()
@@ -652,7 +794,19 @@ class TranslationProcessor:
def __init__(self, db_path: str, api_url: str):
"""Initialize processor with all components."""
self.db_path = db_path
self.cache = CacheManager(db_path) if db_path != ":memory:" else None
# Validar saude da BD antes de processar
if self.cache:
health = self.cache.verify_health()
if not health["healthy"]:
errors_str = "; ".join(health["errors"])
raise RuntimeError(
f"Base de dados com problemas: {errors_str}. "
f"Executar --verify-db para diagnostico completo."
)
self.brand_protector = BrandProtector(db_path)
self.translator = TranslationEngine(api_url)
self.po_handler = PoFileHandler()
@@ -736,6 +890,7 @@ class TranslationProcessor:
if mode == "brands-only" or entry.msgstr:
# Process msgstr (singular)
if entry.msgstr:
original_msgstr = entry.msgstr
fixed, corrections = self.brand_protector.fix_translated_brands(
entry.msgid, entry.msgstr
)
@@ -744,6 +899,13 @@ class TranslationProcessor:
fixed, _ = apply_ptbr_fixes(fixed)
entry.msgstr = fixed
# Guardar backup se houve alteracao
if self.cache and fixed != original_msgstr:
self.cache.backup_translation(
entry.msgid, original_msgstr, fixed,
plugin_name, plugin_name
)
# Process msgstr_plural (plural forms)
if entry.msgstr_plural:
for idx, value in entry.msgstr_plural.items():
@@ -784,6 +946,10 @@ class TranslationProcessor:
self.cache.save_translation(
entry.msgid, translated, plugin_name, validated=False
)
# Guardar backup (original vazio -> traduzido)
self.cache.backup_translation(
entry.msgid, "", translated, plugin_name, plugin_name
)
return entry, brands_fixed
@@ -841,6 +1007,9 @@ def main():
parser.add_argument("--brands-only", action="store_true", help="Only fix brands")
parser.add_argument("--dry-run", action="store_true", help="Show what would be done")
parser.add_argument("--init-db", action="store_true", help="Initialize database")
parser.add_argument("--verify-db", action="store_true", help="Verify database health")
parser.add_argument("--restore-backup", type=str, metavar="MSGID_HASH",
help="Restore original translation from backup by msgid hash")
parser.add_argument("--export-brands", type=Path, help="Export brands to JSON")
parser.add_argument("--import-brands", type=Path, help="Import brands from JSON")
parser.add_argument("--db-path", type=str,
@@ -855,6 +1024,40 @@ def main():
db_dir = Path(args.db_path).parent
db_dir.mkdir(parents=True, exist_ok=True)
# Verify database health
if args.verify_db:
try:
cache = CacheManager(args.db_path)
except Exception as e:
print(f"\u274c Erro ao abrir BD: {e}")
return 1
health = cache.verify_health()
print("=" * 60)
print(f"\U0001f50d Verificacao BD: {args.db_path}")
print("=" * 60)
print(f"Integridade: {health['integrity']}")
state_icon = '\u2705 Saudavel' if health['healthy'] else '\u274c Com problemas'
print(f"Estado: {state_icon}")
print()
for table_name, info in health["tables"].items():
status = "\u2705" if info["exists"] and info.get("columns_ok", False) else "\u274c"
print(f" {status} {table_name}: ", end="")
if info["exists"]:
print(f"{info['rows']} registos", end="")
if not info.get("columns_ok", True):
print(f" (colunas em falta: {info.get('missing_columns', [])})", end="")
print()
else:
print("NAO EXISTE")
if health["errors"]:
print(f"\nErros: {'; '.join(health['errors'])}")
cache.close()
return 0 if health["healthy"] else 1
# Initialize database
if args.init_db:
cache = CacheManager(args.db_path)
@@ -862,6 +1065,36 @@ def main():
cache.close()
return 0
# Restore translation from backup
if args.restore_backup:
try:
cache = CacheManager(args.db_path)
except Exception as e:
print(f"\u274c Erro ao abrir BD: {e}")
return 1
cursor = cache.conn.execute(
"""SELECT msgid, original_msgstr, new_msgstr, plugin_name, po_file, timestamp
FROM translation_backups WHERE msgid_hash = ?
ORDER BY timestamp DESC LIMIT 1""",
(args.restore_backup,)
)
row = cursor.fetchone()
if not row:
print(f"\u274c Nenhum backup encontrado para hash: {args.restore_backup}")
cache.close()
return 1
print(f"\U0001f4e6 Backup encontrado:")
print(f" msgid: {row[0][:80]}")
print(f" Original: {row[1][:80] if row[1] else '(vazio)'}")
print(f" Actual: {row[2][:80]}")
print(f" Plugin: {row[3]}")
print(f" Ficheiro: {row[4]}")
print(f" Data: {row[5]}")
cache.close()
return 0
# Export brands
if args.export_brands:
cache = CacheManager(args.db_path)
@@ -908,7 +1141,12 @@ def main():
return 1
# Process files
processor = TranslationProcessor(args.db_path, args.api_url)
try:
processor = TranslationProcessor(args.db_path, args.api_url)
except (sqlite3.DatabaseError, RuntimeError) as e:
print(f"\u274c Erro na base de dados: {e}")
print("Sugestao: executar --verify-db para diagnostico ou --init-db para reinicializar")
return 1
mode = "brands-only" if args.brands_only else "full"