From 44e2250eb77ee8fa2b4ecc4626edecce3e19566c Mon Sep 17 00:00:00 2001 From: AikTop Date: Thu, 12 Mar 2026 14:43:09 +0000 Subject: [PATCH] =?UTF-8?q?feat(v1.1.0):=20valida=C3=A7=C3=A3o=20de=20base?= =?UTF-8?q?=20de=20dados=20SQLite?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Verificação de integridade ao conectar (PRAGMA integrity_check) - Validação de esquema completo (4 tabelas, todas as colunas) - Migração automática de colunas em falta - Tabela translation_backups para guardar originais antes de traduzir - CLI --verify-db e --restore-backup - WAL mode para melhor concorrência Tarefa #419, Discussão #33, Projecto #65 --- translate-wp-plugin/CHANGELOG.md | 23 +++ translate-wp-plugin/wp-translate-ptpt.py | 248 ++++++++++++++++++++++- 2 files changed, 266 insertions(+), 5 deletions(-) diff --git a/translate-wp-plugin/CHANGELOG.md b/translate-wp-plugin/CHANGELOG.md index 42f9438..08c8065 100644 --- a/translate-wp-plugin/CHANGELOG.md +++ b/translate-wp-plugin/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog — wp-translate-ptpt.py +## [1.1.0] - 2026-03-12 + +### Added +- Validação de base de dados: verificação de integridade SQLite (PRAGMA integrity_check) ao conectar +- Validação de esquema: verifica que todas as tabelas e colunas esperadas existem +- Migração automática: adiciona colunas em falta sem perder dados existentes +- Tabela `translation_backups`: guarda string original antes de cada tradução/correcção +- Método `verify_health()`: diagnóstico completo da BD (integridade, tabelas, contagens) +- CLI `--verify-db`: verifica saúde da BD e reporta estado de cada tabela +- CLI `--restore-backup HASH`: consulta backups de traduções por hash msgid +- Método `backup_translation()`: regista original antes de sobrescrever +- WAL mode activado para melhor concorrência em SQLite +- Tratamento de erros de BD no arranque do TranslationProcessor com mensagem de sugestão + +### Changed +- CacheManager agora valida esquema completo 
após _init_db (4 tabelas, todas as colunas) +- TranslationProcessor verifica saúde da BD antes de iniciar processamento +- _process_entry guarda backup da string original antes de aplicar correcções de marcas e PT-BR +- Criação do processor envolvida em try/except com mensagem orientativa + +### Fixed +- BDs existentes de v1.0.0 são migradas automaticamente (nova tabela translation_backups adicionada) + ## [1.0.0] - 2026-02-23 ### Added diff --git a/translate-wp-plugin/wp-translate-ptpt.py b/translate-wp-plugin/wp-translate-ptpt.py index df63a5b..d08c9dd 100755 --- a/translate-wp-plugin/wp-translate-ptpt.py +++ b/translate-wp-plugin/wp-translate-ptpt.py @@ -5,7 +5,7 @@ Sistema eficiente de traduções WordPress PT-PT. Author: Descomplicar® Crescimento Digital Date: 2026-02-23 -Version: 1.0.0 +Version: 1.1.0 """ import os @@ -25,7 +25,7 @@ from urllib.request import Request, urlopen from urllib.error import URLError # Version -__version__ = "1.0.0" +__version__ = "1.1.0" # ============================================================================= @@ -199,10 +199,44 @@ SEED_BRANDS = [ class CacheManager: """Manages SQLite cache for translations and brands.""" + # Esquema esperado: tabela -> lista de colunas obrigatorias + EXPECTED_SCHEMA = { + "brands": ["id", "name", "variations", "auto_detected", "confidence_score", + "last_seen", "plugin_slug"], + "translations": ["msgid_hash", "msgid", "msgstr", "plugin_name", "validated", + "timestamp"], + "corrections": ["id", "original", "corrected", "rule_applied", "plugin_name", + "timestamp"], + "translation_backups": ["id", "msgid_hash", "msgid", "original_msgstr", + "new_msgstr", "plugin_name", "po_file", "timestamp"], + } + def __init__(self, db_path: str): - """Initialize database connection and create tables.""" - self.conn = sqlite3.connect(db_path) + """Initialize database connection, validate and create tables.""" + self.db_path = db_path + self.conn = self._safe_connect(db_path) self._init_db() + 
def _safe_connect(self, db_path: str) -> sqlite3.Connection:
    """Open the SQLite database at *db_path* and verify it before use.

    Runs ``PRAGMA integrity_check`` right after connecting and, when the
    check reports ``ok``, switches the journal to WAL mode (enabled for
    better concurrency) and hands the connection back.

    Args:
        db_path: Path handed to ``sqlite3.connect``.

    Returns:
        An open connection with ``journal_mode=WAL`` requested.

    Raises:
        sqlite3.DatabaseError: If the integrity check reports anything
            other than ``ok`` (message starts with "Base de dados
            corrompida"), or if SQLite fails while opening or checking the
            file (message starts with "Erro ao conectar a base de dados";
            the underlying SQLite error is chained as ``__cause__``).
            In both cases the connection is closed before raising.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        row = conn.execute("PRAGMA integrity_check").fetchone()
        status = row[0] if row else None
        if status == "ok":
            conn.execute("PRAGMA journal_mode=WAL")
            return conn
    except sqlite3.DatabaseError as e:
        # Do not leak the handle when the file cannot be opened or checked.
        if conn is not None:
            conn.close()
        raise sqlite3.DatabaseError(
            f"Erro ao conectar a base de dados ({db_path}): {e}"
        ) from e

    # Reached only when the check ran but did not answer "ok". Raising here,
    # outside the try block, means this error can never be mistaken for a
    # low-level SQLite failure and re-wrapped.
    conn.close()
    raise sqlite3.DatabaseError(
        f"Base de dados corrompida ({db_path}): {status}"
    )
def verify_health(self) -> Dict[str, object]:
    """Run a full health check on the database and return a status report.

    The check never modifies the database. It runs
    ``PRAGMA integrity_check`` and then, for every table listed in
    ``self.EXPECTED_SCHEMA``, records whether the table exists, how many
    rows it holds and whether all expected columns are present.

    Returns:
        A dict with the keys:

        * ``"healthy"`` -- ``False`` as soon as any problem is found.
        * ``"integrity"`` -- first row of ``PRAGMA integrity_check``, or
          ``"desconhecida"`` when the check itself could not be run.
        * ``"tables"`` -- per-table dict with ``exists``, ``rows``,
          ``columns_ok`` and, when columns are missing, a sorted
          ``missing_columns`` list.
        * ``"errors"`` -- one human-readable message per problem found, so
          an unhealthy report always explains why.
    """
    report = {"healthy": True, "tables": {}, "integrity": "ok", "errors": []}
    errors = report["errors"]

    try:
        result = self.conn.execute("PRAGMA integrity_check").fetchone()
    except sqlite3.Error as e:
        report["healthy"] = False
        # The check did not run, so do not keep advertising "ok".
        report["integrity"] = "desconhecida"
        errors.append(f"Erro integrity_check: {e}")
    else:
        report["integrity"] = result[0]
        if result[0] != "ok":
            report["healthy"] = False
            errors.append(f"Integridade: {result[0]}")

    for table_name, expected_cols in self.EXPECTED_SCHEMA.items():
        table_info = {"exists": False, "rows": 0, "columns_ok": False}
        report["tables"][table_name] = table_info

        if not self._table_exists(table_name):
            report["healthy"] = False
            errors.append(f"Tabela '{table_name}' nao existe")
            continue

        table_info["exists"] = True
        try:
            # table_name comes from EXPECTED_SCHEMA, not from user input,
            # so interpolating it into the statement is safe here.
            table_info["rows"] = self.conn.execute(
                f"SELECT COUNT(*) FROM {table_name}"
            ).fetchone()[0]
        except sqlite3.Error as e:
            report["healthy"] = False
            errors.append(f"Erro contagem {table_name}: {e}")

        # Sorted so the report is deterministic across runs.
        missing = sorted(set(expected_cols) - set(self._get_columns(table_name)))
        table_info["columns_ok"] = not missing
        if missing:
            table_info["missing_columns"] = missing
            report["healthy"] = False
            errors.append(
                f"Colunas em falta na tabela '{table_name}': {', '.join(missing)}"
            )

    return report
def backup_translation(self, msgid: str, original_msgstr: str,
                       new_msgstr: str, plugin_name: str, po_file: str) -> None:
    """Record the previous translation of *msgid* before it is overwritten.

    A row is written to ``translation_backups`` whenever the translation
    actually changes. This includes the first translation of an entry,
    where the previous value is empty, so that every change can later be
    looked up by the MD5 hash of *msgid*.

    Args:
        msgid: Source string of the entry.
        original_msgstr: Translation before the change. ``None`` is treated
            as an empty string.
        new_msgstr: Translation after the change. ``None`` is treated as an
            empty string.
        plugin_name: Stored in the ``plugin_name`` column.
        po_file: Stored in the ``po_file`` column.
    """
    previous = original_msgstr or ""
    updated = new_msgstr or ""
    # Only a no-op change is skipped. An empty original is a legitimate
    # "untranslated -> translated" transition and must be recorded.
    if previous == updated:
        return

    msgid_hash = hashlib.md5(msgid.encode()).hexdigest()
    self.conn.execute(
        """INSERT INTO translation_backups
           (msgid_hash, msgid, original_msgstr, new_msgstr, plugin_name, po_file, timestamp)
           VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)""",
        (msgid_hash, msgid, previous, updated, plugin_name, po_file)
    )
    self.conn.commit()
+ ) + self.brand_protector = BrandProtector(db_path) self.translator = TranslationEngine(api_url) self.po_handler = PoFileHandler() @@ -736,6 +890,7 @@ class TranslationProcessor: if mode == "brands-only" or entry.msgstr: # Process msgstr (singular) if entry.msgstr: + original_msgstr = entry.msgstr fixed, corrections = self.brand_protector.fix_translated_brands( entry.msgid, entry.msgstr ) @@ -744,6 +899,13 @@ class TranslationProcessor: fixed, _ = apply_ptbr_fixes(fixed) entry.msgstr = fixed + # Guardar backup se houve alteracao + if self.cache and fixed != original_msgstr: + self.cache.backup_translation( + entry.msgid, original_msgstr, fixed, + plugin_name, plugin_name + ) + # Process msgstr_plural (plural forms) if entry.msgstr_plural: for idx, value in entry.msgstr_plural.items(): @@ -784,6 +946,10 @@ class TranslationProcessor: self.cache.save_translation( entry.msgid, translated, plugin_name, validated=False ) + # Guardar backup (original vazio -> traduzido) + self.cache.backup_translation( + entry.msgid, "", translated, plugin_name, plugin_name + ) return entry, brands_fixed @@ -841,6 +1007,9 @@ def main(): parser.add_argument("--brands-only", action="store_true", help="Only fix brands") parser.add_argument("--dry-run", action="store_true", help="Show what would be done") parser.add_argument("--init-db", action="store_true", help="Initialize database") + parser.add_argument("--verify-db", action="store_true", help="Verify database health") + parser.add_argument("--restore-backup", type=str, metavar="MSGID_HASH", + help="Restore original translation from backup by msgid hash") parser.add_argument("--export-brands", type=Path, help="Export brands to JSON") parser.add_argument("--import-brands", type=Path, help="Import brands from JSON") parser.add_argument("--db-path", type=str, @@ -855,6 +1024,40 @@ def main(): db_dir = Path(args.db_path).parent db_dir.mkdir(parents=True, exist_ok=True) + # Verify database health + if args.verify_db: + try: + cache = 
CacheManager(args.db_path) + except Exception as e: + print(f"\u274c Erro ao abrir BD: {e}") + return 1 + + health = cache.verify_health() + print("=" * 60) + print(f"\U0001f50d Verificacao BD: {args.db_path}") + print("=" * 60) + print(f"Integridade: {health['integrity']}") + state_icon = '\u2705 Saudavel' if health['healthy'] else '\u274c Com problemas' + print(f"Estado: {state_icon}") + print() + + for table_name, info in health["tables"].items(): + status = "\u2705" if info["exists"] and info.get("columns_ok", False) else "\u274c" + print(f" {status} {table_name}: ", end="") + if info["exists"]: + print(f"{info['rows']} registos", end="") + if not info.get("columns_ok", True): + print(f" (colunas em falta: {info.get('missing_columns', [])})", end="") + print() + else: + print("NAO EXISTE") + + if health["errors"]: + print(f"\nErros: {'; '.join(health['errors'])}") + + cache.close() + return 0 if health["healthy"] else 1 + # Initialize database if args.init_db: cache = CacheManager(args.db_path) @@ -862,6 +1065,36 @@ def main(): cache.close() return 0 + # Restore translation from backup + if args.restore_backup: + try: + cache = CacheManager(args.db_path) + except Exception as e: + print(f"\u274c Erro ao abrir BD: {e}") + return 1 + + cursor = cache.conn.execute( + """SELECT msgid, original_msgstr, new_msgstr, plugin_name, po_file, timestamp + FROM translation_backups WHERE msgid_hash = ? 
+ ORDER BY timestamp DESC LIMIT 1""", + (args.restore_backup,) + ) + row = cursor.fetchone() + if not row: + print(f"\u274c Nenhum backup encontrado para hash: {args.restore_backup}") + cache.close() + return 1 + + print(f"\U0001f4e6 Backup encontrado:") + print(f" msgid: {row[0][:80]}") + print(f" Original: {row[1][:80] if row[1] else '(vazio)'}") + print(f" Actual: {row[2][:80]}") + print(f" Plugin: {row[3]}") + print(f" Ficheiro: {row[4]}") + print(f" Data: {row[5]}") + cache.close() + return 0 + # Export brands if args.export_brands: cache = CacheManager(args.db_path) @@ -908,7 +1141,12 @@ def main(): return 1 # Process files - processor = TranslationProcessor(args.db_path, args.api_url) + try: + processor = TranslationProcessor(args.db_path, args.api_url) + except (sqlite3.DatabaseError, RuntimeError) as e: + print(f"\u274c Erro na base de dados: {e}") + print("Sugestao: executar --verify-db para diagnostico ou --init-db para reinicializar") + return 1 mode = "brands-only" if args.brands_only else "full"