Files
scripts/scraper/test_improved_parser.py

44 lines
1.3 KiB
Python
Executable File

import json
# Manual check of the improved response parser: take a simulated Gemini reply
# (a JSON object wrapped in a markdown code fence), strip the fence, and parse
# the payload, falling back to brace extraction if the direct parse fails.
gemini_response = '```json\n{\n "relevante": true,\n "problema": "Não há um problema explícito no texto."\n}\n```'

print("RESPOSTA BRUTA:")
print(repr(gemini_response))

# Apply the improved parsing logic to the raw reply.
content_text = gemini_response

# Strip the markdown fence: keep only the text between the opening fence
# (preferring an explicit ```json tag) and the next closing fence.
if '```json' in content_text:
    content_text = content_text.split('```json')[1].split('```')[0]
elif '```' in content_text:
    content_text = content_text.split('```')[1].split('```')[0]

# Drop the whitespace/newlines left around the payload by the fence.
content_text = content_text.strip()

print("\n\nAPÓS LIMPEZA:")
print(repr(content_text))

# Bound up front so the name exists even when every parse attempt fails.
knowledge = None

try:
    knowledge = json.loads(content_text)
except json.JSONDecodeError as e:
    print(f"\n\n❌ JSON PARSE FALHOU: {e}")

    # Fallback: slice from the first '{' to the last '}' and retry, which
    # tolerates stray text (e.g. a language tag) around the JSON object.
    start = content_text.find('{')
    end = content_text.rfind('}') + 1  # rfind() == -1 yields 0, rejected below
    if start != -1 and end > start:
        clean_json = content_text[start:end]
        print("\n\nFALLBACK EXTRACT:")
        print(repr(clean_json))
        try:
            knowledge = json.loads(clean_json)
        except json.JSONDecodeError as e2:
            # Only a decode failure is expected here; anything else is a
            # genuine bug and should surface instead of being swallowed.
            print(f"❌ FALLBACK FALHOU: {e2}")
        else:
            print("\n✅ FALLBACK SUCESSO!")
            print(json.dumps(knowledge, indent=2, ensure_ascii=False))
    else:
        # Previously this case ended silently; report it explicitly.
        print("❌ FALLBACK FALHOU: nenhum objeto JSON encontrado no texto")
else:
    print("\n\n✅ JSON PARSE SUCESSO!")
    print(json.dumps(knowledge, indent=2, ensure_ascii=False))