init: scripts diversos (crawlers, conversores, scrapers)

This commit is contained in:
2026-03-05 20:38:36 +00:00
commit 6ac6f4be2a
925 changed files with 850330 additions and 0 deletions

18
crawl4all/config.py Executable file
View File

@@ -0,0 +1,18 @@
import os
# --- Crawl target -----------------------------------------------------------
# NOTE(review): presumably the page the crawl starts from; confirm in the
# script that consumes this module.
BASE_URL: str = "https://descomplicar.pt/"

# --- Crawl limits -----------------------------------------------------------
# NOTE(review): the exact meaning of each limit (link depth, parallel requests,
# retries per URL) is decided by the consumer and is not visible here.
MAX_DEPTH: int = 2
MAX_CONCURRENCY: int = 10
RETRY_ATTEMPTS: int = 3
REQUEST_TIMEOUT: int = 30  # NOTE(review): unit is presumably seconds; confirm

# --- Output -----------------------------------------------------------------
# Relative path; what it is resolved against depends on the consumer.
OUTPUT_DIR: str = "output"
# Browser options: headless flag plus viewport dimensions.
# NOTE(review): the key names look like crawl4ai ``BrowserConfig`` keyword
# arguments (viewport presumably in pixels); confirm against the code that
# unpacks this dict.
BROWSER_CONFIG: dict = {
    "headless": True,
    "viewport_width": 1280,
    "viewport_height": 720,
}
# Per-crawl content options: a word-count threshold and two boolean switches.
# NOTE(review): the key names look like crawl4ai crawler-run options; how the
# threshold is applied is decided by the consumer, so confirm there.
CRAWLER_CONFIG: dict = {
    "word_count_threshold": 10,
    "exclude_external_links": True,
    "remove_overlay_elements": True,
}