Files
scripts/crawl4all/config.py

18 lines
368 B
Python
Executable File

import os
# --- Crawl settings ---------------------------------------------------------
# Plain module-level constants; nothing in this file shows how they are
# consumed. NOTE(review): meanings below are inferred from the names only.
BASE_URL = "https://descomplicar.pt/"
MAX_DEPTH = 2
MAX_CONCURRENCY = 10
RETRY_ATTEMPTS = 3
REQUEST_TIMEOUT = 30  # presumably seconds — TODO confirm against the caller
OUTPUT_DIR = "output"  # relative path; base directory depends on the caller

# --- Browser options --------------------------------------------------------
# Key names look like Crawl4AI BrowserConfig parameters — verify before
# renaming or adding keys.
BROWSER_CONFIG = dict(
    headless=True,
    viewport_width=1280,
    viewport_height=720,
)

# --- Per-run crawler options ------------------------------------------------
# Key names look like Crawl4AI crawler-run parameters — verify against the
# library version in use.
CRAWLER_CONFIG = dict(
    word_count_threshold=10,
    exclude_external_links=True,
    remove_overlay_elements=True,
)