init: scripts diversos (crawlers, conversores, scrapers)
This commit is contained in:
18
crawl4all/config.py
Executable file
18
crawl4all/config.py
Executable file
@@ -0,0 +1,18 @@
|
||||
# Crawler configuration: plain module-level constants, no logic.
# NOTE(review): `os` is not referenced by any setting below; the import is
# kept so that anything relying on it being present here keeps working.
import os

# Root URL of the site to crawl.
BASE_URL = "https://descomplicar.pt/"

# NOTE(review): the four limits below are read by code outside this file;
# the meanings given are inferred from the names and assumed units --
# confirm against the consumer before relying on them.
MAX_DEPTH = 2  # presumably how many link levels to follow from BASE_URL
MAX_CONCURRENCY = 10  # presumably the cap on simultaneous requests/pages
RETRY_ATTEMPTS = 3  # presumably attempts per URL before giving up
REQUEST_TIMEOUT = 30  # presumably seconds -- TODO confirm unit

# Relative directory name for crawl output (resolved by the consumer).
OUTPUT_DIR = "output"

# Browser launch options.
# NOTE(review): the keys look like Crawl4AI `BrowserConfig` parameters --
# confirm against the code that consumes this dict.
BROWSER_CONFIG = {
    "headless": True,
    "viewport_width": 1280,
    "viewport_height": 720,
}

# Per-crawl content-filtering options.
# NOTE(review): the keys look like Crawl4AI `CrawlerRunConfig` parameters --
# confirm against the code that consumes this dict.
CRAWLER_CONFIG = {
    "word_count_threshold": 10,
    "exclude_external_links": True,
    "remove_overlay_elements": True,
}
|
||||
Reference in New Issue
Block a user