mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
feat: add browser profile XDG support and documentation (#777)
This commit is contained in:
3
.github/workflows/build.yml
vendored
3
.github/workflows/build.yml
vendored
@@ -174,7 +174,8 @@ jobs:
|
||||
case "${{ matrix.os }}" in
|
||||
ubuntu-*)
|
||||
sudo apt-get install --no-install-recommends -y xvfb
|
||||
xvfb-run pdm run itest:cov -vv
|
||||
# Run tests INSIDE xvfb context
|
||||
xvfb-run bash -c 'pdm run itest:cov -vv'
|
||||
;;
|
||||
*) pdm run itest:cov -vv
|
||||
;;
|
||||
|
||||
19
README.md
19
README.md
@@ -248,6 +248,25 @@ Limitation of `download`: It's only possible to extract the cheapest given shipp
|
||||
|
||||
All configuration files can be in YAML or JSON format.
|
||||
|
||||
### Installation modes (portable vs. system-wide)
|
||||
|
||||
On first run, the app may ask which installation mode to use. In non-interactive environments (CI/headless), it does not prompt and defaults to portable mode. The `--config` and `--logfile` options override only their own paths; they do not change any other mode-dependent path or the selected installation mode.
|
||||
|
||||
1. **Portable mode (recommended for most users, especially on Windows):**
|
||||
- Stores config, logs, downloads, and state in the current directory
|
||||
- No admin permissions required
|
||||
- Easy backup/migration; works from USB drives
|
||||
|
||||
2. **System-wide mode (advanced users / multi-user setups):**
|
||||
- Stores files in OS-standard locations
|
||||
- Cleaner directory structure; better separation from working directory
|
||||
- Requires proper permissions for user data directories
|
||||
|
||||
**OS notes (brief):**
|
||||
- **Windows:** System-wide uses AppData (Roaming/Local); portable keeps everything beside the `.exe`.
|
||||
- **Linux:** System-wide follows XDG Base Directory spec; portable stays in the current working directory.
|
||||
- **macOS:** System-wide uses `~/Library/Application Support/kleinanzeigen-bot` (and related dirs); portable stays in the current directory.
|
||||
|
||||
### <a name="main-config"></a>1) Main configuration
|
||||
|
||||
When executed, the app looks for a `config.yaml` file in the current directory by default. If the file does not exist, it is created automatically.
|
||||
|
||||
@@ -111,7 +111,8 @@ lint = { composite = ["lint:ruff", "lint:mypy", "lint:pyright"] }
|
||||
# Run unit tests only (exclude smoke and itest)
|
||||
utest = "python -m pytest --capture=tee-sys -m \"not itest and not smoke\""
|
||||
# Run integration tests only (exclude smoke)
|
||||
itest = "python -m pytest --capture=tee-sys -m \"itest and not smoke\""
|
||||
# Uses -n 0 to disable xdist parallelization - browser tests are flaky with parallel workers
|
||||
itest = "python -m pytest --capture=tee-sys -m \"itest and not smoke\" -n 0"
|
||||
# Run smoke tests only
|
||||
smoke = "python -m pytest --capture=tee-sys -m smoke"
|
||||
# Run all tests in order: unit, integration, smoke
|
||||
@@ -126,7 +127,7 @@ test = { composite = ["utest", "itest", "smoke"] }
|
||||
"coverage:prepare" = { shell = "python scripts/coverage_helper.py prepare" }
|
||||
"test:cov" = { composite = ["coverage:prepare", "utest:cov", "itest:cov", "smoke:cov", "coverage:combine"] }
|
||||
"utest:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-unit.sqlite .temp/coverage-unit.xml \"not itest and not smoke\"" }
|
||||
"itest:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-itest.sqlite .temp/coverage-integration.xml \"itest and not smoke\"" }
|
||||
"itest:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-itest.sqlite .temp/coverage-integration.xml \"itest and not smoke\" -n 0" }
|
||||
"smoke:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-smoke.sqlite .temp/coverage-smoke.xml smoke" }
|
||||
"coverage:combine" = { shell = "python scripts/coverage_helper.py combine .temp/.coverage-unit.sqlite .temp/.coverage-itest.sqlite .temp/.coverage-smoke.sqlite" }
|
||||
# Run all tests with coverage in a single invocation
|
||||
|
||||
@@ -185,7 +185,7 @@
|
||||
"BrowserConfig": {
|
||||
"properties": {
|
||||
"arguments": {
|
||||
"description": "See https://peter.sh/experiments/chromium-command-line-switches/",
|
||||
"description": "See https://peter.sh/experiments/chromium-command-line-switches/. Browser profile path is auto-configured based on installation mode (portable/XDG).",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
@@ -227,8 +227,8 @@
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": ".temp/browser-profile",
|
||||
"description": "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md",
|
||||
"default": null,
|
||||
"description": "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md. If not specified, defaults to XDG cache directory in XDG mode or .temp/browser-profile in portable mode.",
|
||||
"title": "User Data Dir"
|
||||
},
|
||||
"profile_name": {
|
||||
|
||||
@@ -149,7 +149,7 @@ def apply_auto_price_reduction(ad_cfg: Ad, _ad_cfg_orig: dict[str, Any], ad_file
|
||||
# Note: price_reduction_count is persisted to ad_cfg_orig only after successful publish
|
||||
|
||||
|
||||
class KleinanzeigenBot(WebScrapingMixin):
|
||||
class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
def __init__(self) -> None:
|
||||
# workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/295
|
||||
# see https://github.com/pyinstaller/pyinstaller/issues/7229#issuecomment-1309383026
|
||||
|
||||
@@ -91,7 +91,10 @@ class AdDefaults(ContextualModel):
|
||||
|
||||
|
||||
class DownloadConfig(ContextualModel):
|
||||
include_all_matching_shipping_options: bool = Field(default=False, description="if true, all shipping options matching the package size will be included")
|
||||
include_all_matching_shipping_options:bool = Field(
|
||||
default = False,
|
||||
description = "if true, all shipping options matching the package size will be included",
|
||||
)
|
||||
excluded_shipping_options:list[str] = Field(default_factory = list, description = "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']")
|
||||
folder_name_max_length:int = Field(default = 100, ge = 10, le = 255, description = "maximum length for folder names when downloading ads (default: 100)")
|
||||
rename_existing_folders:bool = Field(default = False, description = "if true, rename existing folders without titles to include titles (default: false)")
|
||||
|
||||
@@ -457,6 +457,9 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
|
||||
" -> Browser profile name: %s": " -> Browser-Profilname: %s"
|
||||
" -> Browser user data dir: %s": " -> Browser-Benutzerdatenverzeichnis: %s"
|
||||
" -> Custom Browser argument: %s": " -> Benutzerdefiniertes Browser-Argument: %s"
|
||||
"Ignoring empty --user-data-dir= argument; falling back to configured user_data_dir.": "Ignoriere leeres --user-data-dir= Argument; verwende konfiguriertes user_data_dir."
|
||||
"Configured browser.user_data_dir (%s) does not match --user-data-dir argument (%s); using the argument value.": "Konfiguriertes browser.user_data_dir (%s) stimmt nicht mit --user-data-dir Argument (%s) überein; verwende Argument-Wert."
|
||||
"Remote debugging detected, but browser configuration looks invalid: %s": "Remote-Debugging erkannt, aber Browser-Konfiguration scheint ungültig: %s"
|
||||
" -> Setting chrome prefs [%s]...": " -> Setze Chrome-Einstellungen [%s]..."
|
||||
" -> Adding Browser extension: [%s]": " -> Füge Browser-Erweiterung hinzu: [%s]"
|
||||
"Failed to connect to browser. This error often occurs when:": "Fehler beim Verbinden mit dem Browser. Dieser Fehler tritt häufig auf, wenn:"
|
||||
@@ -546,8 +549,8 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
|
||||
" -> Unexpected error during browser version validation, skipping: %s": " -> Unerwarteter Fehler bei Browser-Versionsvalidierung, wird übersprungen: %s"
|
||||
|
||||
_diagnose_chrome_version_issues:
|
||||
"(info) %s version from binary: %s %s (major: %d)": "(Info) %s-Version von Binärdatei: %s %s (Hauptversion: %d)"
|
||||
"(info) %s version from remote debugging: %s %s (major: %d)": "(Info) %s-Version von Remote-Debugging: %s %s (Hauptversion: %d)"
|
||||
"(info) %s version from binary: %s (major: %d)": "(Info) %s-Version von Binärdatei: %s (Hauptversion: %d)"
|
||||
"(info) %s version from remote debugging: %s (major: %d)": "(Info) %s-Version von Remote-Debugging: %s (Hauptversion: %d)"
|
||||
"(info) %s 136+ detected - security validation required": "(Info) %s 136+ erkannt - Sicherheitsvalidierung erforderlich"
|
||||
"(info) %s pre-136 detected - no special security requirements": "(Info) %s vor 136 erkannt - keine besonderen Sicherheitsanforderungen"
|
||||
"(info) Remote %s 136+ detected - validating configuration": "(Info) Remote %s 136+ erkannt - validiere Konfiguration"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
import asyncio, enum, inspect, json, os, platform, secrets, shutil, subprocess, urllib.request # isort: skip # noqa: S404
|
||||
from collections.abc import Awaitable, Callable, Coroutine, Iterable
|
||||
from gettext import gettext as _
|
||||
from pathlib import Path
|
||||
from typing import Any, Final, Optional, cast
|
||||
|
||||
try:
|
||||
@@ -22,7 +23,7 @@ from nodriver.core.tab import Tab as Page
|
||||
from kleinanzeigen_bot.model.config_model import Config as BotConfig
|
||||
from kleinanzeigen_bot.model.config_model import TimeoutConfig
|
||||
|
||||
from . import files, loggers, net
|
||||
from . import files, loggers, net, xdg_paths
|
||||
from .chrome_version_detector import (
|
||||
ChromeVersionInfo,
|
||||
detect_chrome_version_from_binary,
|
||||
@@ -40,6 +41,28 @@ if TYPE_CHECKING:
|
||||
_KEY_VALUE_PAIR_SIZE = 2
|
||||
|
||||
|
||||
def _resolve_user_data_dir_paths(arg_value:str, config_value:str) -> tuple[Any, Any]:
|
||||
"""Resolve the argument and config user_data_dir paths for comparison."""
|
||||
try:
|
||||
return (
|
||||
Path(arg_value).expanduser().resolve(),
|
||||
Path(config_value).expanduser().resolve(),
|
||||
)
|
||||
except OSError as exc:
|
||||
LOG.debug("Failed to resolve user_data_dir paths for comparison: %s", exc)
|
||||
return None, None
|
||||
|
||||
|
||||
def _has_non_empty_user_data_dir_arg(args:Iterable[str]) -> bool:
    """
    Check whether any browser argument supplies a non-empty ``--user-data-dir=`` value.

    Surrounding whitespace and surrounding ``"`` / ``'`` characters are stripped from the
    value before it is tested, so ``--user-data-dir=`` and ``--user-data-dir=""`` count as empty.

    :param args: browser command line arguments to inspect
    :return: True if at least one ``--user-data-dir=`` argument carries a non-empty value
    """
    prefix = "--user-data-dir="
    return any(
        arg.removeprefix(prefix).strip().strip('"').strip("'")
        for arg in args
        if arg.startswith(prefix)
    )
|
||||
|
||||
|
||||
def _is_remote_object(obj:Any) -> TypeGuard["RemoteObject"]:
|
||||
"""Type guard to check if an object is a RemoteObject."""
|
||||
return hasattr(obj, "__class__") and "RemoteObject" in str(type(obj))
|
||||
@@ -58,7 +81,7 @@ __all__ = [
|
||||
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
|
||||
|
||||
# see https://api.jquery.com/category/selectors/
|
||||
METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f"\\{ch}" for ch in '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~'})
|
||||
METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f"\\{ch}" for ch in "!\"#$%&'()*+,./:;<=>?@[\\]^`{|}~"})
|
||||
|
||||
|
||||
def _is_admin() -> bool:
|
||||
@@ -90,7 +113,6 @@ class Is(enum.Enum):
|
||||
|
||||
|
||||
class BrowserConfig:
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.arguments:Iterable[str] = []
|
||||
self.binary_location:str | None = None
|
||||
@@ -102,37 +124,27 @@ class BrowserConfig:
|
||||
|
||||
def _write_initial_prefs(prefs_file:str) -> None:
    """
    Write the initial browser preferences as JSON to the given file.

    An existing file at that location is overwritten.

    :param prefs_file: path of the preferences file to create
    """
    with open(prefs_file, "w", encoding = "UTF-8") as fd:
        json.dump(
            {
                "credentials_enable_service": False,
                "enable_do_not_track": True,
                "google": {"services": {"consented_to_sync": False}},
                "profile": {
                    "default_content_setting_values": {
                        "popups": 0,
                        "notifications": 2,  # 1 = allow, 2 = block browser notifications
                    },
                    "password_manager_enabled": False,
                },
                "signin": {"allowed": False},
                "translate_site_blacklist": ["www.kleinanzeigen.de"],
                # the value is stored with embedded double quotes, exactly as in the original literal
                "devtools": {"preferences": {"currentDockState": '"bottom"'}},
            },
            fd,
        )
|
||||
|
||||
|
||||
class WebScrapingMixin:
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.browser_config:Final[BrowserConfig] = BrowserConfig()
|
||||
self.browser:Browser = None # pyright: ignore[reportAttributeAccessIssue]
|
||||
@@ -140,6 +152,11 @@ class WebScrapingMixin:
|
||||
self._default_timeout_config:TimeoutConfig | None = None
|
||||
self.config:BotConfig = cast(BotConfig, None)
|
||||
|
||||
@property
def _installation_mode(self) -> str:
    """
    Installation mode of this instance.

    Reads the ``installation_mode_or_portable`` attribute if the instance has one;
    otherwise the result is ``"portable"``.
    """
    fallback_mode = "portable"
    # getattr() rather than direct attribute access: the attribute is not declared on this class
    mode:str = getattr(self, "installation_mode_or_portable", fallback_mode)
    return mode
|
||||
|
||||
def _get_timeout_config(self) -> TimeoutConfig:
|
||||
config = getattr(self, "config", None)
|
||||
timeouts:TimeoutConfig | None = None
|
||||
@@ -172,12 +189,7 @@ class WebScrapingMixin:
|
||||
return 1 + cfg.retry_max_attempts
|
||||
|
||||
async def _run_with_timeout_retries(
|
||||
self,
|
||||
operation:Callable[[float], Awaitable[T]],
|
||||
*,
|
||||
description:str,
|
||||
key:str = "default",
|
||||
override:float | None = None
|
||||
self, operation:Callable[[float], Awaitable[T]], *, description:str, key:str = "default", override:float | None = None
|
||||
) -> T:
|
||||
"""
|
||||
Execute an async callable with retry/backoff handling for TimeoutError.
|
||||
@@ -191,13 +203,7 @@ class WebScrapingMixin:
|
||||
except TimeoutError:
|
||||
if attempt >= attempts - 1:
|
||||
raise
|
||||
LOG.debug(
|
||||
"Retrying %s after TimeoutError (attempt %d/%d, timeout %.1fs)",
|
||||
description,
|
||||
attempt + 1,
|
||||
attempts,
|
||||
effective_timeout
|
||||
)
|
||||
LOG.debug("Retrying %s after TimeoutError (attempt %d/%d, timeout %.1fs)", description, attempt + 1, attempts, effective_timeout)
|
||||
|
||||
raise TimeoutError(f"{description} failed without executing operation")
|
||||
|
||||
@@ -210,7 +216,24 @@ class WebScrapingMixin:
|
||||
self.browser_config.binary_location = self.get_compatible_browser()
|
||||
LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location)
|
||||
|
||||
has_remote_debugging = any(arg.startswith("--remote-debugging-port=") for arg in self.browser_config.arguments)
|
||||
is_test_environment = bool(os.environ.get("PYTEST_CURRENT_TEST"))
|
||||
|
||||
if (
|
||||
not (self.browser_config.user_data_dir and self.browser_config.user_data_dir.strip())
|
||||
and not _has_non_empty_user_data_dir_arg(self.browser_config.arguments)
|
||||
and not has_remote_debugging
|
||||
and not is_test_environment
|
||||
):
|
||||
self.browser_config.user_data_dir = str(xdg_paths.get_browser_profile_path(self._installation_mode))
|
||||
|
||||
# Chrome version detection and validation
|
||||
if has_remote_debugging:
|
||||
try:
|
||||
await self._validate_chrome_version_configuration()
|
||||
except AssertionError as exc:
|
||||
LOG.warning(_("Remote debugging detected, but browser configuration looks invalid: %s"), exc)
|
||||
else:
|
||||
await self._validate_chrome_version_configuration()
|
||||
|
||||
########################################################
|
||||
@@ -229,10 +252,12 @@ class WebScrapingMixin:
|
||||
|
||||
# Enhanced port checking with retry logic
|
||||
port_available = await self._check_port_with_retry(remote_host, remote_port)
|
||||
ensure(port_available,
|
||||
ensure(
|
||||
port_available,
|
||||
f"Browser process not reachable at {remote_host}:{remote_port}. "
|
||||
f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml. "
|
||||
f"Make sure the browser is running and the port is not blocked by firewall.")
|
||||
f"Make sure the browser is running and the port is not blocked by firewall.",
|
||||
)
|
||||
|
||||
try:
|
||||
cfg = NodriverConfig(
|
||||
@@ -255,8 +280,7 @@ class WebScrapingMixin:
|
||||
LOG.error("Troubleshooting steps:")
|
||||
LOG.error("1. Close all browser instances and try again")
|
||||
LOG.error("2. Remove the user_data_dir configuration temporarily")
|
||||
LOG.error("3. Start browser manually with: %s --remote-debugging-port=%d",
|
||||
self.browser_config.binary_location, remote_port)
|
||||
LOG.error("3. Start browser manually with: %s --remote-debugging-port=%d", self.browser_config.binary_location, remote_port)
|
||||
LOG.error("4. Check if any antivirus or security software is blocking the connection")
|
||||
raise
|
||||
|
||||
@@ -274,13 +298,11 @@ class WebScrapingMixin:
|
||||
"--disable-sync",
|
||||
"--no-experiments",
|
||||
"--disable-search-engine-choice-screen",
|
||||
|
||||
"--disable-features=MediaRouter",
|
||||
"--use-mock-keychain",
|
||||
|
||||
"--test-type", # https://stackoverflow.com/a/36746675/5116073
|
||||
# https://chromium.googlesource.com/chromium/src/+/master/net/dns/README.md#request-remapping
|
||||
'--host-resolver-rules="MAP connect.facebook.net 127.0.0.1, MAP securepubads.g.doubleclick.net 127.0.0.1, MAP www.googletagmanager.com 127.0.0.1"'
|
||||
'--host-resolver-rules="MAP connect.facebook.net 127.0.0.1, MAP securepubads.g.doubleclick.net 127.0.0.1, MAP www.googletagmanager.com 127.0.0.1"',
|
||||
]
|
||||
|
||||
is_edge = "edge" in self.browser_config.binary_location.lower()
|
||||
@@ -295,10 +317,36 @@ class WebScrapingMixin:
|
||||
LOG.info(" -> Browser profile name: %s", self.browser_config.profile_name)
|
||||
browser_args.append(f"--profile-directory={self.browser_config.profile_name}")
|
||||
|
||||
user_data_dir_from_args:str | None = None
|
||||
for browser_arg in self.browser_config.arguments:
|
||||
LOG.info(" -> Custom Browser argument: %s", browser_arg)
|
||||
if browser_arg.startswith("--user-data-dir="):
|
||||
raw = browser_arg.split("=", maxsplit = 1)[1].strip().strip('"').strip("'")
|
||||
if not raw:
|
||||
LOG.warning(_("Ignoring empty --user-data-dir= argument; falling back to configured user_data_dir."))
|
||||
continue
|
||||
user_data_dir_from_args = raw
|
||||
continue
|
||||
browser_args.append(browser_arg)
|
||||
|
||||
effective_user_data_dir = user_data_dir_from_args or self.browser_config.user_data_dir
|
||||
if user_data_dir_from_args and self.browser_config.user_data_dir:
|
||||
arg_path, cfg_path = await asyncio.get_running_loop().run_in_executor(
|
||||
None,
|
||||
_resolve_user_data_dir_paths,
|
||||
user_data_dir_from_args,
|
||||
self.browser_config.user_data_dir,
|
||||
)
|
||||
if arg_path is None or cfg_path is None or arg_path != cfg_path:
|
||||
LOG.warning(
|
||||
_("Configured browser.user_data_dir (%s) does not match --user-data-dir argument (%s); using the argument value."),
|
||||
self.browser_config.user_data_dir,
|
||||
user_data_dir_from_args,
|
||||
)
|
||||
if not effective_user_data_dir and not is_test_environment:
|
||||
effective_user_data_dir = str(xdg_paths.get_browser_profile_path(self._installation_mode))
|
||||
self.browser_config.user_data_dir = effective_user_data_dir
|
||||
|
||||
if not loggers.is_debug(LOG):
|
||||
browser_args.append("--log-level=3") # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3
|
||||
|
||||
@@ -309,7 +357,7 @@ class WebScrapingMixin:
|
||||
headless = False,
|
||||
browser_executable_path = self.browser_config.binary_location,
|
||||
browser_args = browser_args,
|
||||
user_data_dir = self.browser_config.user_data_dir
|
||||
user_data_dir = self.browser_config.user_data_dir,
|
||||
)
|
||||
|
||||
# already logged by nodriver:
|
||||
@@ -371,8 +419,7 @@ class WebScrapingMixin:
|
||||
return True
|
||||
|
||||
if attempt < max_retries - 1:
|
||||
LOG.debug("Port %s:%s not available, retrying in %.1f seconds (attempt %d/%d)",
|
||||
host, port, retry_delay, attempt + 1, max_retries)
|
||||
LOG.debug("Port %s:%s not available, retrying in %.1f seconds (attempt %d/%d)", host, port, retry_delay, attempt + 1, max_retries)
|
||||
await asyncio.sleep(retry_delay)
|
||||
|
||||
return False
|
||||
@@ -522,12 +569,7 @@ class WebScrapingMixin:
|
||||
browser_paths:list[str | None] = []
|
||||
match platform.system():
|
||||
case "Linux":
|
||||
browser_paths = [
|
||||
shutil.which("chromium"),
|
||||
shutil.which("chromium-browser"),
|
||||
shutil.which("google-chrome"),
|
||||
shutil.which("microsoft-edge")
|
||||
]
|
||||
browser_paths = [shutil.which("chromium"), shutil.which("chromium-browser"), shutil.which("google-chrome"), shutil.which("microsoft-edge")]
|
||||
|
||||
case "Darwin":
|
||||
browser_paths = [
|
||||
@@ -540,18 +582,15 @@ class WebScrapingMixin:
|
||||
browser_paths = [
|
||||
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r"\Microsoft\Edge\Application\msedge.exe",
|
||||
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r"\Microsoft\Edge\Application\msedge.exe",
|
||||
|
||||
os.environ["PROGRAMFILES"] + r"\Chromium\Application\chrome.exe",
|
||||
os.environ["PROGRAMFILES(X86)"] + r"\Chromium\Application\chrome.exe",
|
||||
os.environ["LOCALAPPDATA"] + r"\Chromium\Application\chrome.exe",
|
||||
|
||||
os.environ["PROGRAMFILES"] + r"\Chrome\Application\chrome.exe",
|
||||
os.environ["PROGRAMFILES(X86)"] + r"\Chrome\Application\chrome.exe",
|
||||
os.environ["LOCALAPPDATA"] + r"\Chrome\Application\chrome.exe",
|
||||
|
||||
shutil.which("msedge.exe"),
|
||||
shutil.which("chromium.exe"),
|
||||
shutil.which("chrome.exe")
|
||||
shutil.which("chrome.exe"),
|
||||
]
|
||||
|
||||
case _ as os_name:
|
||||
@@ -563,8 +602,14 @@ class WebScrapingMixin:
|
||||
|
||||
raise AssertionError(_("Installed browser could not be detected"))
|
||||
|
||||
async def web_await(self, condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
|
||||
timeout:int | float | None = None, timeout_error_message:str = "", apply_multiplier:bool = True) -> T:
|
||||
async def web_await(
|
||||
self,
|
||||
condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]],
|
||||
*,
|
||||
timeout:int | float | None = None,
|
||||
timeout_error_message:str = "",
|
||||
apply_multiplier:bool = True,
|
||||
) -> T:
|
||||
"""
|
||||
Blocks/waits until the given condition is met.
|
||||
|
||||
@@ -604,7 +649,9 @@ class WebScrapingMixin:
|
||||
return elem.attrs.get("disabled") is not None
|
||||
|
||||
async def is_displayed(elem:Element) -> bool:
|
||||
return cast(bool, await elem.apply("""
|
||||
return cast(
|
||||
bool,
|
||||
await elem.apply("""
|
||||
function (element) {
|
||||
var style = window.getComputedStyle(element);
|
||||
return style.display !== 'none'
|
||||
@@ -613,7 +660,8 @@ class WebScrapingMixin:
|
||||
&& element.offsetWidth > 0
|
||||
&& element.offsetHeight > 0
|
||||
}
|
||||
"""))
|
||||
"""),
|
||||
)
|
||||
|
||||
elem:Element = await self.web_find(selector_type, selector_value, timeout = timeout)
|
||||
|
||||
@@ -627,7 +675,9 @@ class WebScrapingMixin:
|
||||
case Is.READONLY:
|
||||
return elem.attrs.get("readonly") is not None
|
||||
case Is.SELECTED:
|
||||
return cast(bool, await elem.apply("""
|
||||
return cast(
|
||||
bool,
|
||||
await elem.apply("""
|
||||
function (element) {
|
||||
if (element.tagName.toLowerCase() === 'input') {
|
||||
if (element.type === 'checkbox' || element.type === 'radio') {
|
||||
@@ -636,7 +686,8 @@ class WebScrapingMixin:
|
||||
}
|
||||
return false
|
||||
}
|
||||
"""))
|
||||
"""),
|
||||
)
|
||||
raise AssertionError(_("Unsupported attribute: %s") % attr)
|
||||
|
||||
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float | None = None) -> Element:
|
||||
@@ -743,11 +794,8 @@ class WebScrapingMixin:
|
||||
async def attempt(effective_timeout:float) -> Element:
|
||||
return await self._web_find_once(selector_type, selector_value, effective_timeout, parent = parent)
|
||||
|
||||
return await self._run_with_timeout_retries(
|
||||
attempt,
|
||||
description = f"web_find({selector_type.name}, {selector_value})",
|
||||
key = "default",
|
||||
override = timeout
|
||||
return await self._run_with_timeout_retries( # noqa: E501
|
||||
attempt, description = f"web_find({selector_type.name}, {selector_value})", key = "default", override = timeout
|
||||
)
|
||||
|
||||
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> list[Element]:
|
||||
@@ -762,10 +810,7 @@ class WebScrapingMixin:
|
||||
return await self._web_find_all_once(selector_type, selector_value, effective_timeout, parent = parent)
|
||||
|
||||
return await self._run_with_timeout_retries(
|
||||
attempt,
|
||||
description = f"web_find_all({selector_type.name}, {selector_value})",
|
||||
key = "default",
|
||||
override = timeout
|
||||
attempt, description = f"web_find_all({selector_type.name}, {selector_value})", key = "default", override = timeout
|
||||
)
|
||||
|
||||
async def _web_find_once(self, selector_type:By, selector_value:str, timeout:float, *, parent:Element | None = None) -> Element:
|
||||
@@ -778,40 +823,46 @@ class WebScrapingMixin:
|
||||
lambda: self.page.query_selector(f"#{escaped_id}", parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found with ID '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.CLASS_NAME:
|
||||
escaped_classname = selector_value.translate(METACHAR_ESCAPER)
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector(f".{escaped_classname}", parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found with CSS class '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.TAG_NAME:
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector(selector_value, parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found of tag <{selector_value}>{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.CSS_SELECTOR:
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector(selector_value, parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.TEXT:
|
||||
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||
return await self.web_await(
|
||||
lambda: self.page.find_element_by_text(selector_value, best_match = True),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found containing text '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.XPATH:
|
||||
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||
return await self.web_await(
|
||||
lambda: self.page.find_element_by_text(selector_value, best_match = True),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found using XPath '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
|
||||
raise AssertionError(_("Unsupported selector type: %s") % selector_type)
|
||||
|
||||
@@ -825,33 +876,38 @@ class WebScrapingMixin:
|
||||
lambda: self.page.query_selector_all(f".{escaped_classname}", parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found with CSS class '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.CSS_SELECTOR:
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector_all(selector_value, parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found using CSS selector '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.TAG_NAME:
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector_all(selector_value, parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found of tag <{selector_value}>{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.TEXT:
|
||||
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||
return await self.web_await(
|
||||
lambda: self.page.find_elements_by_text(selector_value),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found containing text '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
case By.XPATH:
|
||||
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||
return await self.web_await(
|
||||
lambda: self.page.find_elements_by_text(selector_value),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found using XPath '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
apply_multiplier = False,
|
||||
)
|
||||
|
||||
raise AssertionError(_("Unsupported selector type: %s") % selector_type)
|
||||
|
||||
@@ -885,11 +941,12 @@ class WebScrapingMixin:
|
||||
lambda: self.web_execute("document.readyState == 'complete'"),
|
||||
timeout = page_timeout,
|
||||
timeout_error_message = f"Page did not finish loading within {page_timeout} seconds.",
|
||||
apply_multiplier = False
|
||||
apply_multiplier = False,
|
||||
)
|
||||
|
||||
async def web_text(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> str:
|
||||
return str(await (await self.web_find(selector_type, selector_value, parent = parent, timeout = timeout)).apply("""
|
||||
return str(
|
||||
await (await self.web_find(selector_type, selector_value, parent = parent, timeout = timeout)).apply("""
|
||||
function (elem) {
|
||||
let sel = window.getSelection()
|
||||
sel.removeAllRanges()
|
||||
@@ -900,16 +957,19 @@ class WebScrapingMixin:
|
||||
sel.removeAllRanges()
|
||||
return visibleText
|
||||
}
|
||||
"""))
|
||||
""")
|
||||
)
|
||||
|
||||
async def web_sleep(self, min_ms:int = 1_000, max_ms:int = 2_500) -> None:
    """
    Pause for a random duration between ``min_ms`` (inclusive) and ``max_ms`` (exclusive).

    If ``max_ms`` is not greater than ``min_ms``, the pause lasts exactly ``min_ms`` milliseconds.
    Pauses longer than 1500 ms are logged at INFO level, shorter ones at DEBUG level.

    :param min_ms: minimum pause in milliseconds
    :param max_ms: exclusive upper bound of the pause in milliseconds
    """
    # Conditional expression instead of `cond and a or b`: with min_ms == 0 the and/or form
    # fell through to secrets.randbelow() with a non-positive bound, which raises ValueError.
    duration = min_ms if max_ms <= min_ms else secrets.randbelow(max_ms - min_ms) + min_ms
    LOG.log(
        loggers.INFO if duration > 1_500 else loggers.DEBUG,  # noqa: PLR2004 Magic value used in comparison
        " ... pausing for %d ms ...",
        duration,
    )
    await self.page.sleep(duration / 1_000)
|
||||
|
||||
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200,
|
||||
headers:dict[str, str] | None = None) -> Any:
|
||||
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200, headers:dict[str, str] | None = None) -> Any:
|
||||
method = method.upper()
|
||||
LOG.debug(" -> HTTP %s [%s]...", method, url)
|
||||
response = await self.web_execute(f"""
|
||||
@@ -933,9 +993,10 @@ class WebScrapingMixin:
|
||||
valid_response_codes = [valid_response_codes]
|
||||
ensure(
|
||||
response["statusCode"] in valid_response_codes,
|
||||
f'Invalid response "{response["statusCode"]} response["statusMessage"]" received for HTTP {method} to {url}'
|
||||
f'Invalid response "{response["statusCode"]} {response["statusMessage"]}" received for HTTP {method} to {url}',
|
||||
)
|
||||
return response
|
||||
|
||||
# pylint: enable=dangerous-default-value
|
||||
|
||||
async def web_scroll_page_down(self, scroll_length:int = 10, scroll_speed:int = 10_000, *, scroll_back_top:bool = False) -> None:
|
||||
@@ -968,8 +1029,9 @@ class WebScrapingMixin:
|
||||
:raises UnexpectedTagNameException: if element is not a <select> element
|
||||
"""
|
||||
await self.web_await(
|
||||
lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE), timeout = timeout,
|
||||
timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found"
|
||||
lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found",
|
||||
)
|
||||
elem = await self.web_find(selector_type, selector_value, timeout = timeout)
|
||||
|
||||
@@ -1107,9 +1169,7 @@ class WebScrapingMixin:
|
||||
if port_available:
|
||||
try:
|
||||
version_info = detect_chrome_version_from_remote_debugging(
|
||||
remote_host,
|
||||
remote_port,
|
||||
timeout = self._effective_timeout("chrome_remote_debugging")
|
||||
remote_host, remote_port, timeout = self._effective_timeout("chrome_remote_debugging")
|
||||
)
|
||||
if version_info:
|
||||
LOG.debug(" -> Detected version from existing browser: %s", version_info)
|
||||
@@ -1125,10 +1185,7 @@ class WebScrapingMixin:
|
||||
binary_path = self.browser_config.binary_location
|
||||
if binary_path:
|
||||
LOG.debug(" -> No remote browser detected, trying binary detection")
|
||||
version_info = detect_chrome_version_from_binary(
|
||||
binary_path,
|
||||
timeout = self._effective_timeout("chrome_binary_detection")
|
||||
)
|
||||
version_info = detect_chrome_version_from_binary(binary_path, timeout = self._effective_timeout("chrome_binary_detection"))
|
||||
|
||||
# Validate if Chrome 136+ detected
|
||||
if version_info and version_info.is_chrome_136_plus:
|
||||
@@ -1158,14 +1215,8 @@ class WebScrapingMixin:
|
||||
AssertionError: If configuration is invalid
|
||||
"""
|
||||
# Check if user-data-dir is specified in arguments or configuration
|
||||
has_user_data_dir_arg = any(
|
||||
arg.startswith("--user-data-dir=")
|
||||
for arg in self.browser_config.arguments
|
||||
)
|
||||
has_user_data_dir_config = (
|
||||
self.browser_config.user_data_dir is not None and
|
||||
self.browser_config.user_data_dir.strip()
|
||||
)
|
||||
has_user_data_dir_arg = any(arg.startswith("--user-data-dir=") for arg in self.browser_config.arguments)
|
||||
has_user_data_dir_config = self.browser_config.user_data_dir is not None and bool(self.browser_config.user_data_dir.strip())
|
||||
|
||||
if not has_user_data_dir_arg and not has_user_data_dir_config:
|
||||
error_message = (
|
||||
@@ -1198,14 +1249,18 @@ class WebScrapingMixin:
|
||||
remote_host = "127.0.0.1",
|
||||
remote_port = remote_port if remote_port > 0 else None,
|
||||
remote_timeout = self._effective_timeout("chrome_remote_debugging"),
|
||||
binary_timeout = self._effective_timeout("chrome_binary_detection")
|
||||
binary_timeout = self._effective_timeout("chrome_binary_detection"),
|
||||
)
|
||||
|
||||
# Report binary detection results
|
||||
if diagnostic_info["binary_detection"]:
|
||||
binary_info = diagnostic_info["binary_detection"]
|
||||
LOG.info("(info) %s version from binary: %s %s (major: %d)",
|
||||
binary_info["browser_name"], binary_info["browser_name"], binary_info["version_string"], binary_info["major_version"])
|
||||
LOG.info(
|
||||
"(info) %s version from binary: %s (major: %d)",
|
||||
binary_info["browser_name"],
|
||||
binary_info["version_string"],
|
||||
binary_info["major_version"],
|
||||
)
|
||||
|
||||
if binary_info["is_chrome_136_plus"]:
|
||||
LOG.info("(info) %s 136+ detected - security validation required", binary_info["browser_name"])
|
||||
@@ -1215,17 +1270,18 @@ class WebScrapingMixin:
|
||||
# Report remote detection results
|
||||
if diagnostic_info["remote_detection"]:
|
||||
remote_info = diagnostic_info["remote_detection"]
|
||||
LOG.info("(info) %s version from remote debugging: %s %s (major: %d)",
|
||||
remote_info["browser_name"], remote_info["browser_name"], remote_info["version_string"], remote_info["major_version"])
|
||||
LOG.info(
|
||||
"(info) %s version from remote debugging: %s (major: %d)",
|
||||
remote_info["browser_name"],
|
||||
remote_info["version_string"],
|
||||
remote_info["major_version"],
|
||||
)
|
||||
|
||||
if remote_info["is_chrome_136_plus"]:
|
||||
LOG.info("(info) Remote %s 136+ detected - validating configuration", remote_info["browser_name"])
|
||||
|
||||
# Validate configuration for Chrome/Edge 136+
|
||||
is_valid, error_message = validate_chrome_136_configuration(
|
||||
list(self.browser_config.arguments),
|
||||
self.browser_config.user_data_dir
|
||||
)
|
||||
is_valid, error_message = validate_chrome_136_configuration(list(self.browser_config.arguments), self.browser_config.user_data_dir)
|
||||
|
||||
if not is_valid:
|
||||
LOG.error("(fail) %s 136+ configuration validation failed: %s", remote_info["browser_name"], error_message)
|
||||
|
||||
@@ -213,13 +213,13 @@ def get_browser_profile_path(mode: str | InstallationMode, config_override: str
|
||||
"""
|
||||
mode = _normalize_mode(mode)
|
||||
if config_override:
|
||||
profile_path = Path(config_override)
|
||||
profile_path = Path(config_override).expanduser().resolve()
|
||||
LOG.debug("Resolving browser profile path for mode '%s' (config override): %s", mode, profile_path)
|
||||
elif mode == "portable":
|
||||
profile_path = Path.cwd() / ".temp" / "browser-profile"
|
||||
profile_path = (Path.cwd() / ".temp" / "browser-profile").resolve()
|
||||
LOG.debug("Resolving browser profile path for mode '%s': %s", mode, profile_path)
|
||||
else: # xdg
|
||||
profile_path = get_xdg_base_dir("cache") / "browser-profile"
|
||||
profile_path = (get_xdg_base_dir("cache") / "browser-profile").resolve()
|
||||
LOG.debug("Resolving browser profile path for mode '%s': %s", mode, profile_path)
|
||||
|
||||
# Create directory if it doesn't exist
|
||||
|
||||
@@ -33,7 +33,7 @@ async def atest_init() -> None:
|
||||
web_scraping_mixin.close_browser_session()
|
||||
|
||||
|
||||
@pytest.mark.flaky(reruns = 4, reruns_delay = 5)
|
||||
@pytest.mark.flaky(reruns = 5, reruns_delay = 10)
|
||||
@pytest.mark.itest
|
||||
def test_init() -> None:
|
||||
nodriver.loop().run_until_complete(atest_init()) # type: ignore[attr-defined]
|
||||
|
||||
@@ -1186,7 +1186,14 @@ class TestAdExtractorDownload:
|
||||
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
|
||||
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
|
||||
patch.object(
|
||||
extractor, "_extract_contact_from_ad_page", new_callable=AsyncMock, return_value=ContactPartial(name="Test", zipcode="12345", location="Berlin")
|
||||
extractor,
|
||||
"_extract_contact_from_ad_page",
|
||||
new_callable = AsyncMock,
|
||||
return_value = ContactPartial(
|
||||
name = "Test",
|
||||
zipcode = "12345",
|
||||
location = "Berlin",
|
||||
),
|
||||
),
|
||||
):
|
||||
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)
|
||||
@@ -1243,7 +1250,14 @@ class TestAdExtractorDownload:
|
||||
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
|
||||
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
|
||||
patch.object(
|
||||
extractor, "_extract_contact_from_ad_page", new_callable=AsyncMock, return_value=ContactPartial(name="Test", zipcode="12345", location="Berlin")
|
||||
extractor,
|
||||
"_extract_contact_from_ad_page",
|
||||
new_callable = AsyncMock,
|
||||
return_value = ContactPartial(
|
||||
name = "Test",
|
||||
zipcode = "12345",
|
||||
location = "Berlin",
|
||||
),
|
||||
),
|
||||
):
|
||||
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)
|
||||
@@ -1302,7 +1316,14 @@ class TestAdExtractorDownload:
|
||||
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
|
||||
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
|
||||
patch.object(
|
||||
extractor, "_extract_contact_from_ad_page", new_callable=AsyncMock, return_value=ContactPartial(name="Test", zipcode="12345", location="Berlin")
|
||||
extractor,
|
||||
"_extract_contact_from_ad_page",
|
||||
new_callable = AsyncMock,
|
||||
return_value = ContactPartial(
|
||||
name = "Test",
|
||||
zipcode = "12345",
|
||||
location = "Berlin",
|
||||
),
|
||||
),
|
||||
):
|
||||
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)
|
||||
@@ -1358,7 +1379,14 @@ class TestAdExtractorDownload:
|
||||
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
|
||||
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
|
||||
patch.object(
|
||||
extractor, "_extract_contact_from_ad_page", new_callable=AsyncMock, return_value=ContactPartial(name="Test", zipcode="12345", location="Berlin")
|
||||
extractor,
|
||||
"_extract_contact_from_ad_page",
|
||||
new_callable = AsyncMock,
|
||||
return_value = ContactPartial(
|
||||
name = "Test",
|
||||
zipcode = "12345",
|
||||
location = "Berlin",
|
||||
),
|
||||
),
|
||||
):
|
||||
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)
|
||||
|
||||
@@ -641,6 +641,31 @@ class TestKleinanzeigenBotArgParsing:
|
||||
test_bot.parse_args(["script.py", "help", "version"])
|
||||
assert exc_info.value.code == 2
|
||||
|
||||
def test_parse_args_explicit_flags(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None:
|
||||
"""Test that explicit flags are set when --config and --logfile options are provided."""
|
||||
config_path = tmp_path / "custom_config.yaml"
|
||||
log_path = tmp_path / "custom.log"
|
||||
|
||||
# Test --config flag sets config_explicitly_provided
|
||||
test_bot.parse_args(["script.py", "--config", str(config_path), "help"])
|
||||
assert test_bot.config_explicitly_provided is True
|
||||
assert str(config_path.absolute()) == test_bot.config_file_path
|
||||
|
||||
# Reset for next test
|
||||
test_bot.config_explicitly_provided = False
|
||||
|
||||
# Test --logfile flag sets log_file_explicitly_provided
|
||||
test_bot.parse_args(["script.py", "--logfile", str(log_path), "help"])
|
||||
assert test_bot.log_file_explicitly_provided is True
|
||||
assert str(log_path.absolute()) == test_bot.log_file_path
|
||||
|
||||
# Test both flags together
|
||||
test_bot.config_explicitly_provided = False
|
||||
test_bot.log_file_explicitly_provided = False
|
||||
test_bot.parse_args(["script.py", "--config", str(config_path), "--logfile", str(log_path), "help"])
|
||||
assert test_bot.config_explicitly_provided is True
|
||||
assert test_bot.log_file_explicitly_provided is True
|
||||
|
||||
|
||||
class TestKleinanzeigenBotCommands:
|
||||
"""Tests for command execution."""
|
||||
@@ -863,7 +888,7 @@ class TestKleinanzeigenBotAdDeletion:
|
||||
async def test_delete_ad_by_title(self, test_bot:KleinanzeigenBot, minimal_ad_config:dict[str, Any]) -> None:
|
||||
"""Test deleting an ad by title."""
|
||||
test_bot.page = MagicMock()
|
||||
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "content": "{}"})
|
||||
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "statusMessage": "OK", "content": "{}"})
|
||||
test_bot.page.sleep = AsyncMock()
|
||||
|
||||
# Use minimal config since we only need title for deletion by title
|
||||
@@ -891,7 +916,7 @@ class TestKleinanzeigenBotAdDeletion:
|
||||
async def test_delete_ad_by_id(self, test_bot:KleinanzeigenBot, minimal_ad_config:dict[str, Any]) -> None:
|
||||
"""Test deleting an ad by ID."""
|
||||
test_bot.page = MagicMock()
|
||||
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "content": "{}"})
|
||||
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "statusMessage": "OK", "content": "{}"})
|
||||
test_bot.page.sleep = AsyncMock()
|
||||
|
||||
# Create config with ID for deletion by ID
|
||||
@@ -918,7 +943,7 @@ class TestKleinanzeigenBotAdDeletion:
|
||||
async def test_delete_ad_by_id_with_non_string_csrf_token(self, test_bot:KleinanzeigenBot, minimal_ad_config:dict[str, Any]) -> None:
|
||||
"""Test deleting an ad by ID with non-string CSRF token to cover str() conversion."""
|
||||
test_bot.page = MagicMock()
|
||||
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "content": "{}"})
|
||||
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "statusMessage": "OK", "content": "{}"})
|
||||
test_bot.page.sleep = AsyncMock()
|
||||
|
||||
# Create config with ID for deletion by ID
|
||||
|
||||
@@ -20,9 +20,7 @@ class TestWebScrapingMixinChromeVersionValidation:
|
||||
return WebScrapingMixin()
|
||||
|
||||
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary")
|
||||
async def test_validate_chrome_version_configuration_chrome_136_plus_valid(
|
||||
self, mock_detect:Mock, scraper:WebScrapingMixin
|
||||
) -> None:
|
||||
async def test_validate_chrome_version_configuration_chrome_136_plus_valid(self, mock_detect:Mock, scraper:WebScrapingMixin) -> None:
|
||||
"""Test Chrome 136+ validation with valid configuration."""
|
||||
# Setup mocks
|
||||
mock_detect.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
|
||||
@@ -88,9 +86,7 @@ class TestWebScrapingMixinChromeVersionValidation:
|
||||
os.environ["PYTEST_CURRENT_TEST"] = original_env
|
||||
|
||||
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary")
|
||||
async def test_validate_chrome_version_configuration_chrome_pre_136(
|
||||
self, mock_detect:Mock, scraper:WebScrapingMixin
|
||||
) -> None:
|
||||
async def test_validate_chrome_version_configuration_chrome_pre_136(self, mock_detect:Mock, scraper:WebScrapingMixin) -> None:
|
||||
"""Test Chrome pre-136 validation (no special requirements)."""
|
||||
# Setup mocks
|
||||
mock_detect.return_value = ChromeVersionInfo("120.0.6099.109", 120, "Chrome")
|
||||
@@ -121,11 +117,7 @@ class TestWebScrapingMixinChromeVersionValidation:
|
||||
@patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
|
||||
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging")
|
||||
async def test_validate_chrome_version_logs_remote_detection(
|
||||
self,
|
||||
mock_remote:Mock,
|
||||
mock_binary:Mock,
|
||||
scraper:WebScrapingMixin,
|
||||
caplog:pytest.LogCaptureFixture
|
||||
self, mock_remote:Mock, mock_binary:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture
|
||||
) -> None:
|
||||
"""When a remote browser responds, the detected version should be logged."""
|
||||
mock_remote.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
|
||||
@@ -134,17 +126,14 @@ class TestWebScrapingMixinChromeVersionValidation:
|
||||
scraper.browser_config.binary_location = "/path/to/chrome"
|
||||
caplog.set_level("DEBUG")
|
||||
|
||||
with patch.dict(os.environ, {}, clear = True), \
|
||||
patch.object(scraper, "_check_port_with_retry", return_value = True):
|
||||
with patch.dict(os.environ, {}, clear = True), patch.object(scraper, "_check_port_with_retry", return_value = True):
|
||||
await scraper._validate_chrome_version_configuration()
|
||||
|
||||
assert "Detected version from existing browser" in caplog.text
|
||||
mock_remote.assert_called_once()
|
||||
|
||||
@patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
|
||||
async def test_validate_chrome_version_configuration_no_binary_location(
|
||||
self, mock_detect:Mock, scraper:WebScrapingMixin
|
||||
) -> None:
|
||||
async def test_validate_chrome_version_configuration_no_binary_location(self, mock_detect:Mock, scraper:WebScrapingMixin) -> None:
|
||||
"""Test Chrome version validation when no binary location is set."""
|
||||
# Configure scraper without binary location
|
||||
scraper.browser_config.binary_location = None
|
||||
@@ -204,15 +193,10 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
|
||||
"""Test Chrome version diagnostics with binary detection."""
|
||||
# Setup mocks
|
||||
mock_get_diagnostic.return_value = {
|
||||
"binary_detection": {
|
||||
"version_string": "136.0.6778.0",
|
||||
"major_version": 136,
|
||||
"browser_name": "Chrome",
|
||||
"is_chrome_136_plus": True
|
||||
},
|
||||
"binary_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
|
||||
"remote_detection": None,
|
||||
"chrome_136_plus_detected": True,
|
||||
"recommendations": []
|
||||
"recommendations": [],
|
||||
}
|
||||
mock_validate.return_value = (True, "")
|
||||
|
||||
@@ -230,7 +214,7 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
|
||||
scraper._diagnose_chrome_version_issues(9222)
|
||||
|
||||
# Verify logs
|
||||
assert "Chrome version from binary: Chrome 136.0.6778.0 (major: 136)" in caplog.text
|
||||
assert "Chrome version from binary: 136.0.6778.0 (major: 136)" in caplog.text
|
||||
assert "Chrome 136+ detected - security validation required" in caplog.text
|
||||
|
||||
# Verify mocks were called
|
||||
@@ -255,14 +239,9 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
|
||||
# Setup mocks
|
||||
mock_get_diagnostic.return_value = {
|
||||
"binary_detection": None,
|
||||
"remote_detection": {
|
||||
"version_string": "136.0.6778.0",
|
||||
"major_version": 136,
|
||||
"browser_name": "Chrome",
|
||||
"is_chrome_136_plus": True
|
||||
},
|
||||
"remote_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
|
||||
"chrome_136_plus_detected": True,
|
||||
"recommendations": []
|
||||
"recommendations": [],
|
||||
}
|
||||
mock_validate.return_value = (False, "Chrome 136+ requires --user-data-dir")
|
||||
|
||||
@@ -280,32 +259,22 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
|
||||
scraper._diagnose_chrome_version_issues(9222)
|
||||
|
||||
# Verify logs
|
||||
assert "Chrome version from remote debugging: Chrome 136.0.6778.0 (major: 136)" in caplog.text
|
||||
assert "(info) Chrome version from remote debugging: 136.0.6778.0 (major: 136)" in caplog.text
|
||||
assert "Remote Chrome 136+ detected - validating configuration" in caplog.text
|
||||
assert "Chrome 136+ configuration validation failed" in caplog.text
|
||||
|
||||
# Verify validation was called
|
||||
mock_validate.assert_called_once_with(
|
||||
["--remote-debugging-port=9222"],
|
||||
None
|
||||
)
|
||||
mock_validate.assert_called_once_with(["--remote-debugging-port=9222"], None)
|
||||
finally:
|
||||
# Restore environment
|
||||
if original_env:
|
||||
os.environ["PYTEST_CURRENT_TEST"] = original_env
|
||||
|
||||
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.get_chrome_version_diagnostic_info")
|
||||
def test_diagnose_chrome_version_issues_no_detection(
|
||||
self, mock_get_diagnostic:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture
|
||||
) -> None:
|
||||
def test_diagnose_chrome_version_issues_no_detection(self, mock_get_diagnostic:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
|
||||
"""Test Chrome version diagnostics with no detection."""
|
||||
# Setup mocks
|
||||
mock_get_diagnostic.return_value = {
|
||||
"binary_detection": None,
|
||||
"remote_detection": None,
|
||||
"chrome_136_plus_detected": False,
|
||||
"recommendations": []
|
||||
}
|
||||
mock_get_diagnostic.return_value = {"binary_detection": None, "remote_detection": None, "chrome_136_plus_detected": False, "recommendations": []}
|
||||
|
||||
# Configure scraper
|
||||
scraper.browser_config.binary_location = "/path/to/chrome"
|
||||
@@ -334,15 +303,10 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
|
||||
"""Test Chrome version diagnostics with Chrome 136+ recommendations."""
|
||||
# Setup mocks
|
||||
mock_get_diagnostic.return_value = {
|
||||
"binary_detection": {
|
||||
"version_string": "136.0.6778.0",
|
||||
"major_version": 136,
|
||||
"browser_name": "Chrome",
|
||||
"is_chrome_136_plus": True
|
||||
},
|
||||
"binary_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
|
||||
"remote_detection": None,
|
||||
"chrome_136_plus_detected": True,
|
||||
"recommendations": []
|
||||
"recommendations": [],
|
||||
}
|
||||
|
||||
# Configure scraper
|
||||
@@ -377,11 +341,11 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
|
||||
"version_string": "120.0.6099.109",
|
||||
"major_version": 120,
|
||||
"browser_name": "Chrome",
|
||||
"is_chrome_136_plus": False # This triggers the else branch (lines 832-849)
|
||||
"is_chrome_136_plus": False, # This triggers the else branch (lines 832-849)
|
||||
},
|
||||
"remote_detection": None, # Ensure this is None to avoid other branches
|
||||
"chrome_136_plus_detected": False, # Ensure this is False to avoid recommendations
|
||||
"recommendations": []
|
||||
"recommendations": [],
|
||||
}
|
||||
|
||||
# Configure scraper
|
||||
@@ -420,14 +384,9 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
|
||||
# Setup mocks
|
||||
mock_get_diagnostic.return_value = {
|
||||
"binary_detection": None,
|
||||
"remote_detection": {
|
||||
"version_string": "136.0.6778.0",
|
||||
"major_version": 136,
|
||||
"browser_name": "Chrome",
|
||||
"is_chrome_136_plus": True
|
||||
},
|
||||
"remote_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
|
||||
"chrome_136_plus_detected": True,
|
||||
"recommendations": []
|
||||
"recommendations": [],
|
||||
}
|
||||
mock_validate.return_value = (True, "") # This triggers the else branch (line 846)
|
||||
|
||||
@@ -451,7 +410,7 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
|
||||
# Verify validation was called with correct arguments
|
||||
mock_validate.assert_called_once_with(
|
||||
["--remote-debugging-port=9222", "--user-data-dir=/tmp/chrome-debug"], # noqa: S108
|
||||
"/tmp/chrome-debug" # noqa: S108
|
||||
"/tmp/chrome-debug", # noqa: S108
|
||||
)
|
||||
finally:
|
||||
# Restore environment
|
||||
@@ -469,9 +428,7 @@ class TestWebScrapingMixinIntegration:
|
||||
|
||||
@patch.object(WebScrapingMixin, "_validate_chrome_version_configuration")
|
||||
@patch.object(WebScrapingMixin, "get_compatible_browser")
|
||||
async def test_create_browser_session_calls_chrome_validation(
|
||||
self, mock_get_browser:Mock, mock_validate:Mock, scraper:WebScrapingMixin
|
||||
) -> None:
|
||||
async def test_create_browser_session_calls_chrome_validation(self, mock_get_browser:Mock, mock_validate:Mock, scraper:WebScrapingMixin) -> None:
|
||||
"""Test that create_browser_session calls Chrome version validation."""
|
||||
# Setup mocks
|
||||
mock_get_browser.return_value = "/path/to/chrome"
|
||||
@@ -493,9 +450,7 @@ class TestWebScrapingMixinIntegration:
|
||||
|
||||
@patch.object(WebScrapingMixin, "_diagnose_chrome_version_issues")
|
||||
@patch.object(WebScrapingMixin, "get_compatible_browser")
|
||||
def test_diagnose_browser_issues_calls_chrome_diagnostics(
|
||||
self, mock_get_browser:Mock, mock_diagnose:Mock, scraper:WebScrapingMixin
|
||||
) -> None:
|
||||
def test_diagnose_browser_issues_calls_chrome_diagnostics(self, mock_get_browser:Mock, mock_diagnose:Mock, scraper:WebScrapingMixin) -> None:
|
||||
"""Test that diagnose_browser_issues calls Chrome version diagnostics."""
|
||||
# Setup mocks
|
||||
mock_get_browser.return_value = "/path/to/chrome"
|
||||
@@ -521,9 +476,7 @@ class TestWebScrapingMixinIntegration:
|
||||
|
||||
# Mock Chrome version detection to return pre-136 version
|
||||
with patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary") as mock_detect:
|
||||
mock_detect.return_value = ChromeVersionInfo(
|
||||
"120.0.6099.109", 120, "Chrome"
|
||||
)
|
||||
mock_detect.return_value = ChromeVersionInfo("120.0.6099.109", 120, "Chrome")
|
||||
|
||||
# Temporarily unset PYTEST_CURRENT_TEST to allow validation to run
|
||||
original_env = os.environ.get("PYTEST_CURRENT_TEST")
|
||||
@@ -541,3 +494,68 @@ class TestWebScrapingMixinIntegration:
|
||||
# Restore environment
|
||||
if original_env:
|
||||
os.environ["PYTEST_CURRENT_TEST"] = original_env
|
||||
|
||||
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary")
|
||||
async def test_validate_chrome_136_configuration_with_whitespace_user_data_dir(
|
||||
self, mock_detect:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture
|
||||
) -> None:
|
||||
"""Test Chrome 136+ validation correctly handles whitespace-only user_data_dir."""
|
||||
# Setup mocks
|
||||
mock_detect.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
|
||||
|
||||
# Configure scraper with whitespace-only user_data_dir
|
||||
scraper.browser_config.binary_location = "/path/to/chrome"
|
||||
scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
|
||||
scraper.browser_config.user_data_dir = " " # Only whitespace
|
||||
|
||||
# Temporarily unset PYTEST_CURRENT_TEST to allow validation to run
|
||||
original_env = os.environ.get("PYTEST_CURRENT_TEST")
|
||||
if "PYTEST_CURRENT_TEST" in os.environ:
|
||||
del os.environ["PYTEST_CURRENT_TEST"]
|
||||
|
||||
try:
|
||||
# Test validation should fail because whitespace-only is treated as empty
|
||||
await scraper._validate_chrome_version_configuration()
|
||||
|
||||
# Verify detection was called
|
||||
assert mock_detect.call_count == 1
|
||||
|
||||
# Verify error was logged
|
||||
assert "Chrome 136+ configuration validation failed" in caplog.text
|
||||
assert "Chrome 136+ requires --user-data-dir" in caplog.text
|
||||
finally:
|
||||
# Restore environment
|
||||
if original_env:
|
||||
os.environ["PYTEST_CURRENT_TEST"] = original_env
|
||||
|
||||
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary")
|
||||
async def test_validate_chrome_136_configuration_with_valid_user_data_dir(
|
||||
self, mock_detect:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture
|
||||
) -> None:
|
||||
"""Test Chrome 136+ validation passes with valid user_data_dir."""
|
||||
# Setup mocks
|
||||
mock_detect.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
|
||||
|
||||
# Configure scraper with valid user_data_dir
|
||||
scraper.browser_config.binary_location = "/path/to/chrome"
|
||||
scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
|
||||
scraper.browser_config.user_data_dir = "/tmp/valid-profile" # noqa: S108
|
||||
|
||||
# Temporarily unset PYTEST_CURRENT_TEST to allow validation to run
|
||||
original_env = os.environ.get("PYTEST_CURRENT_TEST")
|
||||
if "PYTEST_CURRENT_TEST" in os.environ:
|
||||
del os.environ["PYTEST_CURRENT_TEST"]
|
||||
|
||||
try:
|
||||
# Test validation should pass
|
||||
await scraper._validate_chrome_version_configuration()
|
||||
|
||||
# Verify detection was called
|
||||
assert mock_detect.call_count == 1
|
||||
|
||||
# Verify success was logged
|
||||
assert "Chrome 136+ configuration validation passed" in caplog.text
|
||||
finally:
|
||||
# Restore environment
|
||||
if original_env:
|
||||
os.environ["PYTEST_CURRENT_TEST"] = original_env
|
||||
|
||||
Reference in New Issue
Block a user