feat: add browser profile XDG support and documentation (#777)

This commit is contained in:
Jens
2026-01-23 22:45:22 +01:00
committed by GitHub
parent dc0d9404bf
commit eda1b4d0ec
15 changed files with 841 additions and 687 deletions

View File

@@ -174,7 +174,8 @@ jobs:
case "${{ matrix.os }}" in
ubuntu-*)
sudo apt-get install --no-install-recommends -y xvfb
xvfb-run pdm run itest:cov -vv
# Run tests INSIDE xvfb context
xvfb-run bash -c 'pdm run itest:cov -vv'
;;
*) pdm run itest:cov -vv
;;

View File

@@ -248,6 +248,25 @@ Limitation of `download`: It's only possible to extract the cheapest given shipp
All configuration files can be in YAML or JSON format.
### Installation modes (portable vs. system-wide)
On first run, the app may ask which installation mode to use. In non-interactive environments (CI/headless), it does not prompt and defaults to portable mode. The `--config` and `--logfile` options override only their own paths; they do not change any other mode-dependent paths or the selected installation mode.
1. **Portable mode (recommended for most users, especially on Windows):**
- Stores config, logs, downloads, and state in the current directory
- No admin permissions required
- Easy backup/migration; works from USB drives
2. **System-wide mode (advanced users / multi-user setups):**
- Stores files in OS-standard locations
- Cleaner directory structure; better separation from working directory
- Requires proper permissions for user data directories
**OS notes (brief):**
- **Windows:** System-wide uses AppData (Roaming/Local); portable keeps everything beside the `.exe`.
- **Linux:** System-wide follows XDG Base Directory spec; portable stays in the current working directory.
- **macOS:** System-wide uses `~/Library/Application Support/kleinanzeigen-bot` (and related dirs); portable stays in the current directory.
### <a name="main-config"></a>1) Main configuration
When executed, the app looks for a `config.yaml` file in the current directory by default. If the file does not exist, it is created automatically.

View File

@@ -111,7 +111,8 @@ lint = { composite = ["lint:ruff", "lint:mypy", "lint:pyright"] }
# Run unit tests only (exclude smoke and itest)
utest = "python -m pytest --capture=tee-sys -m \"not itest and not smoke\""
# Run integration tests only (exclude smoke)
itest = "python -m pytest --capture=tee-sys -m \"itest and not smoke\""
# Uses -n 0 to disable xdist parallelization - browser tests are flaky with parallel workers
itest = "python -m pytest --capture=tee-sys -m \"itest and not smoke\" -n 0"
# Run smoke tests only
smoke = "python -m pytest --capture=tee-sys -m smoke"
# Run all tests in order: unit, integration, smoke
@@ -126,7 +127,7 @@ test = { composite = ["utest", "itest", "smoke"] }
"coverage:prepare" = { shell = "python scripts/coverage_helper.py prepare" }
"test:cov" = { composite = ["coverage:prepare", "utest:cov", "itest:cov", "smoke:cov", "coverage:combine"] }
"utest:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-unit.sqlite .temp/coverage-unit.xml \"not itest and not smoke\"" }
"itest:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-itest.sqlite .temp/coverage-integration.xml \"itest and not smoke\"" }
"itest:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-itest.sqlite .temp/coverage-integration.xml \"itest and not smoke\" -n 0" }
"smoke:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-smoke.sqlite .temp/coverage-smoke.xml smoke" }
"coverage:combine" = { shell = "python scripts/coverage_helper.py combine .temp/.coverage-unit.sqlite .temp/.coverage-itest.sqlite .temp/.coverage-smoke.sqlite" }
# Run all tests with coverage in a single invocation

View File

@@ -185,7 +185,7 @@
"BrowserConfig": {
"properties": {
"arguments": {
"description": "See https://peter.sh/experiments/chromium-command-line-switches/",
"description": "See https://peter.sh/experiments/chromium-command-line-switches/. Browser profile path is auto-configured based on installation mode (portable/XDG).",
"items": {
"type": "string"
},
@@ -227,8 +227,8 @@
"type": "null"
}
],
"default": ".temp/browser-profile",
"description": "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md",
"default": null,
"description": "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md. If not specified, defaults to XDG cache directory in XDG mode or .temp/browser-profile in portable mode.",
"title": "User Data Dir"
},
"profile_name": {

View File

@@ -149,7 +149,7 @@ def apply_auto_price_reduction(ad_cfg: Ad, _ad_cfg_orig: dict[str, Any], ad_file
# Note: price_reduction_count is persisted to ad_cfg_orig only after successful publish
class KleinanzeigenBot(WebScrapingMixin):
class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
def __init__(self) -> None:
# workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/295
# see https://github.com/pyinstaller/pyinstaller/issues/7229#issuecomment-1309383026

View File

@@ -91,7 +91,10 @@ class AdDefaults(ContextualModel):
class DownloadConfig(ContextualModel):
include_all_matching_shipping_options: bool = Field(default=False, description="if true, all shipping options matching the package size will be included")
include_all_matching_shipping_options:bool = Field(
default = False,
description = "if true, all shipping options matching the package size will be included",
)
excluded_shipping_options:list[str] = Field(default_factory = list, description = "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']")
folder_name_max_length:int = Field(default = 100, ge = 10, le = 255, description = "maximum length for folder names when downloading ads (default: 100)")
rename_existing_folders:bool = Field(default = False, description = "if true, rename existing folders without titles to include titles (default: false)")

View File

@@ -457,6 +457,9 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
" -> Browser profile name: %s": " -> Browser-Profilname: %s"
" -> Browser user data dir: %s": " -> Browser-Benutzerdatenverzeichnis: %s"
" -> Custom Browser argument: %s": " -> Benutzerdefiniertes Browser-Argument: %s"
"Ignoring empty --user-data-dir= argument; falling back to configured user_data_dir.": "Ignoriere leeres --user-data-dir= Argument; verwende konfiguriertes user_data_dir."
"Configured browser.user_data_dir (%s) does not match --user-data-dir argument (%s); using the argument value.": "Konfiguriertes browser.user_data_dir (%s) stimmt nicht mit --user-data-dir Argument (%s) überein; verwende Argument-Wert."
"Remote debugging detected, but browser configuration looks invalid: %s": "Remote-Debugging erkannt, aber Browser-Konfiguration scheint ungültig: %s"
" -> Setting chrome prefs [%s]...": " -> Setze Chrome-Einstellungen [%s]..."
" -> Adding Browser extension: [%s]": " -> Füge Browser-Erweiterung hinzu: [%s]"
"Failed to connect to browser. This error often occurs when:": "Fehler beim Verbinden mit dem Browser. Dieser Fehler tritt häufig auf, wenn:"
@@ -546,8 +549,8 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
" -> Unexpected error during browser version validation, skipping: %s": " -> Unerwarteter Fehler bei Browser-Versionsvalidierung, wird übersprungen: %s"
_diagnose_chrome_version_issues:
"(info) %s version from binary: %s %s (major: %d)": "(Info) %s-Version von Binärdatei: %s %s (Hauptversion: %d)"
"(info) %s version from remote debugging: %s %s (major: %d)": "(Info) %s-Version von Remote-Debugging: %s %s (Hauptversion: %d)"
"(info) %s version from binary: %s (major: %d)": "(Info) %s-Version von Binärdatei: %s (Hauptversion: %d)"
"(info) %s version from remote debugging: %s (major: %d)": "(Info) %s-Version von Remote-Debugging: %s (Hauptversion: %d)"
"(info) %s 136+ detected - security validation required": "(Info) %s 136+ erkannt - Sicherheitsvalidierung erforderlich"
"(info) %s pre-136 detected - no special security requirements": "(Info) %s vor 136 erkannt - keine besonderen Sicherheitsanforderungen"
"(info) Remote %s 136+ detected - validating configuration": "(Info) Remote %s 136+ erkannt - validiere Konfiguration"

View File

@@ -4,6 +4,7 @@
import asyncio, enum, inspect, json, os, platform, secrets, shutil, subprocess, urllib.request # isort: skip # noqa: S404
from collections.abc import Awaitable, Callable, Coroutine, Iterable
from gettext import gettext as _
from pathlib import Path
from typing import Any, Final, Optional, cast
try:
@@ -22,7 +23,7 @@ from nodriver.core.tab import Tab as Page
from kleinanzeigen_bot.model.config_model import Config as BotConfig
from kleinanzeigen_bot.model.config_model import TimeoutConfig
from . import files, loggers, net
from . import files, loggers, net, xdg_paths
from .chrome_version_detector import (
ChromeVersionInfo,
detect_chrome_version_from_binary,
@@ -40,6 +41,28 @@ if TYPE_CHECKING:
_KEY_VALUE_PAIR_SIZE = 2
def _resolve_user_data_dir_paths(arg_value:str, config_value:str) -> tuple[Any, Any]:
"""Resolve the argument and config user_data_dir paths for comparison."""
try:
return (
Path(arg_value).expanduser().resolve(),
Path(config_value).expanduser().resolve(),
)
except OSError as exc:
LOG.debug("Failed to resolve user_data_dir paths for comparison: %s", exc)
return None, None
def _has_non_empty_user_data_dir_arg(args:Iterable[str]) -> bool:
for arg in args:
if not arg.startswith("--user-data-dir="):
continue
raw = arg.split("=", maxsplit = 1)[1].strip().strip('"').strip("'")
if raw:
return True
return False
def _is_remote_object(obj:Any) -> TypeGuard["RemoteObject"]:
"""Type guard to check if an object is a RemoteObject."""
return hasattr(obj, "__class__") and "RemoteObject" in str(type(obj))
@@ -58,7 +81,7 @@ __all__ = [
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
# see https://api.jquery.com/category/selectors/
METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f"\\{ch}" for ch in '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~'})
METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f"\\{ch}" for ch in "!\"#$%&'()*+,./:;<=>?@[\\]^`{|}~"})
def _is_admin() -> bool:
@@ -90,7 +113,6 @@ class Is(enum.Enum):
class BrowserConfig:
def __init__(self) -> None:
self.arguments:Iterable[str] = []
self.binary_location:str | None = None
@@ -102,37 +124,27 @@ class BrowserConfig:
def _write_initial_prefs(prefs_file:str) -> None:
with open(prefs_file, "w", encoding = "UTF-8") as fd:
json.dump({
json.dump(
{
"credentials_enable_service": False,
"enable_do_not_track": True,
"google": {
"services": {
"consented_to_sync": False
}
},
"google": {"services": {"consented_to_sync": False}},
"profile": {
"default_content_setting_values": {
"popups": 0,
"notifications": 2 # 1 = allow, 2 = block browser notifications
"notifications": 2, # 1 = allow, 2 = block browser notifications
},
"password_manager_enabled": False
"password_manager_enabled": False,
},
"signin": {
"allowed": False
"signin": {"allowed": False},
"translate_site_blacklist": ["www.kleinanzeigen.de"],
"devtools": {"preferences": {"currentDockState": '"bottom"'}},
},
"translate_site_blacklist": [
"www.kleinanzeigen.de"
],
"devtools": {
"preferences": {
"currentDockState": '"bottom"'
}
}
}, fd)
fd,
)
class WebScrapingMixin:
def __init__(self) -> None:
self.browser_config:Final[BrowserConfig] = BrowserConfig()
self.browser:Browser = None # pyright: ignore[reportAttributeAccessIssue]
@@ -140,6 +152,11 @@ class WebScrapingMixin:
self._default_timeout_config:TimeoutConfig | None = None
self.config:BotConfig = cast(BotConfig, None)
@property
def _installation_mode(self) -> str:
"""Get installation mode with fallback to portable."""
return getattr(self, "installation_mode_or_portable", "portable")
def _get_timeout_config(self) -> TimeoutConfig:
config = getattr(self, "config", None)
timeouts:TimeoutConfig | None = None
@@ -172,12 +189,7 @@ class WebScrapingMixin:
return 1 + cfg.retry_max_attempts
async def _run_with_timeout_retries(
self,
operation:Callable[[float], Awaitable[T]],
*,
description:str,
key:str = "default",
override:float | None = None
self, operation:Callable[[float], Awaitable[T]], *, description:str, key:str = "default", override:float | None = None
) -> T:
"""
Execute an async callable with retry/backoff handling for TimeoutError.
@@ -191,13 +203,7 @@ class WebScrapingMixin:
except TimeoutError:
if attempt >= attempts - 1:
raise
LOG.debug(
"Retrying %s after TimeoutError (attempt %d/%d, timeout %.1fs)",
description,
attempt + 1,
attempts,
effective_timeout
)
LOG.debug("Retrying %s after TimeoutError (attempt %d/%d, timeout %.1fs)", description, attempt + 1, attempts, effective_timeout)
raise TimeoutError(f"{description} failed without executing operation")
@@ -210,7 +216,24 @@ class WebScrapingMixin:
self.browser_config.binary_location = self.get_compatible_browser()
LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location)
has_remote_debugging = any(arg.startswith("--remote-debugging-port=") for arg in self.browser_config.arguments)
is_test_environment = bool(os.environ.get("PYTEST_CURRENT_TEST"))
if (
not (self.browser_config.user_data_dir and self.browser_config.user_data_dir.strip())
and not _has_non_empty_user_data_dir_arg(self.browser_config.arguments)
and not has_remote_debugging
and not is_test_environment
):
self.browser_config.user_data_dir = str(xdg_paths.get_browser_profile_path(self._installation_mode))
# Chrome version detection and validation
if has_remote_debugging:
try:
await self._validate_chrome_version_configuration()
except AssertionError as exc:
LOG.warning(_("Remote debugging detected, but browser configuration looks invalid: %s"), exc)
else:
await self._validate_chrome_version_configuration()
########################################################
@@ -229,10 +252,12 @@ class WebScrapingMixin:
# Enhanced port checking with retry logic
port_available = await self._check_port_with_retry(remote_host, remote_port)
ensure(port_available,
ensure(
port_available,
f"Browser process not reachable at {remote_host}:{remote_port}. "
f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml. "
f"Make sure the browser is running and the port is not blocked by firewall.")
f"Make sure the browser is running and the port is not blocked by firewall.",
)
try:
cfg = NodriverConfig(
@@ -255,8 +280,7 @@ class WebScrapingMixin:
LOG.error("Troubleshooting steps:")
LOG.error("1. Close all browser instances and try again")
LOG.error("2. Remove the user_data_dir configuration temporarily")
LOG.error("3. Start browser manually with: %s --remote-debugging-port=%d",
self.browser_config.binary_location, remote_port)
LOG.error("3. Start browser manually with: %s --remote-debugging-port=%d", self.browser_config.binary_location, remote_port)
LOG.error("4. Check if any antivirus or security software is blocking the connection")
raise
@@ -274,13 +298,11 @@ class WebScrapingMixin:
"--disable-sync",
"--no-experiments",
"--disable-search-engine-choice-screen",
"--disable-features=MediaRouter",
"--use-mock-keychain",
"--test-type", # https://stackoverflow.com/a/36746675/5116073
# https://chromium.googlesource.com/chromium/src/+/master/net/dns/README.md#request-remapping
'--host-resolver-rules="MAP connect.facebook.net 127.0.0.1, MAP securepubads.g.doubleclick.net 127.0.0.1, MAP www.googletagmanager.com 127.0.0.1"'
'--host-resolver-rules="MAP connect.facebook.net 127.0.0.1, MAP securepubads.g.doubleclick.net 127.0.0.1, MAP www.googletagmanager.com 127.0.0.1"',
]
is_edge = "edge" in self.browser_config.binary_location.lower()
@@ -295,10 +317,36 @@ class WebScrapingMixin:
LOG.info(" -> Browser profile name: %s", self.browser_config.profile_name)
browser_args.append(f"--profile-directory={self.browser_config.profile_name}")
user_data_dir_from_args:str | None = None
for browser_arg in self.browser_config.arguments:
LOG.info(" -> Custom Browser argument: %s", browser_arg)
if browser_arg.startswith("--user-data-dir="):
raw = browser_arg.split("=", maxsplit = 1)[1].strip().strip('"').strip("'")
if not raw:
LOG.warning(_("Ignoring empty --user-data-dir= argument; falling back to configured user_data_dir."))
continue
user_data_dir_from_args = raw
continue
browser_args.append(browser_arg)
effective_user_data_dir = user_data_dir_from_args or self.browser_config.user_data_dir
if user_data_dir_from_args and self.browser_config.user_data_dir:
arg_path, cfg_path = await asyncio.get_running_loop().run_in_executor(
None,
_resolve_user_data_dir_paths,
user_data_dir_from_args,
self.browser_config.user_data_dir,
)
if arg_path is None or cfg_path is None or arg_path != cfg_path:
LOG.warning(
_("Configured browser.user_data_dir (%s) does not match --user-data-dir argument (%s); using the argument value."),
self.browser_config.user_data_dir,
user_data_dir_from_args,
)
if not effective_user_data_dir and not is_test_environment:
effective_user_data_dir = str(xdg_paths.get_browser_profile_path(self._installation_mode))
self.browser_config.user_data_dir = effective_user_data_dir
if not loggers.is_debug(LOG):
browser_args.append("--log-level=3") # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3
@@ -309,7 +357,7 @@ class WebScrapingMixin:
headless = False,
browser_executable_path = self.browser_config.binary_location,
browser_args = browser_args,
user_data_dir = self.browser_config.user_data_dir
user_data_dir = self.browser_config.user_data_dir,
)
# already logged by nodriver:
@@ -371,8 +419,7 @@ class WebScrapingMixin:
return True
if attempt < max_retries - 1:
LOG.debug("Port %s:%s not available, retrying in %.1f seconds (attempt %d/%d)",
host, port, retry_delay, attempt + 1, max_retries)
LOG.debug("Port %s:%s not available, retrying in %.1f seconds (attempt %d/%d)", host, port, retry_delay, attempt + 1, max_retries)
await asyncio.sleep(retry_delay)
return False
@@ -522,12 +569,7 @@ class WebScrapingMixin:
browser_paths:list[str | None] = []
match platform.system():
case "Linux":
browser_paths = [
shutil.which("chromium"),
shutil.which("chromium-browser"),
shutil.which("google-chrome"),
shutil.which("microsoft-edge")
]
browser_paths = [shutil.which("chromium"), shutil.which("chromium-browser"), shutil.which("google-chrome"), shutil.which("microsoft-edge")]
case "Darwin":
browser_paths = [
@@ -540,18 +582,15 @@ class WebScrapingMixin:
browser_paths = [
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r"\Microsoft\Edge\Application\msedge.exe",
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r"\Microsoft\Edge\Application\msedge.exe",
os.environ["PROGRAMFILES"] + r"\Chromium\Application\chrome.exe",
os.environ["PROGRAMFILES(X86)"] + r"\Chromium\Application\chrome.exe",
os.environ["LOCALAPPDATA"] + r"\Chromium\Application\chrome.exe",
os.environ["PROGRAMFILES"] + r"\Chrome\Application\chrome.exe",
os.environ["PROGRAMFILES(X86)"] + r"\Chrome\Application\chrome.exe",
os.environ["LOCALAPPDATA"] + r"\Chrome\Application\chrome.exe",
shutil.which("msedge.exe"),
shutil.which("chromium.exe"),
shutil.which("chrome.exe")
shutil.which("chrome.exe"),
]
case _ as os_name:
@@ -563,8 +602,14 @@ class WebScrapingMixin:
raise AssertionError(_("Installed browser could not be detected"))
async def web_await(self, condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
timeout:int | float | None = None, timeout_error_message:str = "", apply_multiplier:bool = True) -> T:
async def web_await(
self,
condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]],
*,
timeout:int | float | None = None,
timeout_error_message:str = "",
apply_multiplier:bool = True,
) -> T:
"""
Blocks/waits until the given condition is met.
@@ -604,7 +649,9 @@ class WebScrapingMixin:
return elem.attrs.get("disabled") is not None
async def is_displayed(elem:Element) -> bool:
return cast(bool, await elem.apply("""
return cast(
bool,
await elem.apply("""
function (element) {
var style = window.getComputedStyle(element);
return style.display !== 'none'
@@ -613,7 +660,8 @@ class WebScrapingMixin:
&& element.offsetWidth > 0
&& element.offsetHeight > 0
}
"""))
"""),
)
elem:Element = await self.web_find(selector_type, selector_value, timeout = timeout)
@@ -627,7 +675,9 @@ class WebScrapingMixin:
case Is.READONLY:
return elem.attrs.get("readonly") is not None
case Is.SELECTED:
return cast(bool, await elem.apply("""
return cast(
bool,
await elem.apply("""
function (element) {
if (element.tagName.toLowerCase() === 'input') {
if (element.type === 'checkbox' || element.type === 'radio') {
@@ -636,7 +686,8 @@ class WebScrapingMixin:
}
return false
}
"""))
"""),
)
raise AssertionError(_("Unsupported attribute: %s") % attr)
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float | None = None) -> Element:
@@ -743,11 +794,8 @@ class WebScrapingMixin:
async def attempt(effective_timeout:float) -> Element:
return await self._web_find_once(selector_type, selector_value, effective_timeout, parent = parent)
return await self._run_with_timeout_retries(
attempt,
description = f"web_find({selector_type.name}, {selector_value})",
key = "default",
override = timeout
return await self._run_with_timeout_retries( # noqa: E501
attempt, description = f"web_find({selector_type.name}, {selector_value})", key = "default", override = timeout
)
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> list[Element]:
@@ -762,10 +810,7 @@ class WebScrapingMixin:
return await self._web_find_all_once(selector_type, selector_value, effective_timeout, parent = parent)
return await self._run_with_timeout_retries(
attempt,
description = f"web_find_all({selector_type.name}, {selector_value})",
key = "default",
override = timeout
attempt, description = f"web_find_all({selector_type.name}, {selector_value})", key = "default", override = timeout
)
async def _web_find_once(self, selector_type:By, selector_value:str, timeout:float, *, parent:Element | None = None) -> Element:
@@ -778,40 +823,46 @@ class WebScrapingMixin:
lambda: self.page.query_selector(f"#{escaped_id}", parent),
timeout = timeout,
timeout_error_message = f"No HTML element found with ID '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.CLASS_NAME:
escaped_classname = selector_value.translate(METACHAR_ESCAPER)
return await self.web_await(
lambda: self.page.query_selector(f".{escaped_classname}", parent),
timeout = timeout,
timeout_error_message = f"No HTML element found with CSS class '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.TAG_NAME:
return await self.web_await(
lambda: self.page.query_selector(selector_value, parent),
timeout = timeout,
timeout_error_message = f"No HTML element found of tag <{selector_value}>{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.CSS_SELECTOR:
return await self.web_await(
lambda: self.page.query_selector(selector_value, parent),
timeout = timeout,
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.TEXT:
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await(
lambda: self.page.find_element_by_text(selector_value, best_match = True),
timeout = timeout,
timeout_error_message = f"No HTML element found containing text '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.XPATH:
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await(
lambda: self.page.find_element_by_text(selector_value, best_match = True),
timeout = timeout,
timeout_error_message = f"No HTML element found using XPath '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
raise AssertionError(_("Unsupported selector type: %s") % selector_type)
@@ -825,33 +876,38 @@ class WebScrapingMixin:
lambda: self.page.query_selector_all(f".{escaped_classname}", parent),
timeout = timeout,
timeout_error_message = f"No HTML elements found with CSS class '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.CSS_SELECTOR:
return await self.web_await(
lambda: self.page.query_selector_all(selector_value, parent),
timeout = timeout,
timeout_error_message = f"No HTML elements found using CSS selector '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.TAG_NAME:
return await self.web_await(
lambda: self.page.query_selector_all(selector_value, parent),
timeout = timeout,
timeout_error_message = f"No HTML elements found of tag <{selector_value}>{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.TEXT:
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await(
lambda: self.page.find_elements_by_text(selector_value),
timeout = timeout,
timeout_error_message = f"No HTML elements found containing text '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
case By.XPATH:
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await(
lambda: self.page.find_elements_by_text(selector_value),
timeout = timeout,
timeout_error_message = f"No HTML elements found using XPath '{selector_value}'{timeout_suffix}",
apply_multiplier = False)
apply_multiplier = False,
)
raise AssertionError(_("Unsupported selector type: %s") % selector_type)
@@ -885,11 +941,12 @@ class WebScrapingMixin:
lambda: self.web_execute("document.readyState == 'complete'"),
timeout = page_timeout,
timeout_error_message = f"Page did not finish loading within {page_timeout} seconds.",
apply_multiplier = False
apply_multiplier = False,
)
async def web_text(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> str:
return str(await (await self.web_find(selector_type, selector_value, parent = parent, timeout = timeout)).apply("""
return str(
await (await self.web_find(selector_type, selector_value, parent = parent, timeout = timeout)).apply("""
function (elem) {
let sel = window.getSelection()
sel.removeAllRanges()
@@ -900,16 +957,19 @@ class WebScrapingMixin:
sel.removeAllRanges()
return visibleText
}
"""))
""")
)
async def web_sleep(self, min_ms:int = 1_000, max_ms:int = 2_500) -> None:
duration = max_ms <= min_ms and min_ms or secrets.randbelow(max_ms - min_ms) + min_ms
LOG.log(loggers.INFO if duration > 1_500 else loggers.DEBUG, # noqa: PLR2004 Magic value used in comparison
" ... pausing for %d ms ...", duration)
LOG.log(
loggers.INFO if duration > 1_500 else loggers.DEBUG, # noqa: PLR2004 Magic value used in comparison
" ... pausing for %d ms ...",
duration,
)
await self.page.sleep(duration / 1_000)
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200,
headers:dict[str, str] | None = None) -> Any:
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200, headers:dict[str, str] | None = None) -> Any:
method = method.upper()
LOG.debug(" -> HTTP %s [%s]...", method, url)
response = await self.web_execute(f"""
@@ -933,9 +993,10 @@ class WebScrapingMixin:
valid_response_codes = [valid_response_codes]
ensure(
response["statusCode"] in valid_response_codes,
f'Invalid response "{response["statusCode"]} response["statusMessage"]" received for HTTP {method} to {url}'
f'Invalid response "{response["statusCode"]} {response["statusMessage"]}" received for HTTP {method} to {url}',
)
return response
# pylint: enable=dangerous-default-value
async def web_scroll_page_down(self, scroll_length:int = 10, scroll_speed:int = 10_000, *, scroll_back_top:bool = False) -> None:
@@ -968,8 +1029,9 @@ class WebScrapingMixin:
:raises UnexpectedTagNameException: if element is not a <select> element
"""
await self.web_await(
lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE), timeout = timeout,
timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found"
lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE),
timeout = timeout,
timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found",
)
elem = await self.web_find(selector_type, selector_value, timeout = timeout)
@@ -1107,9 +1169,7 @@ class WebScrapingMixin:
if port_available:
try:
version_info = detect_chrome_version_from_remote_debugging(
remote_host,
remote_port,
timeout = self._effective_timeout("chrome_remote_debugging")
remote_host, remote_port, timeout = self._effective_timeout("chrome_remote_debugging")
)
if version_info:
LOG.debug(" -> Detected version from existing browser: %s", version_info)
@@ -1125,10 +1185,7 @@ class WebScrapingMixin:
binary_path = self.browser_config.binary_location
if binary_path:
LOG.debug(" -> No remote browser detected, trying binary detection")
version_info = detect_chrome_version_from_binary(
binary_path,
timeout = self._effective_timeout("chrome_binary_detection")
)
version_info = detect_chrome_version_from_binary(binary_path, timeout = self._effective_timeout("chrome_binary_detection"))
# Validate if Chrome 136+ detected
if version_info and version_info.is_chrome_136_plus:
@@ -1158,14 +1215,8 @@ class WebScrapingMixin:
AssertionError: If configuration is invalid
"""
# Check if user-data-dir is specified in arguments or configuration
has_user_data_dir_arg = any(
arg.startswith("--user-data-dir=")
for arg in self.browser_config.arguments
)
has_user_data_dir_config = (
self.browser_config.user_data_dir is not None and
self.browser_config.user_data_dir.strip()
)
has_user_data_dir_arg = any(arg.startswith("--user-data-dir=") for arg in self.browser_config.arguments)
has_user_data_dir_config = self.browser_config.user_data_dir is not None and bool(self.browser_config.user_data_dir.strip())
if not has_user_data_dir_arg and not has_user_data_dir_config:
error_message = (
@@ -1198,14 +1249,18 @@ class WebScrapingMixin:
remote_host = "127.0.0.1",
remote_port = remote_port if remote_port > 0 else None,
remote_timeout = self._effective_timeout("chrome_remote_debugging"),
binary_timeout = self._effective_timeout("chrome_binary_detection")
binary_timeout = self._effective_timeout("chrome_binary_detection"),
)
# Report binary detection results
if diagnostic_info["binary_detection"]:
binary_info = diagnostic_info["binary_detection"]
LOG.info("(info) %s version from binary: %s %s (major: %d)",
binary_info["browser_name"], binary_info["browser_name"], binary_info["version_string"], binary_info["major_version"])
LOG.info(
"(info) %s version from binary: %s (major: %d)",
binary_info["browser_name"],
binary_info["version_string"],
binary_info["major_version"],
)
if binary_info["is_chrome_136_plus"]:
LOG.info("(info) %s 136+ detected - security validation required", binary_info["browser_name"])
@@ -1215,17 +1270,18 @@ class WebScrapingMixin:
# Report remote detection results
if diagnostic_info["remote_detection"]:
remote_info = diagnostic_info["remote_detection"]
LOG.info("(info) %s version from remote debugging: %s %s (major: %d)",
remote_info["browser_name"], remote_info["browser_name"], remote_info["version_string"], remote_info["major_version"])
LOG.info(
"(info) %s version from remote debugging: %s (major: %d)",
remote_info["browser_name"],
remote_info["version_string"],
remote_info["major_version"],
)
if remote_info["is_chrome_136_plus"]:
LOG.info("(info) Remote %s 136+ detected - validating configuration", remote_info["browser_name"])
# Validate configuration for Chrome/Edge 136+
is_valid, error_message = validate_chrome_136_configuration(
list(self.browser_config.arguments),
self.browser_config.user_data_dir
)
is_valid, error_message = validate_chrome_136_configuration(list(self.browser_config.arguments), self.browser_config.user_data_dir)
if not is_valid:
LOG.error("(fail) %s 136+ configuration validation failed: %s", remote_info["browser_name"], error_message)

View File

@@ -213,13 +213,13 @@ def get_browser_profile_path(mode: str | InstallationMode, config_override: str
"""
mode = _normalize_mode(mode)
if config_override:
profile_path = Path(config_override)
profile_path = Path(config_override).expanduser().resolve()
LOG.debug("Resolving browser profile path for mode '%s' (config override): %s", mode, profile_path)
elif mode == "portable":
profile_path = Path.cwd() / ".temp" / "browser-profile"
profile_path = (Path.cwd() / ".temp" / "browser-profile").resolve()
LOG.debug("Resolving browser profile path for mode '%s': %s", mode, profile_path)
else: # xdg
profile_path = get_xdg_base_dir("cache") / "browser-profile"
profile_path = (get_xdg_base_dir("cache") / "browser-profile").resolve()
LOG.debug("Resolving browser profile path for mode '%s': %s", mode, profile_path)
# Create directory if it doesn't exist

View File

@@ -33,7 +33,7 @@ async def atest_init() -> None:
web_scraping_mixin.close_browser_session()
@pytest.mark.flaky(reruns = 4, reruns_delay = 5)
@pytest.mark.flaky(reruns = 5, reruns_delay = 10)
@pytest.mark.itest
def test_init() -> None:
nodriver.loop().run_until_complete(atest_init()) # type: ignore[attr-defined]

View File

@@ -1186,7 +1186,14 @@ class TestAdExtractorDownload:
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
patch.object(
extractor, "_extract_contact_from_ad_page", new_callable=AsyncMock, return_value=ContactPartial(name="Test", zipcode="12345", location="Berlin")
extractor,
"_extract_contact_from_ad_page",
new_callable = AsyncMock,
return_value = ContactPartial(
name = "Test",
zipcode = "12345",
location = "Berlin",
),
),
):
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)
@@ -1243,7 +1250,14 @@ class TestAdExtractorDownload:
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
patch.object(
extractor, "_extract_contact_from_ad_page", new_callable=AsyncMock, return_value=ContactPartial(name="Test", zipcode="12345", location="Berlin")
extractor,
"_extract_contact_from_ad_page",
new_callable = AsyncMock,
return_value = ContactPartial(
name = "Test",
zipcode = "12345",
location = "Berlin",
),
),
):
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)
@@ -1302,7 +1316,14 @@ class TestAdExtractorDownload:
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
patch.object(
extractor, "_extract_contact_from_ad_page", new_callable=AsyncMock, return_value=ContactPartial(name="Test", zipcode="12345", location="Berlin")
extractor,
"_extract_contact_from_ad_page",
new_callable = AsyncMock,
return_value = ContactPartial(
name = "Test",
zipcode = "12345",
location = "Berlin",
),
),
):
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)
@@ -1358,7 +1379,14 @@ class TestAdExtractorDownload:
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
patch.object(
extractor, "_extract_contact_from_ad_page", new_callable=AsyncMock, return_value=ContactPartial(name="Test", zipcode="12345", location="Berlin")
extractor,
"_extract_contact_from_ad_page",
new_callable = AsyncMock,
return_value = ContactPartial(
name = "Test",
zipcode = "12345",
location = "Berlin",
),
),
):
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)

View File

@@ -641,6 +641,31 @@ class TestKleinanzeigenBotArgParsing:
test_bot.parse_args(["script.py", "help", "version"])
assert exc_info.value.code == 2
def test_parse_args_explicit_flags(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None:
    """Test that explicit flags are set when --config and --logfile options are provided.

    Each option is exercised on its own and then both together. For every
    invocation the matching ``*_explicitly_provided`` flag must be ``True`` and
    the stored path must equal the absolute form of the path passed on the
    command line.
    """
    config_path = tmp_path / "custom_config.yaml"
    log_path = tmp_path / "custom.log"
    expected_config_path = str(config_path.absolute())
    expected_log_path = str(log_path.absolute())

    # --config alone sets config_explicitly_provided and the config path
    test_bot.parse_args(["script.py", "--config", str(config_path), "help"])
    assert test_bot.config_explicitly_provided is True
    assert test_bot.config_file_path == expected_config_path

    # Clear the flag so the next invocation is not influenced by this one
    test_bot.config_explicitly_provided = False

    # --logfile alone sets log_file_explicitly_provided and the log path
    test_bot.parse_args(["script.py", "--logfile", str(log_path), "help"])
    assert test_bot.log_file_explicitly_provided is True
    assert test_bot.log_file_path == expected_log_path

    # Both options together: start from cleared flags and verify flags AND paths
    test_bot.config_explicitly_provided = False
    test_bot.log_file_explicitly_provided = False
    test_bot.parse_args(["script.py", "--config", str(config_path), "--logfile", str(log_path), "help"])
    assert test_bot.config_explicitly_provided is True
    assert test_bot.log_file_explicitly_provided is True
    assert test_bot.config_file_path == expected_config_path
    assert test_bot.log_file_path == expected_log_path
class TestKleinanzeigenBotCommands:
"""Tests for command execution."""
@@ -863,7 +888,7 @@ class TestKleinanzeigenBotAdDeletion:
async def test_delete_ad_by_title(self, test_bot:KleinanzeigenBot, minimal_ad_config:dict[str, Any]) -> None:
"""Test deleting an ad by title."""
test_bot.page = MagicMock()
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "content": "{}"})
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "statusMessage": "OK", "content": "{}"})
test_bot.page.sleep = AsyncMock()
# Use minimal config since we only need title for deletion by title
@@ -891,7 +916,7 @@ class TestKleinanzeigenBotAdDeletion:
async def test_delete_ad_by_id(self, test_bot:KleinanzeigenBot, minimal_ad_config:dict[str, Any]) -> None:
"""Test deleting an ad by ID."""
test_bot.page = MagicMock()
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "content": "{}"})
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "statusMessage": "OK", "content": "{}"})
test_bot.page.sleep = AsyncMock()
# Create config with ID for deletion by ID
@@ -918,7 +943,7 @@ class TestKleinanzeigenBotAdDeletion:
async def test_delete_ad_by_id_with_non_string_csrf_token(self, test_bot:KleinanzeigenBot, minimal_ad_config:dict[str, Any]) -> None:
"""Test deleting an ad by ID with non-string CSRF token to cover str() conversion."""
test_bot.page = MagicMock()
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "content": "{}"})
test_bot.page.evaluate = AsyncMock(return_value = {"statusCode": 200, "statusMessage": "OK", "content": "{}"})
test_bot.page.sleep = AsyncMock()
# Create config with ID for deletion by ID

View File

@@ -20,9 +20,7 @@ class TestWebScrapingMixinChromeVersionValidation:
return WebScrapingMixin()
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary")
async def test_validate_chrome_version_configuration_chrome_136_plus_valid(
self, mock_detect:Mock, scraper:WebScrapingMixin
) -> None:
async def test_validate_chrome_version_configuration_chrome_136_plus_valid(self, mock_detect:Mock, scraper:WebScrapingMixin) -> None:
"""Test Chrome 136+ validation with valid configuration."""
# Setup mocks
mock_detect.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
@@ -88,9 +86,7 @@ class TestWebScrapingMixinChromeVersionValidation:
os.environ["PYTEST_CURRENT_TEST"] = original_env
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary")
async def test_validate_chrome_version_configuration_chrome_pre_136(
self, mock_detect:Mock, scraper:WebScrapingMixin
) -> None:
async def test_validate_chrome_version_configuration_chrome_pre_136(self, mock_detect:Mock, scraper:WebScrapingMixin) -> None:
"""Test Chrome pre-136 validation (no special requirements)."""
# Setup mocks
mock_detect.return_value = ChromeVersionInfo("120.0.6099.109", 120, "Chrome")
@@ -121,11 +117,7 @@ class TestWebScrapingMixinChromeVersionValidation:
@patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging")
async def test_validate_chrome_version_logs_remote_detection(
self,
mock_remote:Mock,
mock_binary:Mock,
scraper:WebScrapingMixin,
caplog:pytest.LogCaptureFixture
self, mock_remote:Mock, mock_binary:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture
) -> None:
"""When a remote browser responds, the detected version should be logged."""
mock_remote.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
@@ -134,17 +126,14 @@ class TestWebScrapingMixinChromeVersionValidation:
scraper.browser_config.binary_location = "/path/to/chrome"
caplog.set_level("DEBUG")
with patch.dict(os.environ, {}, clear = True), \
patch.object(scraper, "_check_port_with_retry", return_value = True):
with patch.dict(os.environ, {}, clear = True), patch.object(scraper, "_check_port_with_retry", return_value = True):
await scraper._validate_chrome_version_configuration()
assert "Detected version from existing browser" in caplog.text
mock_remote.assert_called_once()
@patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
async def test_validate_chrome_version_configuration_no_binary_location(
self, mock_detect:Mock, scraper:WebScrapingMixin
) -> None:
async def test_validate_chrome_version_configuration_no_binary_location(self, mock_detect:Mock, scraper:WebScrapingMixin) -> None:
"""Test Chrome version validation when no binary location is set."""
# Configure scraper without binary location
scraper.browser_config.binary_location = None
@@ -204,15 +193,10 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
"""Test Chrome version diagnostics with binary detection."""
# Setup mocks
mock_get_diagnostic.return_value = {
"binary_detection": {
"version_string": "136.0.6778.0",
"major_version": 136,
"browser_name": "Chrome",
"is_chrome_136_plus": True
},
"binary_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
"remote_detection": None,
"chrome_136_plus_detected": True,
"recommendations": []
"recommendations": [],
}
mock_validate.return_value = (True, "")
@@ -230,7 +214,7 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
scraper._diagnose_chrome_version_issues(9222)
# Verify logs
assert "Chrome version from binary: Chrome 136.0.6778.0 (major: 136)" in caplog.text
assert "Chrome version from binary: 136.0.6778.0 (major: 136)" in caplog.text
assert "Chrome 136+ detected - security validation required" in caplog.text
# Verify mocks were called
@@ -255,14 +239,9 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
# Setup mocks
mock_get_diagnostic.return_value = {
"binary_detection": None,
"remote_detection": {
"version_string": "136.0.6778.0",
"major_version": 136,
"browser_name": "Chrome",
"is_chrome_136_plus": True
},
"remote_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
"chrome_136_plus_detected": True,
"recommendations": []
"recommendations": [],
}
mock_validate.return_value = (False, "Chrome 136+ requires --user-data-dir")
@@ -280,32 +259,22 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
scraper._diagnose_chrome_version_issues(9222)
# Verify logs
assert "Chrome version from remote debugging: Chrome 136.0.6778.0 (major: 136)" in caplog.text
assert "(info) Chrome version from remote debugging: 136.0.6778.0 (major: 136)" in caplog.text
assert "Remote Chrome 136+ detected - validating configuration" in caplog.text
assert "Chrome 136+ configuration validation failed" in caplog.text
# Verify validation was called
mock_validate.assert_called_once_with(
["--remote-debugging-port=9222"],
None
)
mock_validate.assert_called_once_with(["--remote-debugging-port=9222"], None)
finally:
# Restore environment
if original_env:
os.environ["PYTEST_CURRENT_TEST"] = original_env
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.get_chrome_version_diagnostic_info")
def test_diagnose_chrome_version_issues_no_detection(
self, mock_get_diagnostic:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture
) -> None:
def test_diagnose_chrome_version_issues_no_detection(self, mock_get_diagnostic:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test Chrome version diagnostics with no detection."""
# Setup mocks
mock_get_diagnostic.return_value = {
"binary_detection": None,
"remote_detection": None,
"chrome_136_plus_detected": False,
"recommendations": []
}
mock_get_diagnostic.return_value = {"binary_detection": None, "remote_detection": None, "chrome_136_plus_detected": False, "recommendations": []}
# Configure scraper
scraper.browser_config.binary_location = "/path/to/chrome"
@@ -334,15 +303,10 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
"""Test Chrome version diagnostics with Chrome 136+ recommendations."""
# Setup mocks
mock_get_diagnostic.return_value = {
"binary_detection": {
"version_string": "136.0.6778.0",
"major_version": 136,
"browser_name": "Chrome",
"is_chrome_136_plus": True
},
"binary_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
"remote_detection": None,
"chrome_136_plus_detected": True,
"recommendations": []
"recommendations": [],
}
# Configure scraper
@@ -377,11 +341,11 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
"version_string": "120.0.6099.109",
"major_version": 120,
"browser_name": "Chrome",
"is_chrome_136_plus": False # This triggers the else branch (lines 832-849)
"is_chrome_136_plus": False, # This triggers the else branch (lines 832-849)
},
"remote_detection": None, # Ensure this is None to avoid other branches
"chrome_136_plus_detected": False, # Ensure this is False to avoid recommendations
"recommendations": []
"recommendations": [],
}
# Configure scraper
@@ -420,14 +384,9 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
# Setup mocks
mock_get_diagnostic.return_value = {
"binary_detection": None,
"remote_detection": {
"version_string": "136.0.6778.0",
"major_version": 136,
"browser_name": "Chrome",
"is_chrome_136_plus": True
},
"remote_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
"chrome_136_plus_detected": True,
"recommendations": []
"recommendations": [],
}
mock_validate.return_value = (True, "") # This triggers the else branch (line 846)
@@ -451,7 +410,7 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
# Verify validation was called with correct arguments
mock_validate.assert_called_once_with(
["--remote-debugging-port=9222", "--user-data-dir=/tmp/chrome-debug"], # noqa: S108
"/tmp/chrome-debug" # noqa: S108
"/tmp/chrome-debug", # noqa: S108
)
finally:
# Restore environment
@@ -469,9 +428,7 @@ class TestWebScrapingMixinIntegration:
@patch.object(WebScrapingMixin, "_validate_chrome_version_configuration")
@patch.object(WebScrapingMixin, "get_compatible_browser")
async def test_create_browser_session_calls_chrome_validation(
self, mock_get_browser:Mock, mock_validate:Mock, scraper:WebScrapingMixin
) -> None:
async def test_create_browser_session_calls_chrome_validation(self, mock_get_browser:Mock, mock_validate:Mock, scraper:WebScrapingMixin) -> None:
"""Test that create_browser_session calls Chrome version validation."""
# Setup mocks
mock_get_browser.return_value = "/path/to/chrome"
@@ -493,9 +450,7 @@ class TestWebScrapingMixinIntegration:
@patch.object(WebScrapingMixin, "_diagnose_chrome_version_issues")
@patch.object(WebScrapingMixin, "get_compatible_browser")
def test_diagnose_browser_issues_calls_chrome_diagnostics(
self, mock_get_browser:Mock, mock_diagnose:Mock, scraper:WebScrapingMixin
) -> None:
def test_diagnose_browser_issues_calls_chrome_diagnostics(self, mock_get_browser:Mock, mock_diagnose:Mock, scraper:WebScrapingMixin) -> None:
"""Test that diagnose_browser_issues calls Chrome version diagnostics."""
# Setup mocks
mock_get_browser.return_value = "/path/to/chrome"
@@ -521,9 +476,7 @@ class TestWebScrapingMixinIntegration:
# Mock Chrome version detection to return pre-136 version
with patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary") as mock_detect:
mock_detect.return_value = ChromeVersionInfo(
"120.0.6099.109", 120, "Chrome"
)
mock_detect.return_value = ChromeVersionInfo("120.0.6099.109", 120, "Chrome")
# Temporarily unset PYTEST_CURRENT_TEST to allow validation to run
original_env = os.environ.get("PYTEST_CURRENT_TEST")
@@ -541,3 +494,68 @@ class TestWebScrapingMixinIntegration:
# Restore environment
if original_env:
os.environ["PYTEST_CURRENT_TEST"] = original_env
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary")
async def test_validate_chrome_136_configuration_with_whitespace_user_data_dir(
    self, mock_detect:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture
) -> None:
    """Test Chrome 136+ validation correctly handles whitespace-only user_data_dir.

    The binary version detection is mocked to report Chrome 136, and the
    scraper is configured with a ``user_data_dir`` that contains only
    whitespace. The validation is expected to log a failure mentioning the
    missing ``--user-data-dir``.
    """
    # Setup mocks
    mock_detect.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")

    # Configure scraper with whitespace-only user_data_dir
    scraper.browser_config.binary_location = "/path/to/chrome"
    scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
    scraper.browser_config.user_data_dir = " "  # Only whitespace

    # Temporarily unset PYTEST_CURRENT_TEST to allow validation to run.
    # patch.dict snapshots os.environ and restores it on exit (even if the
    # awaited call raises), including a previously empty-string value that a
    # truthiness-based manual restore would silently drop.
    with patch.dict(os.environ):
        os.environ.pop("PYTEST_CURRENT_TEST", None)

        # Validation should fail because whitespace-only is treated as empty
        await scraper._validate_chrome_version_configuration()

    # Verify detection was called
    assert mock_detect.call_count == 1

    # Verify error was logged
    assert "Chrome 136+ configuration validation failed" in caplog.text
    assert "Chrome 136+ requires --user-data-dir" in caplog.text
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary")
async def test_validate_chrome_136_configuration_with_valid_user_data_dir(
    self, mock_detect:Mock, scraper:WebScrapingMixin, caplog:pytest.LogCaptureFixture
) -> None:
    """Test Chrome 136+ validation passes with valid user_data_dir.

    The binary version detection is mocked to report Chrome 136, and the
    scraper is configured with a non-empty ``user_data_dir``. The validation
    is expected to log that the configuration check passed.
    """
    # Setup mocks
    mock_detect.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")

    # Configure scraper with valid user_data_dir
    scraper.browser_config.binary_location = "/path/to/chrome"
    scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
    scraper.browser_config.user_data_dir = "/tmp/valid-profile"  # noqa: S108

    # Temporarily unset PYTEST_CURRENT_TEST to allow validation to run.
    # patch.dict snapshots os.environ and restores it on exit (even if the
    # awaited call raises), including a previously empty-string value that a
    # truthiness-based manual restore would silently drop.
    with patch.dict(os.environ):
        os.environ.pop("PYTEST_CURRENT_TEST", None)

        # Validation should pass
        await scraper._validate_chrome_version_configuration()

    # Verify detection was called
    assert mock_detect.call_count == 1

    # Verify success was logged
    assert "Chrome 136+ configuration validation passed" in caplog.text