From 50fc8781a992eda96b833c196d3e471a480a98e6 Mon Sep 17 00:00:00 2001
From: Jens <1742418+1cu@users.noreply.github.com>
Date: Fri, 13 Feb 2026 16:45:52 +0100
Subject: [PATCH] feat: collect timeout timing sessions for diagnostics (#814)
---
README.md | 8 +-
docs/BROWSER_TROUBLESHOOTING.md | 5 +
docs/CONFIGURATION.md | 40 +-
src/kleinanzeigen_bot/__init__.py | 17 +-
src/kleinanzeigen_bot/extract.py | 2 +-
src/kleinanzeigen_bot/model/config_model.py | 4 +
.../resources/translations.de.yaml | 13 +
.../utils/timing_collector.py | 168 ++++
.../utils/web_scraping_mixin.py | 54 +-
tests/smoke/test_smoke_health.py | 12 +-
tests/unit/test_timing_collector.py | 204 +++++
tests/unit/test_web_scraping_mixin.py | 725 ++++++++++--------
12 files changed, 902 insertions(+), 350 deletions(-)
create mode 100644 src/kleinanzeigen_bot/utils/timing_collector.py
create mode 100644 tests/unit/test_timing_collector.py
diff --git a/README.md b/README.md
index fad419c..98113a6 100644
--- a/README.md
+++ b/README.md
@@ -272,7 +272,7 @@ Path resolution rules:
- Runtime files are mode-dependent write locations (for example, logfile, update state, browser profile/cache, diagnostics, and downloaded ads).
- `--config` selects only the config file; it does not silently switch workspace mode.
-- `--workspace-mode=portable`: runtime files are rooted next to the active config file (or the current working directory if no `--config` is supplied).
+- `--workspace-mode=portable`: runtime files are placed in the same directory as the active config file (or the current working directory if no `--config` is supplied).
- `--workspace-mode=xdg`: runtime files use OS-standard user directories.
- `--config` without `--workspace-mode`: mode is inferred from existing footprints; on ambiguity/unknown, the command fails with guidance (for example: `Could not infer workspace mode for --config ...`) and asks you to rerun with `--workspace-mode=portable` or `--workspace-mode=xdg`.
@@ -280,7 +280,7 @@ Examples:
- `kleinanzeigen-bot --config /sync/dropbox/config1.yaml verify` (no `--workspace-mode`): mode is inferred from detected footprints; if both portable and user-directories footprints are found (or none are found), the command fails and lists the found paths.
- `kleinanzeigen-bot --workspace-mode=portable --config /sync/dropbox/config1.yaml verify`: runtime files are rooted at `/sync/dropbox/` (for example `/sync/dropbox/.temp/` and `/sync/dropbox/downloaded-ads/`).
-- `kleinanzeigen-bot --workspace-mode=xdg --config /sync/dropbox/config1.yaml verify`: config is read from `/sync/dropbox/config1.yaml`, while runtime files stay in user directories (for example Linux `~/.config/kleinanzeigen-bot/`, `~/.local/state/kleinanzeigen-bot/`, `~/.cache/kleinanzeigen-bot/`).
+- `kleinanzeigen-bot --workspace-mode=xdg --config /sync/dropbox/config1.yaml verify`: config is read from `/sync/dropbox/config1.yaml`, while runtime files stay in user directories (on Linux: `~/.config/kleinanzeigen-bot/`, `~/.local/state/kleinanzeigen-bot/`, `~/.cache/kleinanzeigen-bot/`).
1. **Portable mode (recommended for most users, especially on Windows):**
@@ -296,11 +296,11 @@ Examples:
**OS notes (brief):**
-- **Windows:** User directories mode uses AppData (Roaming/Local); portable keeps everything beside the `.exe`.
+- **Windows:** User directories mode uses AppData (Roaming/Local); portable keeps everything alongside the `.exe`.
- **Linux:** User directories mode uses `~/.config/kleinanzeigen-bot/config.yaml`, `~/.local/state/kleinanzeigen-bot/`, and `~/.cache/kleinanzeigen-bot/`; portable uses `./config.yaml`, `./.temp/`, and `./downloaded-ads/`.
- **macOS:** User directories mode uses `~/Library/Application Support/kleinanzeigen-bot/config.yaml` (config), `~/Library/Application Support/kleinanzeigen-bot/` (state/runtime), and `~/Library/Caches/kleinanzeigen-bot/` (cache/diagnostics); portable stays in the current working directory.
-If you have mixed legacy footprints (portable + XDG), pass an explicit mode (for example `--workspace-mode=portable`) and then clean up unused files. See [Configuration: Installation Modes](docs/CONFIGURATION.md#installation-modes).
+If you have footprints from both modes (portable + XDG), pass an explicit mode (for example `--workspace-mode=portable`) and then clean up unused files. See [Configuration: Installation Modes](docs/CONFIGURATION.md#installation-modes).
### 1) Main configuration ⚙️
diff --git a/docs/BROWSER_TROUBLESHOOTING.md b/docs/BROWSER_TROUBLESHOOTING.md
index 539761a..37646ff 100644
--- a/docs/BROWSER_TROUBLESHOOTING.md
+++ b/docs/BROWSER_TROUBLESHOOTING.md
@@ -78,6 +78,11 @@ The bot will also provide specific instructions on how to fix your configuration
1. Override specific keys under `timeouts` (e.g., `pagination_initial: 20.0`) if only a single selector is problematic.
1. For slow email verification prompts, raise `timeouts.email_verification`.
1. Keep `retry_enabled` on so that DOM lookups are retried with exponential backoff.
+1. Attach `timing_data.json` when opening issues so maintainers can tune defaults from real-world timing evidence.
+ - It is written automatically during runs when `diagnostics.timing_collection` is enabled (default: `true`, see `CONFIGURATION.md`).
+ - Portable mode path: `./.temp/timing/timing_data.json`
+ - User directories mode path: `~/.cache/kleinanzeigen-bot/timing/timing_data.json` (Linux), `~/Library/Caches/kleinanzeigen-bot/timing/timing_data.json` (macOS), or `%LOCALAPPDATA%\kleinanzeigen-bot\Cache\timing\timing_data.json` (Windows)
+ - Which one applies depends on your installation mode: portable mode writes next to your config file (or the current directory); user directories mode writes to OS-standard user paths. Check which path exists on your system, or see `CONFIGURATION.md#installation-modes` for mode selection details.
### Issue: Bot fails to detect existing login session
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
index 508ca0b..9413218 100644
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@@ -262,6 +262,7 @@ diagnostics:
publish: false # Capture screenshot + HTML + JSON on each failed publish attempt (timeouts/protocol errors)
capture_log_copy: false # Copy entire bot log file when diagnostics are captured (may duplicate log content)
pause_on_login_detection_failure: false # Pause for manual inspection (interactive only)
+ timing_collection: true # Collect timeout timing data locally for troubleshooting and tuning
output_dir: "" # Custom output directory (see "Output locations (default)" below)
```
@@ -309,7 +310,44 @@ The bot uses a layered approach to detect login state, prioritizing stealth over
- **User directories mode**: `~/.cache/kleinanzeigen-bot/diagnostics/` (Linux), `~/Library/Caches/kleinanzeigen-bot/diagnostics/` (macOS), or `%LOCALAPPDATA%\kleinanzeigen-bot\Cache\diagnostics\` (Windows)
- **Custom**: Path resolved relative to your `config.yaml` if `output_dir` is specified
-> **⚠️ PII Warning:** HTML dumps, JSON payloads, and log copies may contain PII. Typical examples include account email, ad titles/descriptions, contact info, and prices. Log copies are produced by `capture_log_copy` when diagnostics capture runs, such as `capture_on.publish` or `capture_on.login_detection`. Review or redact these artifacts before sharing them publicly.
+**Timing collection output (default):**
+
+- **Portable mode**: `./.temp/timing/timing_data.json`
+- **User directories mode**: `~/.cache/kleinanzeigen-bot/timing/timing_data.json` (Linux), `~/Library/Caches/kleinanzeigen-bot/timing/timing_data.json` (macOS), or `%LOCALAPPDATA%\kleinanzeigen-bot\Cache\timing\timing_data.json` (Windows)
+- Data is grouped by run/session and retained for 30 days via automatic cleanup during each data write
+
+Example structure:
+
+```json
+[
+ {
+ "session_id": "abc12345",
+ "command": "publish",
+ "started_at": "2026-02-07T10:00:00+01:00",
+ "ended_at": "2026-02-07T10:04:30+01:00",
+ "records": [
+ {
+ "operation_key": "default",
+ "operation_type": "web_find",
+ "effective_timeout_sec": 5.0,
+ "actual_duration_sec": 1.2,
+ "attempt_index": 0,
+ "success": true
+ }
+ ]
+ }
+]
+```
+
+How to read it quickly:
+
+- Group by `command` and `session_id` first to compare slow vs fast runs
+- Look for high `actual_duration_sec` values near `effective_timeout_sec` and repeated `success: false` entries
+- `attempt_index` is zero-based (`0` first attempt, `1` first retry)
+- Use `operation_key` + `operation_type` to identify which timeout bucket (`default`, `page_load`, etc.) needs tuning
+- For deeper timeout tuning workflow, see [Browser Troubleshooting](./BROWSER_TROUBLESHOOTING.md)
+
+> **⚠️ PII Warning:** HTML dumps, JSON payloads, timing data JSON files (for example `timing_data.json`), and log copies may contain PII. Typical examples include account email, ad titles/descriptions, contact info, and prices. Log copies are produced by `capture_log_copy` when diagnostics capture runs, such as `capture_on.publish` or `capture_on.login_detection`. Review or redact these artifacts before sharing them publicly.
## Installation Modes
diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py
index f274d44..fa1ba2e 100644
--- a/src/kleinanzeigen_bot/__init__.py
+++ b/src/kleinanzeigen_bot/__init__.py
@@ -24,6 +24,7 @@ from .utils.exceptions import CaptchaEncountered
from .utils.files import abspath
from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale
from .utils.misc import ainput, ensure, is_frozen
+from .utils.timing_collector import TimingCollector
from .utils.web_scraping_mixin import By, Element, Is, WebScrapingMixin
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
@@ -179,13 +180,14 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
self._log_basename = os.path.splitext(os.path.basename(sys.executable))[0] if is_frozen() else self.__module__
self.log_file_path:str | None = abspath(f"{self._log_basename}.log")
self._logfile_arg:str | None = None
- self._logfile_explicitly_provided = False
+ self._logfile_explicitly_provided:bool = False
self.command = "help"
self.ads_selector = "due"
self.keep_old_ads = False
self._login_detection_diagnostics_captured:bool = False
+ self._timing_collector:TimingCollector | None = None
def __del__(self) -> None:
if self.file_log:
@@ -393,6 +395,12 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
sys.exit(2)
finally:
self.close_browser_session()
+ if self._timing_collector is not None:
+ try:
+ loop = asyncio.get_running_loop()
+ await loop.run_in_executor(None, self._timing_collector.flush)
+ except Exception as exc: # noqa: BLE001
+ LOG.warning("Timing collector flush failed: %s", exc)
def show_help(self) -> None:
if is_frozen():
@@ -613,6 +621,13 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
config_yaml = dicts.load_dict_if_exists(self.config_file_path, _("config"))
self.config = Config.model_validate(config_yaml, strict = True, context = self.config_file_path)
+ timing_enabled = self.config.diagnostics.timing_collection
+ if timing_enabled and self.workspace:
+ timing_dir = self.workspace.diagnostics_dir.parent / "timing"
+ self._timing_collector = TimingCollector(timing_dir, self.command)
+ else:
+ self._timing_collector = None
+
# load built-in category mappings
self.categories = dicts.load_dict_from_module(resources, "categories.yaml", "")
LOG.debug("Loaded %s categories from categories.yaml", len(self.categories))
diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py
index da51f0e..6fa82cd 100644
--- a/src/kleinanzeigen_bot/extract.py
+++ b/src/kleinanzeigen_bot/extract.py
@@ -43,7 +43,7 @@ class AdExtractor(WebScrapingMixin):
super().__init__()
self.browser = browser
self.config:Config = config
- self.download_dir = download_dir
+ self.download_dir:Path = download_dir
self.published_ads_by_id:dict[int, dict[str, Any]] = published_ads_by_id or {}
async def download_ad(self, ad_id:int) -> None:
diff --git a/src/kleinanzeigen_bot/model/config_model.py b/src/kleinanzeigen_bot/model/config_model.py
index 379adc2..0e2d44d 100644
--- a/src/kleinanzeigen_bot/model/config_model.py
+++ b/src/kleinanzeigen_bot/model/config_model.py
@@ -265,6 +265,10 @@ class DiagnosticsConfig(ContextualModel):
default = None,
description = "Optional output directory for diagnostics artifacts. If omitted, a safe default is used based on installation mode.",
)
+ timing_collection:bool = Field(
+ default = True,
+ description = "If true, collect local timeout timing data and write it to diagnostics JSON for troubleshooting and tuning.",
+ )
@model_validator(mode = "before")
@classmethod
diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml
index 17fc093..1ec4ef2 100644
--- a/src/kleinanzeigen_bot/resources/translations.de.yaml
+++ b/src/kleinanzeigen_bot/resources/translations.de.yaml
@@ -261,6 +261,7 @@ kleinanzeigen_bot/__init__.py:
"You provided no ads selector. Defaulting to \"new\".": "Es wurden keine Anzeigen-Selektor angegeben. Es wird \"new\" verwendet."
"You provided no ads selector. Defaulting to \"changed\".": "Es wurden keine Anzeigen-Selektor angegeben. Es wird \"changed\" verwendet."
"Unknown command: %s": "Unbekannter Befehl: %s"
+ "Timing collector flush failed: %s": "Zeitmessdaten konnten nicht gespeichert werden: %s"
fill_login_data_and_send:
"Logging in as [%s]...": "Anmeldung als [%s]..."
@@ -527,6 +528,9 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
"Last page reached (no enabled 'Naechste' button found).": "Letzte Seite erreicht (kein aktivierter 'Naechste'-Button gefunden)."
"No pagination controls found. Assuming last page.": "Keine Paginierungssteuerung gefunden. Es wird von der letzten Seite ausgegangen."
+ _record_timing:
+ "Timing collector failed for key=%s operation=%s: %s": "Zeitmessung fehlgeschlagen für key=%s operation=%s: %s"
+
close_browser_session:
"Closing Browser session...": "Schließe Browser-Sitzung..."
@@ -685,6 +689,15 @@ kleinanzeigen_bot/utils/diagnostics.py:
"Diagnostics capture attempted but no artifacts were saved (all captures failed)": "Diagnoseerfassung versucht, aber keine Artefakte gespeichert (alle Erfassungen fehlgeschlagen)"
"Diagnostics capture failed: %s": "Diagnoseerfassung fehlgeschlagen: %s"
+#################################################
+kleinanzeigen_bot/utils/timing_collector.py:
+#################################################
+ _load_existing_sessions:
+ "Unable to load timing collection data from %s: %s": "Zeitmessdaten aus %s konnten nicht geladen werden: %s"
+
+ flush:
+ "Failed to flush timing collection data: %s": "Zeitmessdaten konnten nicht gespeichert werden: %s"
+
#################################################
kleinanzeigen_bot/utils/xdg_paths.py:
#################################################
diff --git a/src/kleinanzeigen_bot/utils/timing_collector.py b/src/kleinanzeigen_bot/utils/timing_collector.py
new file mode 100644
index 0000000..8f146ca
--- /dev/null
+++ b/src/kleinanzeigen_bot/utils/timing_collector.py
@@ -0,0 +1,168 @@
+# SPDX-FileCopyrightText: © Jens Bergmann and contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
+
+"""Collect per-operation timeout timings and persist per-run JSON sessions.
+
+`TimingCollector` records operation durations in seconds, grouped by a single bot run
+(`session_id`). Call `record(...)` during runtime and `flush()` once at command end to
+append the current session to `timing_data.json` with automatic 30-day retention.
+The collector is best-effort and designed for troubleshooting, not strict telemetry.
+"""
+
+from __future__ import annotations
+
+import json, uuid # isort: skip
+import os
+from dataclasses import asdict, dataclass
+from datetime import timedelta
+from typing import TYPE_CHECKING, Any, Final
+
+if TYPE_CHECKING:
+ from pathlib import Path
+
+from kleinanzeigen_bot.utils import loggers, misc
+
+LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
+
+RETENTION_DAYS:Final[int] = 30
+TIMING_FILE:Final[str] = "timing_data.json"
+
+
+@dataclass
+class TimingRecord:
+ timestamp:str
+ operation_key:str
+ operation_type:str
+ description:str
+ configured_timeout_sec:float
+ effective_timeout_sec:float
+ actual_duration_sec:float
+ attempt_index:int
+ success:bool
+
+ def to_dict(self) -> dict[str, Any]:
+ return asdict(self)
+
+
+class TimingCollector:
+ def __init__(self, output_dir:Path, command:str) -> None:
+ self.output_dir = output_dir.resolve()
+ self.command = command
+ self.session_id = uuid.uuid4().hex[:8]
+ self.started_at = misc.now().isoformat()
+ self.records:list[TimingRecord] = []
+ self._flushed = False
+
+ LOG.debug("Timing collection initialized (session=%s, output_dir=%s, command=%s)", self.session_id, self.output_dir, command)
+
+ def record(
+ self,
+ *,
+ key:str,
+ operation_type:str,
+ description:str,
+ configured_timeout:float,
+ effective_timeout:float,
+ actual_duration:float,
+ attempt_index:int,
+ success:bool,
+ ) -> None:
+ self.records.append(
+ TimingRecord(
+ timestamp = misc.now().isoformat(),
+ operation_key = key,
+ operation_type = operation_type,
+ description = description,
+ configured_timeout_sec = configured_timeout,
+ effective_timeout_sec = effective_timeout,
+ actual_duration_sec = actual_duration,
+ attempt_index = attempt_index,
+ success = success,
+ )
+ )
+ LOG.debug(
+ "Timing captured: %s [%s] duration=%.3fs timeout=%.3fs success=%s",
+ operation_type,
+ key,
+ actual_duration,
+ effective_timeout,
+ success,
+ )
+
+ def flush(self) -> Path | None:
+ if self._flushed:
+ LOG.debug("Timing collection already flushed for this run")
+ return None
+ if not self.records:
+ LOG.debug("Timing collection enabled but no records captured in this run")
+ return None
+
+ try:
+ self.output_dir.mkdir(parents = True, exist_ok = True)
+ data = self._load_existing_sessions()
+ data.append(
+ {
+ "session_id": self.session_id,
+ "command": self.command,
+ "started_at": self.started_at,
+ "ended_at": misc.now().isoformat(),
+ "records": [record.to_dict() for record in self.records],
+ }
+ )
+
+ cutoff = misc.now() - timedelta(days = RETENTION_DAYS)
+ retained:list[dict[str, Any]] = []
+ dropped = 0
+ for session in data:
+ try:
+ parsed = misc.parse_datetime(session.get("started_at"), add_timezone_if_missing = True)
+ except ValueError:
+ parsed = None
+ if parsed is None:
+ dropped += 1
+ continue
+ if parsed >= cutoff:
+ retained.append(session)
+ else:
+ dropped += 1
+
+ if dropped > 0:
+ LOG.debug("Timing collection pruned %d old or malformed sessions", dropped)
+
+ output_file = self.output_dir / TIMING_FILE
+ temp_file = self.output_dir / f".{TIMING_FILE}.{self.session_id}.tmp"
+ with temp_file.open("w", encoding = "utf-8") as fd:
+ json.dump(retained, fd, indent = 2)
+ fd.write("\n")
+ fd.flush()
+ os.fsync(fd.fileno())
+ temp_file.replace(output_file)
+
+ LOG.debug(
+ "Timing collection flushed to %s (%d sessions, %d current records, retention=%d days)",
+ output_file,
+ len(retained),
+ len(self.records),
+ RETENTION_DAYS,
+ )
+ self.records = []
+ self._flushed = True
+ return output_file
+ except Exception as exc: # noqa: BLE001
+ LOG.warning("Failed to flush timing collection data: %s", exc)
+ return None
+
+ def _load_existing_sessions(self) -> list[dict[str, Any]]:
+ file_path = self.output_dir / TIMING_FILE
+ if not file_path.exists():
+ return []
+
+ try:
+ with file_path.open(encoding = "utf-8") as fd:
+ payload = json.load(fd)
+ if isinstance(payload, list):
+ return [item for item in payload if isinstance(item, dict)]
+ except Exception as exc: # noqa: BLE001
+ LOG.warning("Unable to load timing collection data from %s: %s", file_path, exc)
+ return []
diff --git a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py
index f81e7ff..d354866 100644
--- a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py
+++ b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py
@@ -183,6 +183,36 @@ class WebScrapingMixin:
# Always perform the initial attempt plus the configured number of retries.
return 1 + cfg.retry_max_attempts
+ def _record_timing(
+ self,
+ *,
+ key:str,
+ description:str,
+ configured_timeout:float,
+ effective_timeout:float,
+ actual_duration:float,
+ attempt_index:int,
+ success:bool,
+ ) -> None:
+ collector = getattr(self, "_timing_collector", None)
+ if collector is None:
+ return
+
+ operation_type = description.split("(", 1)[0] if "(" in description else description
+ try:
+ collector.record(
+ key = key,
+ operation_type = operation_type,
+ description = description,
+ configured_timeout = configured_timeout,
+ effective_timeout = effective_timeout,
+ actual_duration = actual_duration,
+ attempt_index = attempt_index,
+ success = success,
+ )
+ except Exception as exc: # noqa: BLE001
+ LOG.warning("Timing collector failed for key=%s operation=%s: %s", key, operation_type, exc)
+
async def _run_with_timeout_retries(
self, operation:Callable[[float], Awaitable[T]], *, description:str, key:str = "default", override:float | None = None
) -> T:
@@ -190,12 +220,34 @@ class WebScrapingMixin:
Execute an async callable with retry/backoff handling for TimeoutError.
"""
attempts = self._timeout_attempts()
+ configured_timeout = self._timeout(key, override)
+ loop = asyncio.get_running_loop()
for attempt in range(attempts):
effective_timeout = self._effective_timeout(key, override, attempt = attempt)
+ attempt_started = loop.time()
try:
- return await operation(effective_timeout)
+ result = await operation(effective_timeout)
+ self._record_timing(
+ key = key,
+ description = description,
+ configured_timeout = configured_timeout,
+ effective_timeout = effective_timeout,
+ actual_duration = loop.time() - attempt_started,
+ attempt_index = attempt,
+ success = True,
+ )
+ return result
except TimeoutError:
+ self._record_timing(
+ key = key,
+ description = description,
+ configured_timeout = configured_timeout,
+ effective_timeout = effective_timeout,
+ actual_duration = loop.time() - attempt_started,
+ attempt_index = attempt,
+ success = False,
+ )
if attempt >= attempts - 1:
raise
LOG.debug("Retrying %s after TimeoutError (attempt %d/%d, timeout %.1fs)", description, attempt + 1, attempts, effective_timeout)
diff --git a/tests/smoke/test_smoke_health.py b/tests/smoke/test_smoke_health.py
index 1fec612..10b13e8 100644
--- a/tests/smoke/test_smoke_health.py
+++ b/tests/smoke/test_smoke_health.py
@@ -96,12 +96,12 @@ def invoke_cli(
set_current_locale(previous_locale)
-def _xdg_env_overrides(tmp_path:Path) -> dict[str, str]:
- """Create temporary HOME/XDG environment overrides for isolated smoke test runs."""
- home = tmp_path / "home"
- xdg_config = tmp_path / "xdg" / "config"
- xdg_state = tmp_path / "xdg" / "state"
- xdg_cache = tmp_path / "xdg" / "cache"
+def _xdg_env_overrides(base_path:Path) -> dict[str, str]:
+ """Create temporary HOME/XDG environment overrides rooted at the provided base path."""
+ home = base_path / "home"
+ xdg_config = base_path / "xdg" / "config"
+ xdg_state = base_path / "xdg" / "state"
+ xdg_cache = base_path / "xdg" / "cache"
for path in (home, xdg_config, xdg_state, xdg_cache):
path.mkdir(parents = True, exist_ok = True)
return {
diff --git a/tests/unit/test_timing_collector.py b/tests/unit/test_timing_collector.py
new file mode 100644
index 0000000..c0febb2
--- /dev/null
+++ b/tests/unit/test_timing_collector.py
@@ -0,0 +1,204 @@
+# SPDX-FileCopyrightText: © Jens Bergmann and contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
+
+import json
+from datetime import timedelta
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from kleinanzeigen_bot.utils import misc
+from kleinanzeigen_bot.utils.timing_collector import RETENTION_DAYS, TimingCollector
+
+pytestmark = pytest.mark.unit
+
+
+class TestTimingCollector:
+ def test_output_dir_resolves_to_given_path(self, tmp_path:Path) -> None:
+ collector = TimingCollector(tmp_path / "xdg-cache" / "timing", "publish")
+
+ assert collector.output_dir == (tmp_path / "xdg-cache" / "timing").resolve()
+
+ def test_flush_writes_session_data(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
+ monkeypatch.chdir(tmp_path)
+ collector = TimingCollector(tmp_path / ".temp" / "timing", "publish")
+ collector.record(
+ key = "default",
+ operation_type = "web_find",
+ description = "web_find(ID, submit)",
+ configured_timeout = 5.0,
+ effective_timeout = 5.0,
+ actual_duration = 0.4,
+ attempt_index = 0,
+ success = True,
+ )
+
+ file_path = collector.flush()
+
+ assert file_path is not None
+ assert file_path.exists()
+
+ data = json.loads(file_path.read_text(encoding = "utf-8"))
+ assert isinstance(data, list)
+ assert len(data) == 1
+ assert data[0]["command"] == "publish"
+ assert len(data[0]["records"]) == 1
+ assert data[0]["records"][0]["operation_key"] == "default"
+
+ def test_flush_prunes_old_and_malformed_sessions(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
+ monkeypatch.chdir(tmp_path)
+
+ output_dir = tmp_path / ".temp" / "timing"
+ output_dir.mkdir(parents = True, exist_ok = True)
+ data_path = output_dir / "timing_data.json"
+
+ old_started = (misc.now() - timedelta(days = RETENTION_DAYS + 1)).isoformat()
+ recent_started = (misc.now() - timedelta(days = 2)).isoformat()
+
+ existing_payload = [
+ {
+ "session_id": "old-session",
+ "command": "publish",
+ "started_at": old_started,
+ "ended_at": old_started,
+ "records": [],
+ },
+ {
+ "session_id": "recent-session",
+ "command": "publish",
+ "started_at": recent_started,
+ "ended_at": recent_started,
+ "records": [],
+ },
+ {
+ "session_id": "malformed-session",
+ "command": "publish",
+ "started_at": "not-a-datetime",
+ "ended_at": "not-a-datetime",
+ "records": [],
+ },
+ ]
+ data_path.write_text(json.dumps(existing_payload), encoding = "utf-8")
+
+ collector = TimingCollector(tmp_path / ".temp" / "timing", "verify")
+ collector.record(
+ key = "default",
+ operation_type = "web_find",
+ description = "web_find(ID, submit)",
+ configured_timeout = 5.0,
+ effective_timeout = 5.0,
+ actual_duration = 0.2,
+ attempt_index = 0,
+ success = True,
+ )
+
+ file_path = collector.flush()
+
+ assert file_path is not None
+ data = json.loads(file_path.read_text(encoding = "utf-8"))
+ session_ids = [session["session_id"] for session in data]
+ assert "old-session" not in session_ids
+ assert "malformed-session" not in session_ids
+ assert "recent-session" in session_ids
+ assert collector.session_id in session_ids
+
+ def test_flush_returns_none_when_already_flushed(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
+ monkeypatch.chdir(tmp_path)
+ collector = TimingCollector(tmp_path / ".temp" / "timing", "publish")
+ collector.record(
+ key = "default",
+ operation_type = "web_find",
+ description = "web_find(ID, submit)",
+ configured_timeout = 5.0,
+ effective_timeout = 5.0,
+ actual_duration = 0.1,
+ attempt_index = 0,
+ success = True,
+ )
+
+ first = collector.flush()
+ second = collector.flush()
+
+ assert first is not None
+ assert second is None
+
+ def test_flush_returns_none_when_no_records(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
+ monkeypatch.chdir(tmp_path)
+ collector = TimingCollector(tmp_path / ".temp" / "timing", "publish")
+
+ assert collector.flush() is None
+
+ def test_flush_recovers_from_corrupted_json(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
+ monkeypatch.chdir(tmp_path)
+
+ output_dir = tmp_path / ".temp" / "timing"
+ output_dir.mkdir(parents = True, exist_ok = True)
+ data_path = output_dir / "timing_data.json"
+ data_path.write_text("{ this is invalid json", encoding = "utf-8")
+
+ collector = TimingCollector(tmp_path / ".temp" / "timing", "verify")
+ collector.record(
+ key = "default",
+ operation_type = "web_find",
+ description = "web_find(ID, submit)",
+ configured_timeout = 5.0,
+ effective_timeout = 5.0,
+ actual_duration = 0.1,
+ attempt_index = 0,
+ success = True,
+ )
+
+ file_path = collector.flush()
+
+ assert file_path is not None
+ payload = json.loads(file_path.read_text(encoding = "utf-8"))
+ assert isinstance(payload, list)
+ assert len(payload) == 1
+ assert payload[0]["session_id"] == collector.session_id
+
+ def test_flush_ignores_non_list_payload(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
+ monkeypatch.chdir(tmp_path)
+
+ output_dir = tmp_path / ".temp" / "timing"
+ output_dir.mkdir(parents = True, exist_ok = True)
+ data_path = output_dir / "timing_data.json"
+ data_path.write_text(json.dumps({"unexpected": "shape"}), encoding = "utf-8")
+
+ collector = TimingCollector(tmp_path / ".temp" / "timing", "verify")
+ collector.record(
+ key = "default",
+ operation_type = "web_find",
+ description = "web_find(ID, submit)",
+ configured_timeout = 5.0,
+ effective_timeout = 5.0,
+ actual_duration = 0.1,
+ attempt_index = 0,
+ success = True,
+ )
+
+ file_path = collector.flush()
+
+ assert file_path is not None
+ payload = json.loads(file_path.read_text(encoding = "utf-8"))
+ assert isinstance(payload, list)
+ assert len(payload) == 1
+ assert payload[0]["session_id"] == collector.session_id
+
+ def test_flush_returns_none_when_write_raises(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
+ monkeypatch.chdir(tmp_path)
+ collector = TimingCollector(tmp_path / ".temp" / "timing", "verify")
+ collector.record(
+ key = "default",
+ operation_type = "web_find",
+ description = "web_find(ID, submit)",
+ configured_timeout = 5.0,
+ effective_timeout = 5.0,
+ actual_duration = 0.1,
+ attempt_index = 0,
+ success = True,
+ )
+
+ with patch.object(Path, "mkdir", side_effect = OSError("cannot create dir")):
+ assert collector.flush() is None
diff --git a/tests/unit/test_web_scraping_mixin.py b/tests/unit/test_web_scraping_mixin.py
index c18c483..f3a835f 100644
--- a/tests/unit/test_web_scraping_mixin.py
+++ b/tests/unit/test_web_scraping_mixin.py
@@ -31,6 +31,7 @@ from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin,
class ConfigProtocol(Protocol):
"""Protocol for Config objects used in tests."""
+
extensions:list[str]
browser_args:list[str]
user_data_dir:str | None
@@ -44,6 +45,23 @@ def _nodriver_start_mock() -> Mock:
return cast(Mock, cast(Any, nodriver).start)
+class RecordingCollector:
+ """Helper collector that stores timing records for assertions."""
+
+ def __init__(self, sink:list[dict[str, Any]]) -> None:
+ self._sink = sink
+
+ def record(self, **kwargs:Any) -> None:
+ self._sink.append(kwargs)
+
+
+class FailingCollector:
+ """Helper collector that raises to test error handling."""
+
+ def record(self, **kwargs:Any) -> None:
+ raise RuntimeError("collector failed")
+
+
class TrulyAwaitableMockPage:
"""A helper to make a mock Page object truly awaitable for tests."""
@@ -271,7 +289,7 @@ class TestWebScrapingErrorHandling:
input_field.send_keys.assert_awaited_once_with(special_value)
# Verify that the JavaScript received properly escaped value
call_args = dropdown_elem.apply.call_args[0][0]
- assert '"quotes"' in call_args or r'\"quotes\"' in call_args # JSON escaping should handle quotes
+ assert '"quotes"' in call_args or r"\"quotes\"" in call_args # JSON escaping should handle quotes
@pytest.mark.asyncio
async def test_web_select_by_value(self, web_scraper:WebScrapingMixin) -> None:
@@ -336,7 +354,9 @@ class TestWebScrapingErrorHandling:
await web_scraper.web_open("https://example.com", timeout = 0.1)
@pytest.mark.asyncio
- async def test_web_open_skip_when_url_already_loaded(self, web_scraper:WebScrapingMixin, mock_browser:AsyncMock, mock_page:TrulyAwaitableMockPage) -> None:
+ async def test_web_open_skip_when_url_already_loaded(
+ self, web_scraper:WebScrapingMixin, mock_browser:AsyncMock, mock_page:TrulyAwaitableMockPage
+ ) -> None:
"""web_open should short-circuit when the requested URL is already active."""
mock_browser.get.reset_mock()
mock_page.url = "https://example.com"
@@ -402,8 +422,7 @@ class TestWebScrapingErrorHandling:
recorded:list[tuple[float, bool]] = []
- async def fake_web_await(condition:Callable[[], object], *, timeout:float, timeout_error_message:str = "",
- apply_multiplier:bool = True) -> Element:
+ async def fake_web_await(condition:Callable[[], object], *, timeout:float, timeout_error_message:str = "", apply_multiplier:bool = True) -> Element:
recorded.append((timeout, apply_multiplier))
raise TimeoutError(timeout_error_message or "timeout")
@@ -420,10 +439,12 @@ class TestTimeoutAndRetryHelpers:
def test_get_timeout_config_prefers_config_timeouts(self, web_scraper:WebScrapingMixin) -> None:
"""_get_timeout_config should return the config-provided timeout model when available."""
- custom_config = Config.model_validate({
- "login": {"username": "user@example.com", "password": "secret"}, # noqa: S105
- "timeouts": {"default": 7.5}
- })
+ custom_config = Config.model_validate(
+ {
+ "login": {"username": "user@example.com", "password": "secret"}, # noqa: S105
+ "timeouts": {"default": 7.5},
+ }
+ )
web_scraper.config = custom_config
assert web_scraper._get_timeout_config() is custom_config.timeouts
@@ -454,14 +475,65 @@ class TestTimeoutAndRetryHelpers:
assert result == "done"
assert len(attempts) == 2
+ @pytest.mark.asyncio
+ async def test_run_with_timeout_retries_records_success_timing(self, web_scraper:WebScrapingMixin) -> None:
+ """_run_with_timeout_retries should emit a timing record for successful attempts."""
+ recorded:list[dict[str, Any]] = []
+ cast(Any, web_scraper)._timing_collector = RecordingCollector(recorded)
+
+ async def operation(_timeout:float) -> str:
+ return "ok"
+
+ result = await web_scraper._run_with_timeout_retries(operation, description = "web_find(ID, test)")
+
+ assert result == "ok"
+ assert len(recorded) == 1
+ assert recorded[0]["operation_type"] == "web_find"
+ assert recorded[0]["success"] is True
+ assert recorded[0]["attempt_index"] == 0
+
+ @pytest.mark.asyncio
+ async def test_run_with_timeout_retries_records_timeout_timing(self, web_scraper:WebScrapingMixin) -> None:
+ """_run_with_timeout_retries should emit timing records for timed out attempts."""
+ recorded:list[dict[str, Any]] = []
+ cast(Any, web_scraper)._timing_collector = RecordingCollector(recorded)
+ web_scraper.config.timeouts.retry_max_attempts = 1
+
+ async def always_timeout(_timeout:float) -> str:
+ raise TimeoutError("boom")
+
+ with pytest.raises(TimeoutError, match = "boom"):
+ await web_scraper._run_with_timeout_retries(always_timeout, description = "web_find(ID, test)")
+
+ assert len(recorded) == 2
+ assert all(entry["operation_type"] == "web_find" for entry in recorded)
+ assert all(entry["success"] is False for entry in recorded)
+ assert recorded[0]["attempt_index"] == 0
+ assert recorded[1]["attempt_index"] == 1
+
+ @pytest.mark.asyncio
+ async def test_run_with_timeout_retries_ignores_collector_failure(self, web_scraper:WebScrapingMixin) -> None:
+ """_run_with_timeout_retries should continue when timing collector record fails."""
+ cast(Any, web_scraper)._timing_collector = FailingCollector()
+
+ async def operation(_timeout:float) -> str:
+ return "ok"
+
+ result = await web_scraper._run_with_timeout_retries(operation, description = "web_find(ID, test)")
+
+ assert result == "ok"
+
@pytest.mark.asyncio
async def test_run_with_timeout_retries_guard_clause(self, web_scraper:WebScrapingMixin) -> None:
"""_run_with_timeout_retries should guard against zero-attempt edge cases."""
+
async def never_called(timeout:float) -> None:
pytest.fail("operation should not run when attempts are zero")
- with patch.object(web_scraper, "_timeout_attempts", return_value = 0), \
- pytest.raises(TimeoutError, match = "guarded-op failed without executing operation"):
+ with (
+ patch.object(web_scraper, "_timeout_attempts", return_value = 0),
+ pytest.raises(TimeoutError, match = "guarded-op failed without executing operation"),
+ ):
await web_scraper._run_with_timeout_retries(never_called, description = "guarded-op")
@@ -476,15 +548,9 @@ class TestSelectorTimeoutMessages:
(By.CSS_SELECTOR, ".hero", "No HTML element found using CSS selector '.hero' within 2.0 seconds."),
(By.TEXT, "Submit", "No HTML element found containing text 'Submit' within 2.0 seconds."),
(By.XPATH, "//div[@class='hero']", "No HTML element found using XPath '//div[@class='hero']' within 2.0 seconds."),
- ]
+ ],
)
- async def test_web_find_timeout_suffixes(
- self,
- web_scraper:WebScrapingMixin,
- selector_type:By,
- selector_value:str,
- expected_message:str
- ) -> None:
+ async def test_web_find_timeout_suffixes(self, web_scraper:WebScrapingMixin, selector_type:By, selector_value:str, expected_message:str) -> None:
"""web_find should pass descriptive timeout messages for every selector strategy."""
mock_element = AsyncMock(spec = Element)
mock_wait = AsyncMock(return_value = mock_element)
@@ -506,14 +572,10 @@ class TestSelectorTimeoutMessages:
(By.TAG_NAME, "article", "No HTML elements found of tag within 1 seconds."),
(By.TEXT, "Listings", "No HTML elements found containing text 'Listings' within 1 seconds."),
(By.XPATH, "//footer", "No HTML elements found using XPath '//footer' within 1 seconds."),
- ]
+ ],
)
async def test_web_find_all_once_timeout_suffixes(
- self,
- web_scraper:WebScrapingMixin,
- selector_type:By,
- selector_value:str,
- expected_message:str
+ self, web_scraper:WebScrapingMixin, selector_type:By, selector_value:str, expected_message:str
) -> None:
"""_web_find_all_once should surface informative timeout errors for each selector."""
elements = [AsyncMock(spec = Element)]
@@ -674,12 +736,7 @@ class TestWebScrolling:
# Expect four scrollTo operations: two down, two up
assert scripts.count("document.body.scrollHeight") == 1
scroll_calls = [script for script in scripts if script.startswith("window.scrollTo")]
- assert scroll_calls == [
- "window.scrollTo(0, 10)",
- "window.scrollTo(0, 20)",
- "window.scrollTo(0, 10)",
- "window.scrollTo(0, 0)"
- ]
+ assert scroll_calls == ["window.scrollTo(0, 10)", "window.scrollTo(0, 20)", "window.scrollTo(0, 10)", "window.scrollTo(0, 0)"]
sleep_durations = [call.args[0] for call in mock_sleep.await_args_list]
assert sleep_durations == [1.0, 1.0, 0.5, 0.5]
@@ -838,6 +895,7 @@ class TestWebScrapingBrowserConfiguration:
@pytest.mark.asyncio
async def test_browser_profile_configuration(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
"""Test browser profile configuration and preferences handling."""
+
class DummyConfig:
def __init__(self, **kwargs:object) -> None:
self.browser_args:list[str] = []
@@ -889,7 +947,7 @@ class TestWebScrapingBrowserConfiguration:
"C:\\Users\\runneradmin\\AppData\\Local\\Chromium\\Application\\chrome.exe",
"C:\\Program Files\\Chrome\\Application\\chrome.exe",
"C:\\Program Files (x86)\\Chrome\\Application\\chrome.exe",
- "C:\\Users\\runneradmin\\AppData\\Local\\Chrome\\Application\\chrome.exe"
+ "C:\\Users\\runneradmin\\AppData\\Local\\Chrome\\Application\\chrome.exe",
}:
return True
if "Preferences" in str(path) and str(tmp_path) in str(path):
@@ -934,6 +992,7 @@ class TestWebScrapingBrowserConfiguration:
@pytest.mark.asyncio
async def test_browser_arguments_configuration(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
"""Test browser arguments configuration."""
+
class DummyConfig:
def __init__(self, **kwargs:object) -> None:
self.browser_args:list[str] = []
@@ -960,6 +1019,7 @@ class TestWebScrapingBrowserConfiguration:
async def mock_exists_async(path:str | Path) -> bool:
return str(path) in {"/usr/bin/chrome", "/usr/bin/edge"}
+
monkeypatch.setattr(files, "exists", mock_exists_async)
# Test with custom arguments
@@ -1077,6 +1137,7 @@ class TestWebScrapingBrowserConfiguration:
@pytest.mark.asyncio
async def test_browser_extension_loading(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
"""Test browser extension loading."""
+
class DummyConfig:
def __init__(self, **kwargs:object) -> None:
self.browser_args:list[str] = []
@@ -1157,12 +1218,8 @@ class TestWebScrapingBrowserConfiguration:
# Test Linux with multiple browser options
def which_mock(x:str) -> str | None:
- return {
- "chromium": "/usr/bin/chromium",
- "chromium-browser": None,
- "google-chrome": None,
- "microsoft-edge": None
- }.get(x)
+ return {"chromium": "/usr/bin/chromium", "chromium-browser": None, "google-chrome": None, "microsoft-edge": None}.get(x)
+
monkeypatch.setattr(platform, "system", lambda: "Linux")
monkeypatch.setattr(shutil, "which", which_mock)
monkeypatch.setattr(os.path, "isfile", lambda p: p == "/usr/bin/chromium")
@@ -1215,6 +1272,7 @@ class TestWebScrapingBrowserConfiguration:
@pytest.mark.asyncio
async def test_session_state_persistence(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
"""Test that session state persists across browser restarts when user_data_dir is set."""
+
# DummyConfig to simulate browser config
class DummyConfig:
def __init__(self, **kwargs:object) -> None:
@@ -1269,6 +1327,7 @@ class TestWebScrapingBrowserConfiguration:
@pytest.mark.asyncio
async def test_session_creation_error_cleanup(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
"""Test that resources are cleaned up when session creation fails."""
+
class DummyConfig:
def __init__(self, **kwargs:object) -> None:
self.browser_args:list[str] = []
@@ -1336,6 +1395,7 @@ class TestWebScrapingBrowserConfiguration:
@pytest.mark.asyncio
async def test_external_process_termination(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
"""Test handling of external browser process termination."""
+
class DummyConfig:
def __init__(self, **kwargs:object) -> None:
self.browser_args:list[str] = []
@@ -1427,8 +1487,7 @@ class TestWebScrapingDiagnostics:
def test_diagnose_browser_issues_binary_exists_executable(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when browser binary exists and is executable."""
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True):
+ with patch("os.path.exists", return_value = True), patch("os.access", return_value = True):
scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
scraper_with_config.diagnose_browser_issues()
@@ -1437,8 +1496,7 @@ class TestWebScrapingDiagnostics:
def test_diagnose_browser_issues_binary_exists_not_executable(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when browser binary exists but is not executable."""
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = False):
+ with patch("os.path.exists", return_value = True), patch("os.access", return_value = False):
scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
scraper_with_config.diagnose_browser_issues()
@@ -1470,13 +1528,15 @@ class TestWebScrapingDiagnostics:
assert "(fail) No compatible browser found" in caplog.text
def test_diagnose_browser_issues_user_data_dir_exists_readable(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
) -> None:
"""Test diagnostic when user data directory exists and is readable/writable."""
test_dir = str(tmp_path / "chrome-profile")
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.browser_config.user_data_dir = test_dir
scraper_with_config.diagnose_browser_issues()
@@ -1484,13 +1544,15 @@ class TestWebScrapingDiagnostics:
assert "(ok) User data directory is readable and writable" in caplog.text
def test_diagnose_browser_issues_user_data_dir_exists_not_readable(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
) -> None:
"""Test diagnostic when user data directory exists but is not readable/writable."""
test_dir = str(tmp_path / "chrome-profile")
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = False), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = False),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.browser_config.user_data_dir = test_dir
scraper_with_config.diagnose_browser_issues()
@@ -1498,22 +1560,24 @@ class TestWebScrapingDiagnostics:
assert "(fail) User data directory permissions issue" in caplog.text
def test_diagnose_browser_issues_user_data_dir_not_exists(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
) -> None:
"""Test diagnostic when user data directory does not exist."""
test_dir = str(tmp_path / "chrome-profile")
- with patch("os.path.exists", side_effect = lambda path: path != test_dir), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", side_effect = lambda path: path != test_dir),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.browser_config.user_data_dir = test_dir
scraper_with_config.diagnose_browser_issues()
assert f"(info) User data directory does not exist (will be created): {test_dir}" in caplog.text
def test_diagnose_browser_issues_remote_debugging_port_configured_open(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
"""Test diagnostic when remote debugging port is configured and open."""
- with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
- patch("urllib.request.urlopen") as mock_urlopen:
+ with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), patch("urllib.request.urlopen") as mock_urlopen:
mock_response = Mock()
mock_response.read.return_value = b'{"Browser": "Chrome/120.0.0.0"}'
mock_urlopen.return_value = mock_response
@@ -1526,10 +1590,13 @@ class TestWebScrapingDiagnostics:
assert "(ok) Remote debugging API accessible - Browser: Chrome/120.0.0.0" in caplog.text
def test_diagnose_browser_issues_remote_debugging_port_configured_open_api_fails(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
"""Test diagnostic when remote debugging port is open but API is not accessible."""
- with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
- patch("urllib.request.urlopen", side_effect = Exception("Connection refused")):
+ with (
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True),
+ patch("urllib.request.urlopen", side_effect = Exception("Connection refused")),
+ ):
scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
scraper_with_config.diagnose_browser_issues()
@@ -1539,7 +1606,8 @@ class TestWebScrapingDiagnostics:
assert "This might indicate a browser update issue or configuration problem" in caplog.text
def test_diagnose_browser_issues_remote_debugging_port_configured_closed(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
"""Test diagnostic when remote debugging port is configured but closed."""
with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = False):
scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
@@ -1549,7 +1617,8 @@ class TestWebScrapingDiagnostics:
assert "(info) Remote debugging port is not open" in caplog.text
def test_diagnose_browser_issues_remote_debugging_port_not_configured(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
"""Test diagnostic when remote debugging port is not configured."""
scraper_with_config.browser_config.arguments = ["--other-arg"]
scraper_with_config.diagnose_browser_issues()
@@ -1565,11 +1634,13 @@ class TestWebScrapingDiagnostics:
Mock(info = {"pid": 1234, "name": "chrome", "cmdline": ["/usr/bin/chrome"]}),
Mock(info = {"pid": 5678, "name": "chromium", "cmdline": ["/usr/bin/chromium"]}),
Mock(info = {"pid": 9012, "name": "edge", "cmdline": ["/usr/bin/edge"]}),
- Mock(info = {"pid": 3456, "name": "chrome", "cmdline": ["/usr/bin/chrome", "--remote-debugging-port=9222"]})
+ Mock(info = {"pid": 3456, "name": "chrome", "cmdline": ["/usr/bin/chrome", "--remote-debugging-port=9222"]}),
]
- with patch("psutil.process_iter", return_value = mock_processes), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("psutil.process_iter", return_value = mock_processes),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.diagnose_browser_issues()
# Should find 2 chrome processes (target browser), one with debugging, one without
@@ -1586,7 +1657,7 @@ class TestWebScrapingDiagnostics:
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.get_chrome_version_diagnostic_info")
def test_diagnose_browser_issues_macos_platform_with_user_data_dir(
- self, mock_get_diagnostic:Mock, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ self, mock_get_diagnostic:Mock, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
) -> None:
"""Test diagnostic on macOS platform with user data directory."""
test_dir = str(tmp_path / "chrome-profile")
@@ -1594,14 +1665,9 @@ class TestWebScrapingDiagnostics:
# Setup mock for Chrome 136+ detection with valid configuration
mock_get_diagnostic.return_value = {
"binary_detection": None,
- "remote_detection": {
- "version_string": "136.0.6778.0",
- "major_version": 136,
- "browser_name": "Chrome",
- "is_chrome_136_plus": True
- },
+ "remote_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
"chrome_136_plus_detected": True,
- "recommendations": []
+ "recommendations": [],
}
# Temporarily unset PYTEST_CURRENT_TEST to allow diagnostics to run
@@ -1610,13 +1676,14 @@ class TestWebScrapingDiagnostics:
del os.environ["PYTEST_CURRENT_TEST"]
try:
- with patch("platform.system", return_value = "Darwin"), \
- patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
- patch("urllib.request.urlopen") as mock_urlopen, \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
-
+ with (
+ patch("platform.system", return_value = "Darwin"),
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True),
+ patch("urllib.request.urlopen") as mock_urlopen,
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
# Mock Chrome 136+ detection from remote debugging
mock_response = Mock()
mock_response.read.return_value = b'{"Browser": "Chrome/136.0.6778.0"}'
@@ -1636,9 +1703,11 @@ class TestWebScrapingDiagnostics:
def test_diagnose_browser_issues_linux_platform_not_root(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic on Linux platform when not running as root."""
- with patch("platform.system", return_value = "Linux"), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False):
+ with (
+ patch("platform.system", return_value = "Linux"),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ ):
scraper_with_config.diagnose_browser_issues()
# Linux platform detection was removed - no specific message expected
@@ -1648,9 +1717,11 @@ class TestWebScrapingDiagnostics:
def test_diagnose_browser_issues_linux_platform_root(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic on Linux platform when running as root."""
- with patch("platform.system", return_value = "Linux"), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = True):
+ with (
+ patch("platform.system", return_value = "Linux"),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = True),
+ ):
scraper_with_config.diagnose_browser_issues()
# Linux platform detection was removed - no specific message expected
@@ -1659,8 +1730,10 @@ class TestWebScrapingDiagnostics:
def test_diagnose_browser_issues_unknown_platform(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic on unknown platform."""
- with patch("platform.system", return_value = "UnknownOS"), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("platform.system", return_value = "UnknownOS"),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.diagnose_browser_issues()
# Should not show any platform-specific messages
@@ -1670,28 +1743,25 @@ class TestWebScrapingDiagnostics:
def test_diagnose_browser_issues_macos_remote_debugging_instructions(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic shows macOS-specific remote debugging instructions."""
- with patch("platform.system", return_value = "Darwin"), \
- patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = False), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("platform.system", return_value = "Darwin"),
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = False),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
scraper_with_config.diagnose_browser_issues()
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.get_chrome_version_diagnostic_info")
def test_diagnose_browser_issues_chrome_136_plus_misconfigured(
- self, mock_get_diagnostic:Mock, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ self, mock_get_diagnostic:Mock, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
) -> None:
"""Test diagnostic when Chrome 136+ is detected but user data directory is not configured."""
# Setup mock for Chrome 136+ detection with invalid configuration
mock_get_diagnostic.return_value = {
"binary_detection": None,
- "remote_detection": {
- "version_string": "136.0.6778.0",
- "major_version": 136,
- "browser_name": "Chrome",
- "is_chrome_136_plus": True
- },
+ "remote_detection": {"version_string": "136.0.6778.0", "major_version": 136, "browser_name": "Chrome", "is_chrome_136_plus": True},
"chrome_136_plus_detected": True,
- "recommendations": []
+ "recommendations": [],
}
# Temporarily unset PYTEST_CURRENT_TEST to allow diagnostics to run
@@ -1700,10 +1770,11 @@ class TestWebScrapingDiagnostics:
del os.environ["PYTEST_CURRENT_TEST"]
try:
- with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
- patch("urllib.request.urlopen") as mock_urlopen, \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
-
+ with (
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True),
+ patch("urllib.request.urlopen") as mock_urlopen,
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
# Mock Chrome 136+ detection from remote debugging
mock_response = Mock()
mock_response.read.return_value = b'{"Browser": "Chrome/136.0.6778.0"}'
@@ -1725,18 +1796,19 @@ class TestWebScrapingDiagnostics:
os.environ["PYTEST_CURRENT_TEST"] = original_env
def test_diagnose_browser_issues_complete_diagnostic_flow(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
) -> None:
"""Test complete diagnostic flow with all components."""
test_dir = str(tmp_path / "chrome-profile")
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
- patch("urllib.request.urlopen") as mock_urlopen, \
- patch("psutil.process_iter", return_value = []), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False):
-
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True),
+ patch("urllib.request.urlopen") as mock_urlopen,
+ patch("psutil.process_iter", return_value = []),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ ):
mock_response = Mock()
mock_response.read.return_value = b'{"Browser": "Chrome/120.0.0.0"}'
mock_urlopen.return_value = mock_response
@@ -1761,169 +1833,168 @@ class TestWebScrapingDiagnostics:
assert "Linux detected" not in caplog.text
assert "=== End Diagnostics ===" in caplog.text
- def test_diagnose_browser_issues_remote_debugging_host_configured(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_remote_debugging_host_configured(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when remote debugging host is configured."""
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
- patch("urllib.request.urlopen") as mock_urlopen, \
- patch("psutil.process_iter", return_value = []), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True),
+ patch("urllib.request.urlopen") as mock_urlopen,
+ patch("psutil.process_iter", return_value = []),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
mock_response = Mock()
mock_response.read.return_value = b'{"Browser": "Chrome/120.0.0.0"}'
mock_urlopen.return_value = mock_response
- scraper_with_config.browser_config.arguments = [
- "--remote-debugging-host=192.168.1.100",
- "--remote-debugging-port=9222"
- ]
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-host=192.168.1.100", "--remote-debugging-port=9222"]
scraper_with_config.diagnose_browser_issues()
assert "(info) Remote debugging port configured: 9222" in caplog.text
assert "(ok) Remote debugging port is open" in caplog.text
- def test_diagnose_browser_issues_process_info_missing_name(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_process_info_missing_name(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when process info is missing name."""
mock_process = Mock()
mock_process.info = {"pid": 1234, "name": None, "cmdline": []}
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("psutil.process_iter", return_value = [mock_process]), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("psutil.process_iter", return_value = [mock_process]),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.diagnose_browser_issues()
assert "(info) No browser processes currently running" in caplog.text
- def test_diagnose_browser_issues_psutil_exception_handling(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_psutil_exception_handling(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when psutil raises an exception during process iteration."""
# Mock psutil.process_iter to return a list that will cause an exception when accessing proc.info
mock_process = Mock()
mock_process.info = {"name": "chrome"}
mock_processes = [mock_process]
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("psutil.process_iter", return_value = mock_processes), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"), \
- patch.object(mock_process, "info", side_effect = psutil.AccessDenied):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("psutil.process_iter", return_value = mock_processes),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ patch.object(mock_process, "info", side_effect = psutil.AccessDenied),
+ ):
scraper_with_config.diagnose_browser_issues()
# Should handle the exception gracefully and continue
assert "=== Browser Connection Diagnostics ===" in caplog.text
assert "=== End Diagnostics ===" in caplog.text
- def test_diagnose_browser_issues_browser_not_executable(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_browser_not_executable(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when browser binary exists but is not executable."""
scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = False), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch("psutil.process_iter", return_value = []):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = False),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch("psutil.process_iter", return_value = []),
+ ):
scraper_with_config.diagnose_browser_issues()
assert "(fail) Browser binary is not executable" in caplog.text
- def test_diagnose_browser_issues_browser_not_found(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_browser_not_found(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when browser binary does not exist."""
scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
- with patch("os.path.exists", return_value = False), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch("psutil.process_iter", return_value = []):
+ with (
+ patch("os.path.exists", return_value = False),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch("psutil.process_iter", return_value = []),
+ ):
scraper_with_config.diagnose_browser_issues()
assert "(fail) Browser binary not found:" in caplog.text
- def test_diagnose_browser_issues_no_browser_auto_detection(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_no_browser_auto_detection(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when no browser binary is configured and auto-detection fails."""
scraper_with_config.browser_config.binary_location = None
- with patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch("psutil.process_iter", return_value = []), \
- patch.object(scraper_with_config, "get_compatible_browser", side_effect = AssertionError("No browser found")):
+ with (
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch("psutil.process_iter", return_value = []),
+ patch.object(scraper_with_config, "get_compatible_browser", side_effect = AssertionError("No browser found")),
+ ):
scraper_with_config.diagnose_browser_issues()
- assert "(fail) No compatible browser found" in caplog.text
+ assert "(fail) No compatible browser found" in caplog.text
def test_diagnose_browser_issues_user_data_dir_permissions_issue(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
) -> None:
"""Test diagnostic when user data directory has permission issues."""
test_dir = str(tmp_path / "chrome-profile")
scraper_with_config.browser_config.user_data_dir = test_dir
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = False), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = False),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.diagnose_browser_issues()
assert "(fail) User data directory permissions issue" in caplog.text
- def test_diagnose_browser_issues_remote_debugging_api_inaccessible(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_remote_debugging_api_inaccessible(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when remote debugging port is open but API is not accessible."""
scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
- patch("urllib.request.urlopen", side_effect = Exception("Connection refused")), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True),
+ patch("urllib.request.urlopen", side_effect = Exception("Connection refused")),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.diagnose_browser_issues()
assert "(fail) Remote debugging port is open but API not accessible" in caplog.text
assert "This might indicate a browser update issue or configuration problem" in caplog.text
- def test_diagnose_browser_issues_macos_chrome_warning(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_macos_chrome_warning(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when macOS Chrome remote debugging is configured without user_data_dir."""
scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
scraper_with_config.browser_config.user_data_dir = None
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("psutil.process_iter", return_value = []), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = False), \
- patch("platform.system", return_value = "Darwin"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("psutil.process_iter", return_value = []),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = False),
+ patch("platform.system", return_value = "Darwin"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.diagnose_browser_issues()
- def test_diagnose_browser_issues_linux_root_user(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_linux_root_user(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test diagnostic when running as root on Linux."""
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = True), \
- patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = True),
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ ):
scraper_with_config.diagnose_browser_issues()
assert "(fail) Running as root - this can cause browser issues" in caplog.text
@@ -1947,74 +2018,74 @@ class TestWebScrapingDiagnostics:
mock_process2.info = {"name": "edge"}
mock_processes = [mock_process1, mock_process2]
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("psutil.process_iter", return_value = mock_processes), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.WebScrapingMixin._diagnose_chrome_version_issues"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = False), \
- patch.object(web_scraper, "get_compatible_browser", return_value = "/usr/bin/chrome"), \
- patch.object(mock_process1, "info", side_effect = psutil.NoSuchProcess(pid = 123)), \
- patch.object(mock_process2, "info", side_effect = psutil.AccessDenied(pid = 456)):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("psutil.process_iter", return_value = mock_processes),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.WebScrapingMixin._diagnose_chrome_version_issues"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = False),
+ patch.object(web_scraper, "get_compatible_browser", return_value = "/usr/bin/chrome"),
+ patch.object(mock_process1, "info", side_effect = psutil.NoSuchProcess(pid = 123)),
+ patch.object(mock_process2, "info", side_effect = psutil.AccessDenied(pid = 456)),
+ ):
# Should not raise any exceptions
web_scraper.diagnose_browser_issues()
- def test_diagnose_browser_issues_handles_per_process_errors(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_handles_per_process_errors(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""diagnose_browser_issues should ignore psutil errors raised per process."""
caplog.set_level(logging.INFO)
class FailingProcess:
-
@property
def info(self) -> dict[str, object]:
raise psutil.AccessDenied(pid = 999)
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("psutil.process_iter", return_value = [FailingProcess()]), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch.object(scraper_with_config, "_diagnose_chrome_version_issues"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("psutil.process_iter", return_value = [FailingProcess()]),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch.object(scraper_with_config, "_diagnose_chrome_version_issues"),
+ ):
scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
scraper_with_config.diagnose_browser_issues()
assert "(info) No browser processes currently running" in caplog.text
- def test_diagnose_browser_issues_handles_global_psutil_failure(
- self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ def test_diagnose_browser_issues_handles_global_psutil_failure(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""diagnose_browser_issues should log a warning if psutil.process_iter fails entirely."""
caplog.set_level(logging.WARNING)
- with patch("os.path.exists", return_value = True), \
- patch("os.access", return_value = True), \
- patch("psutil.process_iter", side_effect = psutil.Error("boom")), \
- patch("platform.system", return_value = "Linux"), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
- patch.object(scraper_with_config, "_diagnose_chrome_version_issues"):
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("os.access", return_value = True),
+ patch("psutil.process_iter", side_effect = psutil.Error("boom")),
+ patch("platform.system", return_value = "Linux"),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False),
+ patch.object(scraper_with_config, "_diagnose_chrome_version_issues"),
+ ):
scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
scraper_with_config.diagnose_browser_issues()
assert "(warn) Unable to inspect browser processes:" in caplog.text
@pytest.mark.asyncio
- async def test_validate_chrome_version_configuration_port_open_but_api_inaccessible(
- self, web_scraper:WebScrapingMixin
- ) -> None:
+ async def test_validate_chrome_version_configuration_port_open_but_api_inaccessible(self, web_scraper:WebScrapingMixin) -> None:
"""Test _validate_chrome_version_configuration when port is open but API is inaccessible."""
# Configure remote debugging
web_scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
web_scraper.browser_config.binary_location = "/usr/bin/chrome"
- with patch.dict("os.environ", {}, clear = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.WebScrapingMixin._check_port_with_retry", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging", return_value = None), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary", return_value = None), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.LOG") as mock_log:
-
+ with (
+ patch.dict("os.environ", {}, clear = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.WebScrapingMixin._check_port_with_retry", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging", return_value = None),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary", return_value = None),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.LOG") as mock_log,
+ ):
# Should not raise any exceptions and should log the appropriate debug message
await web_scraper._validate_chrome_version_configuration()
@@ -2022,20 +2093,19 @@ class TestWebScrapingDiagnostics:
mock_log.debug.assert_any_call(" -> Port is open but remote debugging API not accessible")
@pytest.mark.asyncio
- async def test_validate_chrome_version_configuration_remote_detection_exception(
- self, web_scraper:WebScrapingMixin
- ) -> None:
+ async def test_validate_chrome_version_configuration_remote_detection_exception(self, web_scraper:WebScrapingMixin) -> None:
"""Test _validate_chrome_version_configuration when remote detection raises exception."""
# Configure remote debugging
web_scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
web_scraper.browser_config.binary_location = "/usr/bin/chrome"
- with patch.dict("os.environ", {}, clear = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.WebScrapingMixin._check_port_with_retry", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging", side_effect = Exception("Test exception")), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary", return_value = None), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.LOG") as mock_log:
-
+ with (
+ patch.dict("os.environ", {}, clear = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.WebScrapingMixin._check_port_with_retry", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging", side_effect = Exception("Test exception")),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary", return_value = None),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.LOG") as mock_log,
+ ):
# Should not raise any exceptions and should log the appropriate debug message
await web_scraper._validate_chrome_version_configuration()
@@ -2045,19 +2115,18 @@ class TestWebScrapingDiagnostics:
assert len(debug_calls) > 0, "Expected debug message not found"
@pytest.mark.asyncio
- async def test_validate_chrome_version_configuration_no_existing_browser(
- self, web_scraper:WebScrapingMixin
- ) -> None:
+ async def test_validate_chrome_version_configuration_no_existing_browser(self, web_scraper:WebScrapingMixin) -> None:
"""Test _validate_chrome_version_configuration when no existing browser is found."""
# Configure remote debugging
web_scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
web_scraper.browser_config.binary_location = "/usr/bin/chrome"
- with patch.dict("os.environ", {}, clear = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.WebScrapingMixin._check_port_with_retry", return_value = False), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary", return_value = None), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.LOG") as mock_log:
-
+ with (
+ patch.dict("os.environ", {}, clear = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.WebScrapingMixin._check_port_with_retry", return_value = False),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_binary", return_value = None),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.LOG") as mock_log,
+ ):
# Should not raise any exceptions and should log the appropriate debug message
await web_scraper._validate_chrome_version_configuration()
@@ -2077,16 +2146,15 @@ class TestWebScrapingMixinPortRetry:
return scraper
@pytest.mark.asyncio
- async def test_browser_connection_error_handling(
- self, scraper_with_remote_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ async def test_browser_connection_error_handling(self, scraper_with_remote_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test error handling when browser connection fails."""
- with patch("os.path.exists", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to connect as root user")), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
-
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to connect as root user")),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class,
+ ):
mock_config = Mock()
mock_config_class.return_value = mock_config
@@ -2098,15 +2166,16 @@ class TestWebScrapingMixinPortRetry:
@pytest.mark.asyncio
async def test_browser_connection_error_handling_non_root_error(
- self, scraper_with_remote_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ self, scraper_with_remote_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
) -> None:
"""Test error handling when browser connection fails with non-root error."""
- with patch("os.path.exists", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Connection timeout")), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
-
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Connection timeout")),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class,
+ ):
mock_config = Mock()
mock_config_class.return_value = mock_config
@@ -2125,15 +2194,14 @@ class TestWebScrapingMixinPortRetry:
return scraper
@pytest.mark.asyncio
- async def test_browser_startup_error_handling_root_error(
- self, scraper_with_startup_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ async def test_browser_startup_error_handling_root_error(self, scraper_with_startup_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test error handling when browser startup fails with root error."""
- with patch("os.path.exists", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to start as root user")), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
-
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to start as root user")),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class,
+ ):
mock_config = Mock()
mock_config_class.return_value = mock_config
@@ -2144,15 +2212,14 @@ class TestWebScrapingMixinPortRetry:
assert "Failed to start browser. This error often occurs when:" in caplog.text
@pytest.mark.asyncio
- async def test_browser_startup_error_handling_non_root_error(
- self, scraper_with_startup_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
- ) -> None:
+ async def test_browser_startup_error_handling_non_root_error(self, scraper_with_startup_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
"""Test error handling when browser startup fails with non-root error."""
- with patch("os.path.exists", return_value = True), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Browser binary not found")), \
- patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
-
+ with (
+ patch("os.path.exists", return_value = True),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Browser binary not found")),
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class,
+ ):
mock_config = Mock()
mock_config_class.return_value = mock_config
@@ -2207,19 +2274,18 @@ class TestWebScrapingMixinProfileHandling:
scraper.browser_config.profile_name = "TestProfile"
return scraper
- def test_profile_directory_creation_with_user_data_dir(
- self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
- ) -> None:
+ def test_profile_directory_creation_with_user_data_dir(self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path) -> None:
"""Test profile directory creation when user_data_dir is configured."""
test_dir = str(tmp_path / "test-profile")
scraper_with_profile_config.browser_config.user_data_dir = test_dir
- with patch("os.path.join", return_value = os.path.join(test_dir, "TestProfile")), \
- patch("os.makedirs") as mock_makedirs, \
- patch("os.path.exists", return_value = False), \
- patch("builtins.open", mock_open()), \
- patch("json.dump"):
-
+ with (
+ patch("os.path.join", return_value = os.path.join(test_dir, "TestProfile")),
+ patch("os.makedirs") as mock_makedirs,
+ patch("os.path.exists", return_value = False),
+ patch("builtins.open", mock_open()),
+ patch("json.dump"),
+ ):
# This would be called during browser session creation
profile_dir = os.path.join(test_dir, "TestProfile")
mock_makedirs.assert_not_called() # Not called yet
@@ -2228,18 +2294,17 @@ class TestWebScrapingMixinProfileHandling:
os.makedirs(profile_dir, exist_ok = True)
mock_makedirs.assert_called_with(profile_dir, exist_ok = True)
- def test_profile_directory_creation_with_preferences_file(
- self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
- ) -> None:
+ def test_profile_directory_creation_with_preferences_file(self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path) -> None:
"""Test profile directory creation with preferences file when it doesn't exist."""
test_dir = str(tmp_path / "test-profile")
scraper_with_profile_config.browser_config.user_data_dir = test_dir
- with patch("os.makedirs") as mock_makedirs, \
- patch("os.path.exists", return_value = False), \
- patch("builtins.open", mock_open()) as mock_file, \
- patch("json.dump") as mock_json_dump:
-
+ with (
+ patch("os.makedirs") as mock_makedirs,
+ patch("os.path.exists", return_value = False),
+ patch("builtins.open", mock_open()) as mock_file,
+ patch("json.dump") as mock_json_dump,
+ ):
# Simulate the profile creation logic
profile_dir = os.path.join(test_dir, "TestProfile")
prefs_file = os.path.join(profile_dir, "Preferences")
@@ -2255,18 +2320,17 @@ class TestWebScrapingMixinProfileHandling:
mock_file.assert_called_with(prefs_file, "w", encoding = "UTF-8")
mock_json_dump.assert_called()
- def test_profile_directory_creation_with_existing_preferences_file(
- self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
- ) -> None:
+ def test_profile_directory_creation_with_existing_preferences_file(self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path) -> None:
"""Test profile directory creation when preferences file already exists."""
test_dir = str(tmp_path / "test-profile")
scraper_with_profile_config.browser_config.user_data_dir = test_dir
- with patch("os.makedirs") as mock_makedirs, \
- patch("os.path.exists", return_value = True), \
- patch("builtins.open", mock_open()) as mock_file, \
- patch("json.dump") as mock_json_dump:
-
+ with (
+ patch("os.makedirs") as mock_makedirs,
+ patch("os.path.exists", return_value = True),
+ patch("builtins.open", mock_open()) as mock_file,
+ patch("json.dump") as mock_json_dump,
+ ):
# Simulate the profile creation logic
profile_dir = os.path.join(test_dir, "TestProfile")
@@ -2278,20 +2342,19 @@ class TestWebScrapingMixinProfileHandling:
mock_file.assert_not_called()
mock_json_dump.assert_not_called()
- def test_profile_directory_creation_with_edge_browser(
- self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
- ) -> None:
+ def test_profile_directory_creation_with_edge_browser(self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path) -> None:
"""Test profile directory creation with Edge browser configuration."""
test_dir = str(tmp_path / "test-profile")
scraper_with_profile_config.browser_config.user_data_dir = test_dir
scraper_with_profile_config.browser_config.binary_location = "/usr/bin/microsoft-edge"
- with patch("os.makedirs") as mock_makedirs, \
- patch("os.path.exists", return_value = False), \
- patch("builtins.open", mock_open()), \
- patch("json.dump"), \
- patch("os.environ", {"MSEDGEDRIVER_TELEMETRY_OPTOUT": "1"}):
-
+ with (
+ patch("os.makedirs") as mock_makedirs,
+ patch("os.path.exists", return_value = False),
+ patch("builtins.open", mock_open()),
+ patch("json.dump"),
+ patch("os.environ", {"MSEDGEDRIVER_TELEMETRY_OPTOUT": "1"}),
+ ):
# Simulate the profile creation logic
profile_dir = os.path.join(test_dir, "TestProfile")
@@ -2299,19 +2362,13 @@ class TestWebScrapingMixinProfileHandling:
os.makedirs(profile_dir, exist_ok = True)
mock_makedirs.assert_called_with(profile_dir, exist_ok = True)
- def test_profile_directory_creation_with_private_window(
- self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
- ) -> None:
+ def test_profile_directory_creation_with_private_window(self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path) -> None:
"""Test profile directory creation with private window configuration."""
test_dir = str(tmp_path / "test-profile")
scraper_with_profile_config.browser_config.user_data_dir = test_dir
scraper_with_profile_config.browser_config.use_private_window = True
- with patch("os.makedirs") as mock_makedirs, \
- patch("os.path.exists", return_value = False), \
- patch("builtins.open", mock_open()), \
- patch("json.dump"):
-
+ with patch("os.makedirs") as mock_makedirs, patch("os.path.exists", return_value = False), patch("builtins.open", mock_open()), patch("json.dump"):
# Simulate the profile creation logic
profile_dir = os.path.join(test_dir, "TestProfile")
@@ -2319,16 +2376,12 @@ class TestWebScrapingMixinProfileHandling:
os.makedirs(profile_dir, exist_ok = True)
mock_makedirs.assert_called_with(profile_dir, exist_ok = True)
- def test_profile_directory_creation_without_user_data_dir(
- self, scraper_with_profile_config:WebScrapingMixin
- ) -> None:
+ def test_profile_directory_creation_without_user_data_dir(self, scraper_with_profile_config:WebScrapingMixin) -> None:
"""Test profile directory handling when user_data_dir is not configured."""
scraper_with_profile_config.browser_config.user_data_dir = None
# Should not create profile directories when user_data_dir is None
- with patch("os.path.join") as mock_join, \
- patch("os.makedirs") as mock_makedirs:
-
+ with patch("os.path.join") as mock_join, patch("os.makedirs") as mock_makedirs:
# The profile creation logic should not be called
mock_join.assert_not_called()
mock_makedirs.assert_not_called()