feat: capture publish failure diagnostics with screenshot and logs (#802)

This commit is contained in:
Jens
2026-02-01 08:17:14 +01:00
committed by GitHub
parent 96f465d5bc
commit b3d5a4b228
10 changed files with 795 additions and 57 deletions

View File

@@ -30,6 +30,7 @@ reviews:
in_progress_fortune: false
poem: false
# Path filters to focus on important files
path_filters:
# Source code
@@ -88,6 +89,7 @@ reviews:
13. Use type hints for all function parameters and return values
14. Use structured logging with context and appropriate log levels.
15. Log message strings should be plain English without `_()` (TranslatingLogger handles translation); wrap non-log user-facing strings with `_()` and add translations
16. NEVER flag PEP 8 whitespace/spacing issues (autopep8 handles these automatically via pdm run format)
- path: "tests/**/*.py"
instructions: |
TESTING RULES:
@@ -106,6 +108,7 @@ reviews:
13. Focus on minimal health checks, not full user workflows
14. Include SPDX license headers
15. Use descriptive test names in English
16. NEVER flag PEP 8 whitespace/spacing issues (autopep8 handles these automatically via pdm run format)
- path: "scripts/**/*.py"
instructions: |
SCRIPT RULES:

View File

@@ -220,15 +220,27 @@ login:
### diagnostics
Diagnostics configuration for troubleshooting login detection issues.
Diagnostics configuration for troubleshooting login detection issues and publish failures.
```yaml
diagnostics:
login_detection_capture: false # Capture screenshot + HTML when login state is UNKNOWN
capture_on:
login_detection: false # Capture screenshot + HTML when login state is UNKNOWN
publish: false # Capture screenshot + HTML + JSON on each failed publish attempt (timeouts/protocol errors)
capture_log_copy: false # Copy entire bot log file when diagnostics are captured (may duplicate log content)
pause_on_login_detection_failure: false # Pause for manual inspection (interactive only)
output_dir: "" # Custom output directory (default: portable .temp/diagnostics, xdg cache/diagnostics)
output_dir: "" # Custom output directory (see "Output locations (default)" below)
```
**Migration Note:**
Old diagnostics keys have been renamed/moved. Update configs and CI/automation accordingly:
- `login_detection_capture``capture_on.login_detection`
- `publish_error_capture``capture_on.publish`
`capture_log_copy` is a new top-level flag. It may copy the same log multiple times during a single run if multiple diagnostic events are triggered.
**Login Detection Behavior:**
The bot uses a layered approach to detect login state, prioritizing stealth over reliability:
@@ -249,8 +261,12 @@ The bot uses a layered approach to detect login state, prioritizing stealth over
**Optional diagnostics:**
- Enable `login_detection_capture` to capture screenshots and HTML dumps when state is `UNKNOWN`
- Enable `pause_on_login_detection_failure` to pause the bot for manual inspection (interactive sessions only; requires `login_detection_capture=true`)
- Enable `capture_on.login_detection` to capture screenshots and HTML dumps when state is `UNKNOWN`
- Enable `capture_on.publish` to capture screenshots, HTML dumps, and JSON payloads for each failed publish attempt (e.g., attempts 13).
- Enable `capture_log_copy` to copy the entire bot log file when a diagnostic event triggers (e.g., `capture_on.publish` or `capture_on.login_detection`):
- If multiple diagnostics trigger in the same run, the log will be copied multiple times
- Review or redact artifacts before sharing publicly
- Enable `pause_on_login_detection_failure` to pause the bot for manual inspection in interactive sessions. This requires `capture_on.login_detection=true`; if this is not enabled, the runtime will fail startup with a validation error.
- Use custom `output_dir` to specify where artifacts are saved
**Output locations (default):**
@@ -259,7 +275,7 @@ The bot uses a layered approach to detect login state, prioritizing stealth over
- **System-wide mode (XDG)**: `~/.cache/kleinanzeigen-bot/diagnostics/` (Linux) or `~/Library/Caches/kleinanzeigen-bot/diagnostics/` (macOS)
- **Custom**: Path resolved relative to your `config.yaml` if `output_dir` is specified
> **⚠️ PII Warning:** HTML dumps may contain your account email or other personally identifiable information. Review files in the diagnostics output directory before sharing them publicly.
> **⚠️ PII Warning:** HTML dumps, JSON payloads, and log copies may contain PII. Typical examples include account email, ad titles/descriptions, contact info, and prices. Log copies are produced by `capture_log_copy` when diagnostics capture runs, such as `capture_on.publish` or `capture_on.login_detection`. Review or redact these artifacts before sharing them publicly.
## Installation Modes

View File

@@ -263,6 +263,25 @@
"title": "CaptchaConfig",
"type": "object"
},
"CaptureOnConfig": {
"description": "Configuration for which operations should trigger diagnostics capture.",
"properties": {
"login_detection": {
"default": false,
"description": "Capture screenshot and HTML when login state detection fails",
"title": "Login Detection",
"type": "boolean"
},
"publish": {
"default": false,
"description": "Capture screenshot, HTML, and JSON on publish failures",
"title": "Publish",
"type": "boolean"
}
},
"title": "CaptureOnConfig",
"type": "object"
},
"ContactDefaults": {
"properties": {
"name": {
@@ -369,15 +388,19 @@
},
"DiagnosticsConfig": {
"properties": {
"login_detection_capture": {
"capture_on": {
"$ref": "#/$defs/CaptureOnConfig",
"description": "Enable diagnostics capture for specific operations."
},
"capture_log_copy": {
"default": false,
"description": "If true, capture diagnostics artifacts (screenshot + HTML) when login detection returns UNKNOWN.",
"title": "Login Detection Capture",
"description": "If true, copy the entire bot log file when diagnostics are captured (may duplicate log content).",
"title": "Capture Log Copy",
"type": "boolean"
},
"pause_on_login_detection_failure": {
"default": false,
"description": "If true, pause (interactive runs only) after capturing login detection diagnostics so that user can inspect the browser. Requires login_detection_capture to be enabled.",
"description": "If true, pause (interactive runs only) after capturing login detection diagnostics so that user can inspect the browser. Requires capture_on.login_detection to be enabled.",
"title": "Pause On Login Detection Failure",
"type": "boolean"
},
@@ -520,7 +543,7 @@
},
"email_verification": {
"default": 4.0,
"description": "Timeout for eMail verification prompts.",
"description": "Timeout for email verification prompts.",
"minimum": 0.1,
"title": "Email Verification",
"type": "number"

View File

@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import atexit, asyncio, enum, json, os, re, secrets, signal, sys, textwrap # isort: skip
import atexit, asyncio, enum, json, os, re, signal, sys, textwrap # isort: skip
import getopt # pylint: disable=deprecated-module
import urllib.parse as urllib_parse
from datetime import datetime
@@ -19,7 +19,7 @@ from ._version import __version__
from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial, Contact, calculate_auto_price
from .model.config_model import Config
from .update_checker import UpdateChecker
from .utils import dicts, error_handlers, loggers, misc, xdg_paths
from .utils import diagnostics, dicts, error_handlers, loggers, misc, xdg_paths
from .utils.exceptions import CaptchaEncountered
from .utils.files import abspath
from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale
@@ -31,6 +31,8 @@ from .utils.web_scraping_mixin import By, Element, Is, WebScrapingMixin
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
LOG.setLevel(loggers.INFO)
PUBLISH_MAX_RETRIES:Final[int] = 3
colorama.just_fix_windows_console()
@@ -961,8 +963,8 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
return (Path.cwd() / ".temp" / "diagnostics").resolve()
async def _capture_login_detection_diagnostics_if_enabled(self) -> None:
diagnostics = getattr(self.config, "diagnostics", None)
if diagnostics is None or not diagnostics.login_detection_capture:
cfg = getattr(self.config, "diagnostics", None)
if cfg is None or not cfg.capture_on.login_detection:
return
if self._login_detection_diagnostics_captured:
@@ -975,35 +977,79 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
self._login_detection_diagnostics_captured = True
try:
out_dir = self._diagnostics_output_dir()
out_dir.mkdir(parents = True, exist_ok = True)
# Intentionally no username/PII in filename.
ts = misc.now().strftime("%Y%m%dT%H%M%S")
suffix = secrets.token_hex(4)
base = f"login_detection_unknown_{ts}_{suffix}"
screenshot_path = out_dir / f"{base}.png"
html_path = out_dir / f"{base}.html"
try:
await page.save_screenshot(str(screenshot_path))
except Exception as exc: # noqa: BLE001
LOG.debug("Login diagnostics screenshot capture failed: %s", exc)
try:
html = await page.get_content()
html_path.write_text(html, encoding = "utf-8")
except Exception as exc: # noqa: BLE001
LOG.debug("Login diagnostics HTML capture failed: %s", exc)
await diagnostics.capture_diagnostics(
output_dir = self._diagnostics_output_dir(),
base_prefix = "login_detection_unknown",
page = page,
)
except Exception as exc: # noqa: BLE001
LOG.debug("Login diagnostics capture failed: %s", exc)
LOG.debug(
"Login diagnostics capture failed (output_dir=%s, base_prefix=%s): %s",
self._diagnostics_output_dir(),
"login_detection_unknown",
exc,
)
if getattr(diagnostics, "pause_on_login_detection_failure", False) and getattr(sys.stdin, "isatty", lambda: False)():
if cfg.pause_on_login_detection_failure and getattr(sys.stdin, "isatty", lambda: False)():
LOG.warning("############################################")
LOG.warning("# Login detection returned UNKNOWN. Browser is paused for manual inspection.")
LOG.warning("############################################")
await ainput(_("Press a key to continue..."))
async def _capture_publish_error_diagnostics_if_enabled(
self,
ad_cfg:Ad,
ad_cfg_orig:dict[str, Any],
ad_file:str,
attempt:int,
exc:Exception,
) -> None:
"""Capture publish failure diagnostics when enabled and a page is available.
Runs only if cfg.capture_on.publish is enabled and self.page is set.
Uses the ad configuration and publish attempt details to write screenshot, HTML,
JSON payload, and optional log copy for debugging.
"""
cfg = getattr(self.config, "diagnostics", None)
if cfg is None or not cfg.capture_on.publish:
return
page = getattr(self, "page", None)
if page is None:
return
# Use the ad filename (without extension) as identifier
ad_file_stem = Path(ad_file).stem
json_payload = {
"timestamp": misc.now().isoformat(timespec = "seconds"),
"attempt": attempt,
"page_url": getattr(page, "url", None),
"exception": {
"type": exc.__class__.__name__,
"message": str(exc),
"repr": repr(exc),
},
"ad_file": ad_file,
"ad_title": ad_cfg.title,
"ad_config_effective": ad_cfg.model_dump(mode = "json"),
"ad_config_original": ad_cfg_orig,
}
try:
await diagnostics.capture_diagnostics(
output_dir = self._diagnostics_output_dir(),
base_prefix = "publish_error",
attempt = attempt,
subject = ad_file_stem,
page = page,
json_payload = json_payload,
log_file_path = self.log_file_path,
copy_log = cfg.capture_log_copy,
)
except Exception as error: # noqa: BLE001
LOG.warning("Diagnostics capture failed during publish error handling: %s", error)
async def is_logged_in(self, *, include_probe:bool = True) -> bool:
# Use login_detection timeout (10s default) instead of default (5s)
# to allow sufficient time for client-side JavaScript rendering after page load.
@@ -1298,7 +1344,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
count = 0
failed_count = 0
max_retries = 3
max_retries = PUBLISH_MAX_RETRIES
published_ads = await self._fetch_published_ads()
@@ -1321,6 +1367,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
except asyncio.CancelledError:
raise # Respect task cancellation
except (TimeoutError, ProtocolException) as ex:
await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
if attempt < max_retries:
LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex)
await self.web_sleep(2) # Wait before retry

View File

@@ -11,10 +11,12 @@ from pydantic import AfterValidator, Field, model_validator
from typing_extensions import deprecated
from kleinanzeigen_bot.model.update_check_model import UpdateCheckConfig
from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils import dicts, loggers
from kleinanzeigen_bot.utils.misc import get_attr
from kleinanzeigen_bot.utils.pydantics import ContextualModel
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
_MAX_PERCENTAGE:Final[int] = 100
@@ -195,25 +197,73 @@ class TimeoutConfig(ContextualModel):
return base * self.multiplier * backoff
class DiagnosticsConfig(ContextualModel):
login_detection_capture:bool = Field(
class CaptureOnConfig(ContextualModel):
"""Configuration for which operations should trigger diagnostics capture."""
login_detection:bool = Field(
default = False,
description = "If true, capture diagnostics artifacts (screenshot + HTML) when login detection returns UNKNOWN.",
description = "Capture screenshot and HTML when login state detection fails",
)
publish:bool = Field(
default = False,
description = "Capture screenshot, HTML, and JSON on publish failures",
)
class DiagnosticsConfig(ContextualModel):
capture_on:CaptureOnConfig = Field(
default_factory = CaptureOnConfig,
description = "Enable diagnostics capture for specific operations.",
)
capture_log_copy:bool = Field(
default = False,
description = "If true, copy the entire bot log file when diagnostics are captured (may duplicate log content).",
)
pause_on_login_detection_failure:bool = Field(
default = False,
description = "If true, pause (interactive runs only) after capturing login detection diagnostics "
"so that user can inspect the browser. Requires login_detection_capture to be enabled.",
"so that user can inspect the browser. Requires capture_on.login_detection to be enabled.",
)
output_dir:str | None = Field(
default = None,
description = "Optional output directory for diagnostics artifacts. If omitted, a safe default is used based on installation mode.",
)
@model_validator(mode = "before")
@classmethod
def migrate_legacy_diagnostics_keys(cls, data:dict[str, Any]) -> dict[str, Any]:
"""Migrate legacy login_detection_capture and publish_error_capture keys."""
# Migrate legacy login_detection_capture -> capture_on.login_detection
# Only migrate if the new key is not already explicitly set
if "login_detection_capture" in data:
LOG.warning("Deprecated: 'login_detection_capture' is replaced by 'capture_on.login_detection'. Please update your config.")
if "capture_on" not in data or data["capture_on"] is None:
data["capture_on"] = {}
if isinstance(data["capture_on"], dict) and "login_detection" not in data["capture_on"]:
data["capture_on"]["login_detection"] = data.pop("login_detection_capture")
else:
# Remove legacy key but don't overwrite explicit new value
data.pop("login_detection_capture")
# Migrate legacy publish_error_capture -> capture_on.publish
# Only migrate if the new key is not already explicitly set
if "publish_error_capture" in data:
LOG.warning("Deprecated: 'publish_error_capture' is replaced by 'capture_on.publish'. Please update your config.")
if "capture_on" not in data or data["capture_on"] is None:
data["capture_on"] = {}
if isinstance(data["capture_on"], dict) and "publish" not in data["capture_on"]:
data["capture_on"]["publish"] = data.pop("publish_error_capture")
else:
# Remove legacy key but don't overwrite explicit new value
data.pop("publish_error_capture")
return data
@model_validator(mode = "after")
def _validate_pause_requires_capture(self) -> "DiagnosticsConfig":
if self.pause_on_login_detection_failure and not self.login_detection_capture:
raise ValueError(_("pause_on_login_detection_failure requires login_detection_capture to be enabled"))
if self.pause_on_login_detection_failure and not self.capture_on.login_detection:
raise ValueError(_("pause_on_login_detection_failure requires capture_on.login_detection to be enabled"))
return self

View File

@@ -75,6 +75,9 @@ kleinanzeigen_bot/__init__.py:
"# Login detection returned UNKNOWN. Browser is paused for manual inspection.": "# Login-Erkennung ergab UNKNOWN. Browser ist zur manuellen Prüfung angehalten."
"Press a key to continue...": "Eine Taste drücken, um fortzufahren..."
_capture_publish_error_diagnostics_if_enabled:
"Diagnostics capture failed during publish error handling: %s": "Diagnose-Erfassung fehlgeschlagen während der Veröffentlichung-Fehlerbehandlung: %s"
login:
"Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..."
"Current page URL after opening homepage: %s": "Aktuelle Seiten-URL nach dem Öffnen der Startseite: %s"
@@ -619,10 +622,13 @@ kleinanzeigen_bot/model/config_model.py:
"amount must be specified when auto_price_reduction is enabled": "amount muss angegeben werden, wenn auto_price_reduction aktiviert ist"
"min_price must be specified when auto_price_reduction is enabled": "min_price muss angegeben werden, wenn auto_price_reduction aktiviert ist"
"Percentage reduction amount must not exceed %s": "Prozentuale Reduktionsmenge darf %s nicht überschreiten"
migrate_legacy_diagnostics_keys:
"Deprecated: 'login_detection_capture' is replaced by 'capture_on.login_detection'. Please update your config.": "Veraltet: 'login_detection_capture' wurde durch 'capture_on.login_detection' ersetzt. Bitte aktualisieren Sie Ihre Konfiguration."
"Deprecated: 'publish_error_capture' is replaced by 'capture_on.publish'. Please update your config.": "Veraltet: 'publish_error_capture' wurde durch 'capture_on.publish' ersetzt. Bitte aktualisieren Sie Ihre Konfiguration."
_validate_glob_pattern:
"must be a non-empty, non-blank glob pattern": "muss ein nicht-leeres Glob-Muster sein"
_validate_pause_requires_capture:
"pause_on_login_detection_failure requires login_detection_capture to be enabled": "pause_on_login_detection_failure erfordert, dass login_detection_capture aktiviert ist"
"pause_on_login_detection_failure requires capture_on.login_detection to be enabled": "pause_on_login_detection_failure erfordert, dass capture_on.login_detection aktiviert ist"
#################################################
kleinanzeigen_bot/model/ad_model.py:
@@ -656,6 +662,21 @@ kleinanzeigen_bot/model/update_check_state.py:
"Invalid interval format or unsupported unit: %s. Using default interval for this run.": "Ungültiges Intervallformat oder nicht unterstützte Einheit: %s. Es wird das Standardintervall für diesen Durchlauf verwendet."
"Negative interval: %s. Minimum interval is 1d. Using default interval for this run.": "Negatives Intervall: %s. Das Mindestintervall beträgt 1 Tag. Es wird das Standardintervall für diesen Durchlauf verwendet."
#################################################
kleinanzeigen_bot/utils/diagnostics.py:
#################################################
_copy_log_sync:
"Log file not found for diagnostics copy: %s": "Logdatei nicht gefunden für Diagnosekopie: %s"
capture_diagnostics:
"Diagnostics screenshot capture failed: %s": "Diagnose-Screenshot-Erfassung fehlgeschlagen: %s"
"Diagnostics HTML capture failed: %s": "Diagnose-HTML-Erfassung fehlgeschlagen: %s"
"Diagnostics JSON capture failed: %s": "Diagnose-JSON-Erfassung fehlgeschlagen: %s"
"Diagnostics log copy failed: %s": "Diagnose-Log-Kopie fehlgeschlagen: %s"
"Diagnostics saved: %s": "Diagnosedaten gespeichert: %s"
"Diagnostics capture attempted but no artifacts were saved (all captures failed)": "Diagnoseerfassung versucht, aber keine Artefakte gespeichert (alle Erfassungen fehlgeschlagen)"
"Diagnostics capture failed: %s": "Diagnoseerfassung fehlgeschlagen: %s"
#################################################
kleinanzeigen_bot/utils/xdg_paths.py:
#################################################

View File

@@ -0,0 +1,135 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import asyncio, json, re, secrets, shutil # isort: skip
from pathlib import Path
from typing import Any, Final
from kleinanzeigen_bot.utils import loggers, misc
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
class CaptureResult:
"""Result of a diagnostics capture attempt."""
def __init__(self) -> None:
self.saved_artifacts:list[Path] = []
def add_saved(self, path:Path) -> None:
"""Add a successfully saved artifact."""
self.saved_artifacts.append(path)
def has_any(self) -> bool:
"""Check if any artifacts were saved."""
return bool(self.saved_artifacts)
def _write_json_sync(json_path:Path, json_payload:dict[str, Any]) -> None:
"""Synchronous helper to write JSON to file."""
with json_path.open("w", encoding = "utf-8") as handle:
json.dump(json_payload, handle, indent = 2, default = str)
handle.write("\n")
def _copy_log_sync(log_file_path:str, log_path:Path) -> bool:
"""Synchronous helper to copy log file. Returns True if copy succeeded."""
log_source = Path(log_file_path)
if not log_source.exists():
LOG.warning("Log file not found for diagnostics copy: %s", log_file_path)
return False
loggers.flush_all_handlers()
shutil.copy2(log_source, log_path)
return True
async def capture_diagnostics(
*,
output_dir:Path,
base_prefix:str,
attempt:int | None = None,
subject:str | None = None,
page:Any | None = None,
json_payload:dict[str, Any] | None = None,
log_file_path:str | None = None,
copy_log:bool = False,
) -> CaptureResult:
"""Capture diagnostics artifacts for a given operation.
Args:
output_dir: The output directory for diagnostics artifacts
base_prefix: Base filename prefix (e.g., 'login_detection_unknown', 'publish_error')
attempt: Optional attempt number for retry operations
subject: Optional subject identifier (e.g., ad token)
page: Optional page object with save_screenshot and get_content methods
json_payload: Optional JSON data to save
log_file_path: Optional log file path to copy
copy_log: Whether to copy log file
Returns:
CaptureResult containing the list of successfully saved artifacts
"""
result = CaptureResult()
try:
await asyncio.to_thread(output_dir.mkdir, parents = True, exist_ok = True)
ts = misc.now().strftime("%Y%m%dT%H%M%S")
suffix = secrets.token_hex(4)
base = f"{base_prefix}_{ts}_{suffix}"
if attempt is not None:
base = f"{base}_attempt{attempt}"
if subject:
safe_subject = re.sub(r"[^A-Za-z0-9_-]", "_", subject)
base = f"{base}_{safe_subject}"
screenshot_path = output_dir / f"{base}.png"
html_path = output_dir / f"{base}.html"
json_path = output_dir / f"{base}.json"
log_path = output_dir / f"{base}.log"
if page:
try:
await page.save_screenshot(str(screenshot_path))
result.add_saved(screenshot_path)
except Exception as exc: # noqa: BLE001
LOG.debug("Diagnostics screenshot capture failed: %s", exc)
try:
html = await page.get_content()
await asyncio.to_thread(html_path.write_text, html, encoding = "utf-8")
result.add_saved(html_path)
except Exception as exc: # noqa: BLE001
LOG.debug("Diagnostics HTML capture failed: %s", exc)
if json_payload is not None:
try:
await asyncio.to_thread(_write_json_sync, json_path, json_payload)
result.add_saved(json_path)
except Exception as exc: # noqa: BLE001
LOG.debug("Diagnostics JSON capture failed: %s", exc)
if copy_log and log_file_path:
try:
copy_succeeded = await asyncio.to_thread(_copy_log_sync, log_file_path, log_path)
if copy_succeeded:
result.add_saved(log_path)
except Exception as exc: # noqa: BLE001
LOG.debug("Diagnostics log copy failed: %s", exc)
# Determine if any capture was actually requested
capture_requested = page is not None or json_payload is not None or (copy_log and log_file_path)
if result.has_any():
artifacts_str = " ".join(map(str, result.saved_artifacts))
LOG.info("Diagnostics saved: %s", artifacts_str)
elif capture_requested:
LOG.warning("Diagnostics capture attempted but no artifacts were saved (all captures failed)")
else:
LOG.debug("No diagnostics capture requested")
except Exception as exc: # noqa: BLE001
LOG.debug("Diagnostics capture failed: %s", exc)
return result

View File

@@ -87,7 +87,7 @@ def test_timeout_config_resolve_falls_back_to_default() -> None:
def test_diagnostics_pause_requires_capture_validation() -> None:
"""
Unit: DiagnosticsConfig validator ensures pause_on_login_detection_failure
requires login_detection_capture to be enabled.
requires capture_on.login_detection to be enabled.
"""
minimal_cfg = {
"ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}},
@@ -95,12 +95,98 @@ def test_diagnostics_pause_requires_capture_validation() -> None:
"publishing": {"delete_old_ads": "BEFORE_PUBLISH", "delete_old_ads_by_title": False},
}
valid_cfg = {**minimal_cfg, "diagnostics": {"login_detection_capture": True, "pause_on_login_detection_failure": True}}
valid_cfg = {**minimal_cfg, "diagnostics": {"capture_on": {"login_detection": True}, "pause_on_login_detection_failure": True}}
config = Config.model_validate(valid_cfg)
assert config.diagnostics is not None
assert config.diagnostics.pause_on_login_detection_failure is True
assert config.diagnostics.login_detection_capture is True
assert config.diagnostics.capture_on.login_detection is True
invalid_cfg = {**minimal_cfg, "diagnostics": {"login_detection_capture": False, "pause_on_login_detection_failure": True}}
with pytest.raises(ValueError, match = "pause_on_login_detection_failure requires login_detection_capture to be enabled"):
invalid_cfg = {**minimal_cfg, "diagnostics": {"capture_on": {"login_detection": False}, "pause_on_login_detection_failure": True}}
with pytest.raises(ValueError, match = "pause_on_login_detection_failure requires capture_on.login_detection to be enabled"):
Config.model_validate(invalid_cfg)
def test_diagnostics_legacy_login_detection_capture_migration_when_capture_on_exists() -> None:
"""
Unit: Test that legacy login_detection_capture is removed but doesn't overwrite explicit capture_on.login_detection.
"""
minimal_cfg = {
"ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}},
"login": {"username": "dummy", "password": "dummy"}, # noqa: S105
}
# When capture_on.login_detection is explicitly set to False, legacy True should be ignored
cfg_with_explicit = {
**minimal_cfg,
"diagnostics": {
"login_detection_capture": True, # legacy key
"capture_on": {"login_detection": False}, # explicit new key set to False
},
}
config = Config.model_validate(cfg_with_explicit)
assert config.diagnostics is not None
assert config.diagnostics.capture_on.login_detection is False # explicit value preserved
def test_diagnostics_legacy_publish_error_capture_migration_when_capture_on_exists() -> None:
"""
Unit: Test that legacy publish_error_capture is removed but doesn't overwrite explicit capture_on.publish.
"""
minimal_cfg = {
"ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}},
"login": {"username": "dummy", "password": "dummy"}, # noqa: S105
}
# When capture_on.publish is explicitly set to False, legacy True should be ignored
cfg_with_explicit = {
**minimal_cfg,
"diagnostics": {
"publish_error_capture": True, # legacy key
"capture_on": {"publish": False}, # explicit new key set to False
},
}
config = Config.model_validate(cfg_with_explicit)
assert config.diagnostics is not None
assert config.diagnostics.capture_on.publish is False # explicit value preserved
def test_diagnostics_legacy_login_detection_capture_migration_when_capture_on_is_none() -> None:
"""
Unit: Test that legacy login_detection_capture is migrated when capture_on is None.
"""
minimal_cfg = {
"ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}},
"login": {"username": "dummy", "password": "dummy"}, # noqa: S105
}
cfg_with_null_capture_on = {
**minimal_cfg,
"diagnostics": {
"login_detection_capture": True, # legacy key
"capture_on": None, # capture_on is explicitly None
},
}
config = Config.model_validate(cfg_with_null_capture_on)
assert config.diagnostics is not None
assert config.diagnostics.capture_on.login_detection is True # legacy value migrated
def test_diagnostics_legacy_publish_error_capture_migration_when_capture_on_is_none() -> None:
"""
Unit: Test that legacy publish_error_capture is migrated when capture_on is None.
"""
minimal_cfg = {
"ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}},
"login": {"username": "dummy", "password": "dummy"}, # noqa: S105
}
cfg_with_null_capture_on = {
**minimal_cfg,
"diagnostics": {
"publish_error_capture": True, # legacy key
"capture_on": None, # capture_on is explicitly None
},
}
config = Config.model_validate(cfg_with_null_capture_on)
assert config.diagnostics is not None
assert config.diagnostics.capture_on.publish is True # legacy value migrated

View File

@@ -0,0 +1,224 @@
# SPDX-FileCopyrightText: © 2025 Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
import pytest
from kleinanzeigen_bot.utils import diagnostics as diagnostics_module
from kleinanzeigen_bot.utils.diagnostics import capture_diagnostics
@pytest.mark.unit
class TestDiagnosticsCapture:
"""Tests for diagnostics capture functionality."""
@pytest.mark.asyncio
async def test_capture_diagnostics_creates_output_dir(self, tmp_path:Path) -> None:
"""Test that capture_diagnostics creates output directory."""
mock_page = AsyncMock()
output_dir = tmp_path / "diagnostics"
_ = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = mock_page,
)
# Verify directory was created
assert output_dir.exists()
@pytest.mark.asyncio
async def test_capture_diagnostics_creates_screenshot(self, tmp_path:Path) -> None:
"""Test that capture_diagnostics creates screenshot file."""
mock_page = AsyncMock()
mock_page.save_screenshot = AsyncMock()
output_dir = tmp_path / "diagnostics"
result = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = mock_page,
)
# Verify screenshot file was created and page method was called
assert len(result.saved_artifacts) == 1
assert result.saved_artifacts[0].suffix == ".png"
mock_page.save_screenshot.assert_awaited_once()
@pytest.mark.asyncio
async def test_capture_diagnostics_creates_html(self, tmp_path:Path) -> None:
"""Test that capture_diagnostics creates HTML file."""
mock_page = AsyncMock()
mock_page.get_content = AsyncMock(return_value = "<html></html>")
output_dir = tmp_path / "diagnostics"
result = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = mock_page,
)
# Verify HTML file was created along with screenshot
assert len(result.saved_artifacts) == 2
assert any(a.suffix == ".html" for a in result.saved_artifacts)
@pytest.mark.asyncio
async def test_capture_diagnostics_creates_json(self, tmp_path:Path) -> None:
"""Test that capture_diagnostics creates JSON file."""
mock_page = AsyncMock()
mock_page.get_content = AsyncMock(return_value = "<html></html>")
output_dir = tmp_path / "diagnostics"
result = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = mock_page,
json_payload = {"test": "data"},
)
# Verify JSON file was created along with HTML and screenshot
assert len(result.saved_artifacts) == 3
assert any(a.suffix == ".json" for a in result.saved_artifacts)
@pytest.mark.asyncio
async def test_capture_diagnostics_copies_log_file(self, tmp_path:Path) -> None:
"""Test that capture_diagnostics copies log file when enabled."""
log_file = tmp_path / "test.log"
log_file.write_text("test log content")
output_dir = tmp_path / "diagnostics"
result = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = None, # No page to avoid screenshot
log_file_path = str(log_file),
copy_log = True,
)
# Verify log was copied
assert len(result.saved_artifacts) == 1
assert result.saved_artifacts[0].suffix == ".log"
def test_copy_log_sync_returns_false_when_file_not_found(self, tmp_path:Path) -> None:
"""Test _copy_log_sync returns False when log file does not exist."""
non_existent_log = tmp_path / "non_existent.log"
log_path = tmp_path / "output.log"
result = diagnostics_module._copy_log_sync(str(non_existent_log), log_path)
assert result is False
assert not log_path.exists()
@pytest.mark.asyncio
async def test_capture_diagnostics_handles_screenshot_exception(self, tmp_path:Path, caplog:pytest.LogCaptureFixture) -> None:
"""Test that capture_diagnostics handles screenshot capture exceptions gracefully."""
mock_page = AsyncMock()
mock_page.save_screenshot = AsyncMock(side_effect = Exception("Screenshot failed"))
output_dir = tmp_path / "diagnostics"
result = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = mock_page,
)
# Verify no artifacts were saved due to exception
assert len(result.saved_artifacts) == 0
assert "Diagnostics screenshot capture failed" in caplog.text
@pytest.mark.asyncio
async def test_capture_diagnostics_handles_json_exception(self, tmp_path:Path, caplog:pytest.LogCaptureFixture, monkeypatch:pytest.MonkeyPatch) -> None:
"""Test that capture_diagnostics handles JSON write exceptions gracefully."""
mock_page = AsyncMock()
mock_page.get_content = AsyncMock(return_value = "<html></html>")
output_dir = tmp_path / "diagnostics"
# Mock _write_json_sync to raise an exception
monkeypatch.setattr(diagnostics_module, "_write_json_sync", MagicMock(side_effect = Exception("JSON write failed")))
result = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = mock_page,
json_payload = {"test": "data"},
)
# Verify screenshot and HTML were saved, but JSON failed
assert len(result.saved_artifacts) == 2
assert any(a.suffix == ".png" for a in result.saved_artifacts)
assert any(a.suffix == ".html" for a in result.saved_artifacts)
assert not any(a.suffix == ".json" for a in result.saved_artifacts)
assert "Diagnostics JSON capture failed" in caplog.text
@pytest.mark.asyncio
async def test_capture_diagnostics_handles_log_copy_exception(
self, tmp_path:Path, caplog:pytest.LogCaptureFixture, monkeypatch:pytest.MonkeyPatch
) -> None:
"""Test that capture_diagnostics handles log copy exceptions gracefully."""
# Create a log file
log_file = tmp_path / "test.log"
log_file.write_text("test log content")
output_dir = tmp_path / "diagnostics"
# Mock _copy_log_sync to raise an exception
original_copy_log = diagnostics_module._copy_log_sync
monkeypatch.setattr(diagnostics_module, "_copy_log_sync", MagicMock(side_effect = Exception("Copy failed")))
try:
result = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = None,
log_file_path = str(log_file),
copy_log = True,
)
# Verify no artifacts were saved due to exception
assert len(result.saved_artifacts) == 0
assert "Diagnostics log copy failed" in caplog.text
finally:
monkeypatch.setattr(diagnostics_module, "_copy_log_sync", original_copy_log)
@pytest.mark.asyncio
async def test_capture_diagnostics_logs_warning_when_all_captures_fail(
self, tmp_path:Path, caplog:pytest.LogCaptureFixture, monkeypatch:pytest.MonkeyPatch
) -> None:
"""Test warning is logged when capture is requested but all fail."""
mock_page = AsyncMock()
mock_page.save_screenshot = AsyncMock(side_effect = Exception("Screenshot failed"))
mock_page.get_content = AsyncMock(side_effect = Exception("HTML failed"))
# Mock JSON write to also fail
monkeypatch.setattr(diagnostics_module, "_write_json_sync", MagicMock(side_effect = Exception("JSON write failed")))
output_dir = tmp_path / "diagnostics"
result = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = mock_page,
json_payload = {"test": "data"},
)
# Verify no artifacts were saved
assert len(result.saved_artifacts) == 0
assert "Diagnostics capture attempted but no artifacts were saved" in caplog.text
@pytest.mark.asyncio
async def test_capture_diagnostics_logs_debug_when_no_capture_requested(self, tmp_path:Path, caplog:pytest.LogCaptureFixture) -> None:
"""Test debug is logged when no diagnostics capture is requested."""
output_dir = tmp_path / "diagnostics"
with caplog.at_level("DEBUG"):
_ = await capture_diagnostics(
output_dir = output_dir,
base_prefix = "test",
page = None,
json_payload = None,
copy_log = False,
)
assert "No diagnostics capture requested" in caplog.text

View File

@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import copy, io, json, logging, os, tempfile # isort: skip
import copy, fnmatch, io, json, logging, os, tempfile # isort: skip
from collections.abc import Generator
from contextlib import redirect_stdout
from datetime import timedelta
@@ -12,7 +12,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from pydantic import ValidationError
from kleinanzeigen_bot import LOG, AdUpdateStrategy, KleinanzeigenBot, LoginState, misc
from kleinanzeigen_bot import LOG, PUBLISH_MAX_RETRIES, AdUpdateStrategy, KleinanzeigenBot, LoginState, misc
from kleinanzeigen_bot._version import __version__
from kleinanzeigen_bot.model.ad_model import Ad
from kleinanzeigen_bot.model.config_model import AdDefaults, Config, DiagnosticsConfig, PublishingConfig
@@ -388,7 +388,7 @@ class TestKleinanzeigenBotAuthentication:
@pytest.mark.asyncio
async def test_get_login_state_unknown_captures_diagnostics_when_enabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None:
test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"login_detection_capture": True, "output_dir": str(tmp_path)})
test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"login_detection": True}, "output_dir": str(tmp_path)})
page = MagicMock()
page.save_screenshot = AsyncMock()
@@ -406,7 +406,7 @@ class TestKleinanzeigenBotAuthentication:
@pytest.mark.asyncio
async def test_get_login_state_unknown_does_not_capture_diagnostics_when_disabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None:
test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"login_detection_capture": False, "output_dir": str(tmp_path)})
test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"login_detection": False}, "output_dir": str(tmp_path)})
page = MagicMock()
page.save_screenshot = AsyncMock()
@@ -425,7 +425,7 @@ class TestKleinanzeigenBotAuthentication:
@pytest.mark.asyncio
async def test_get_login_state_unknown_pauses_for_inspection_when_enabled_and_interactive(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None:
test_bot.config.diagnostics = DiagnosticsConfig.model_validate(
{"login_detection_capture": True, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)}
{"capture_on": {"login_detection": True}, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)}
)
page = MagicMock()
@@ -453,7 +453,7 @@ class TestKleinanzeigenBotAuthentication:
@pytest.mark.asyncio
async def test_get_login_state_unknown_does_not_pause_when_non_interactive(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None:
test_bot.config.diagnostics = DiagnosticsConfig.model_validate(
{"login_detection_capture": True, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)}
{"capture_on": {"login_detection": True}, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)}
)
page = MagicMock()
@@ -624,6 +624,139 @@ class TestKleinanzeigenBotAuthentication:
assert mock_ainput.call_count == 0
class TestKleinanzeigenBotDiagnostics:
@pytest.fixture
def diagnostics_ad_config(self) -> dict[str, Any]:
return {
"active": True,
"type": "OFFER",
"title": "Test ad title",
"description": "Test description",
"category": "161/176/sonstige",
"price_type": "NEGOTIABLE",
"shipping_type": "PICKUP",
"sell_directly": False,
"contact": {
"name": "Tester",
"zipcode": "12345",
},
"republication_interval": 7,
}
@pytest.mark.unit
@pytest.mark.asyncio
async def test_publish_ads_captures_diagnostics_on_failures(
self,
test_bot:KleinanzeigenBot,
tmp_path:Path,
diagnostics_ad_config:dict[str, Any],
) -> None:
"""Ensure publish failures capture diagnostics artifacts."""
log_file_path = tmp_path / "test.log"
log_file_path.write_text("Test log content\n", encoding = "utf-8")
test_bot.log_file_path = str(log_file_path)
test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"publish": True}, "output_dir": str(tmp_path)})
page = MagicMock()
page.save_screenshot = AsyncMock()
page.get_content = AsyncMock(return_value = "<html></html>")
page.sleep = AsyncMock()
page.url = "https://example.com/fail"
test_bot.page = page
ad_cfg = Ad.model_validate(diagnostics_ad_config)
ad_cfg_orig = copy.deepcopy(diagnostics_ad_config)
ad_file = str(tmp_path / "ad_000001_Test.yml")
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = {"content": json.dumps({"ads": []})}),
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")),
):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)])
expected_retries = PUBLISH_MAX_RETRIES
assert page.save_screenshot.await_count == expected_retries
assert page.get_content.await_count == expected_retries
entries = os.listdir(tmp_path)
html_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.html")]
json_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.json")]
assert len(html_files) == expected_retries
assert len(json_files) == expected_retries
@pytest.mark.unit
@pytest.mark.asyncio
async def test_publish_ads_captures_log_copy_when_enabled(
self,
test_bot:KleinanzeigenBot,
tmp_path:Path,
diagnostics_ad_config:dict[str, Any],
) -> None:
"""Ensure publish failures copy log file when capture_log_copy is enabled."""
log_file_path = tmp_path / "test.log"
log_file_path.write_text("Test log content\n", encoding = "utf-8")
test_bot.log_file_path = str(log_file_path)
test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"publish": True}, "capture_log_copy": True, "output_dir": str(tmp_path)})
page = MagicMock()
page.save_screenshot = AsyncMock()
page.get_content = AsyncMock(return_value = "<html></html>")
page.sleep = AsyncMock()
page.url = "https://example.com/fail"
test_bot.page = page
ad_cfg = Ad.model_validate(diagnostics_ad_config)
ad_cfg_orig = copy.deepcopy(diagnostics_ad_config)
ad_file = str(tmp_path / "ad_000001_Test.yml")
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = {"content": json.dumps({"ads": []})}),
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")),
):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)])
entries = os.listdir(tmp_path)
log_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.log")]
assert len(log_files) == PUBLISH_MAX_RETRIES
@pytest.mark.unit
@pytest.mark.asyncio
async def test_publish_ads_does_not_capture_diagnostics_when_disabled(
self,
test_bot:KleinanzeigenBot,
tmp_path:Path,
diagnostics_ad_config:dict[str, Any],
) -> None:
"""Ensure diagnostics are not captured when disabled."""
test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"publish": False}, "output_dir": str(tmp_path)})
page = MagicMock()
page.save_screenshot = AsyncMock()
page.get_content = AsyncMock(return_value = "<html></html>")
page.sleep = AsyncMock()
page.url = "https://example.com/fail"
test_bot.page = page
ad_cfg = Ad.model_validate(diagnostics_ad_config)
ad_cfg_orig = copy.deepcopy(diagnostics_ad_config)
ad_file = str(tmp_path / "ad_000001_Test.yml")
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = {"content": json.dumps({"ads": []})}),
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")),
):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)])
page.save_screenshot.assert_not_called()
page.get_content.assert_not_called()
entries = os.listdir(tmp_path)
html_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.html")]
json_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.json")]
assert not html_files
assert not json_files
class TestKleinanzeigenBotLocalization:
"""Tests for localization and help text."""