diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 64cc6dd..47ef52d 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -30,6 +30,7 @@ reviews: in_progress_fortune: false poem: false + # Path filters to focus on important files path_filters: # Source code @@ -88,6 +89,7 @@ reviews: 13. Use type hints for all function parameters and return values 14. Use structured logging with context and appropriate log levels. 15. Log message strings should be plain English without `_()` (TranslatingLogger handles translation); wrap non-log user-facing strings with `_()` and add translations + 16. NEVER flag PEP 8 whitespace/spacing issues (autopep8 handles these automatically via pdm run format) - path: "tests/**/*.py" instructions: | TESTING RULES: @@ -106,6 +108,7 @@ reviews: 13. Focus on minimal health checks, not full user workflows 14. Include SPDX license headers 15. Use descriptive test names in English + 16. NEVER flag PEP 8 whitespace/spacing issues (autopep8 handles these automatically via pdm run format) - path: "scripts/**/*.py" instructions: | SCRIPT RULES: diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 161c176..a6c53f4 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -220,15 +220,27 @@ login: ### diagnostics -Diagnostics configuration for troubleshooting login detection issues. +Diagnostics configuration for troubleshooting login detection issues and publish failures. ```yaml diagnostics: - login_detection_capture: false # Capture screenshot + HTML when login state is UNKNOWN + capture_on: + login_detection: false # Capture screenshot + HTML when login state is UNKNOWN + publish: false # Capture screenshot + HTML + JSON on each failed publish attempt (timeouts/protocol errors) + capture_log_copy: false # Copy entire bot log file when diagnostics are captured (may duplicate log content) pause_on_login_detection_failure: false # Pause for manual inspection (interactive only) - output_dir: "" # Custom output directory (default: portable .temp/diagnostics, xdg cache/diagnostics) + output_dir: "" # Custom output directory (see "Output locations (default)" below) ``` +**Migration Note:** + +Old diagnostics keys have been renamed/moved. Update configs and CI/automation accordingly: + +- `login_detection_capture` → `capture_on.login_detection` +- `publish_error_capture` → `capture_on.publish` + +`capture_log_copy` is a new top-level flag. It may copy the same log multiple times during a single run if multiple diagnostic events are triggered. + **Login Detection Behavior:** The bot uses a layered approach to detect login state, prioritizing stealth over reliability: @@ -249,8 +261,12 @@ The bot uses a layered approach to detect login state, prioritizing stealth over **Optional diagnostics:** -- Enable `login_detection_capture` to capture screenshots and HTML dumps when state is `UNKNOWN` -- Enable `pause_on_login_detection_failure` to pause the bot for manual inspection (interactive sessions only; requires `login_detection_capture=true`) +- Enable `capture_on.login_detection` to capture screenshots and HTML dumps when state is `UNKNOWN` +- Enable `capture_on.publish` to capture screenshots, HTML dumps, and JSON payloads for each failed publish attempt (e.g., attempts 1–3). +- Enable `capture_log_copy` to copy the entire bot log file when a diagnostic event triggers (e.g., `capture_on.publish` or `capture_on.login_detection`): + - If multiple diagnostics trigger in the same run, the log will be copied multiple times + - Review or redact artifacts before sharing publicly +- Enable `pause_on_login_detection_failure` to pause the bot for manual inspection in interactive sessions. This requires `capture_on.login_detection=true`; if this is not enabled, the runtime will fail startup with a validation error. - Use custom `output_dir` to specify where artifacts are saved **Output locations (default):** @@ -259,7 +275,7 @@ The bot uses a layered approach to detect login state, prioritizing stealth over - **System-wide mode (XDG)**: `~/.cache/kleinanzeigen-bot/diagnostics/` (Linux) or `~/Library/Caches/kleinanzeigen-bot/diagnostics/` (macOS) - **Custom**: Path resolved relative to your `config.yaml` if `output_dir` is specified -> **⚠️ PII Warning:** HTML dumps may contain your account email or other personally identifiable information. Review files in the diagnostics output directory before sharing them publicly. +> **⚠️ PII Warning:** HTML dumps, JSON payloads, and log copies may contain PII. Typical examples include account email, ad titles/descriptions, contact info, and prices. Log copies are produced by `capture_log_copy` when diagnostics capture runs, such as `capture_on.publish` or `capture_on.login_detection`. Review or redact these artifacts before sharing them publicly. ## Installation Modes diff --git a/schemas/config.schema.json b/schemas/config.schema.json index 573dda4..13aeac0 100644 --- a/schemas/config.schema.json +++ b/schemas/config.schema.json @@ -263,6 +263,25 @@ "title": "CaptchaConfig", "type": "object" }, + "CaptureOnConfig": { + "description": "Configuration for which operations should trigger diagnostics capture.", + "properties": { + "login_detection": { + "default": false, + "description": "Capture screenshot and HTML when login state detection fails", + "title": "Login Detection", + "type": "boolean" + }, + "publish": { + "default": false, + "description": "Capture screenshot, HTML, and JSON on publish failures", + "title": "Publish", + "type": "boolean" + } + }, + "title": "CaptureOnConfig", + "type": "object" + }, "ContactDefaults": { "properties": { "name": { @@ -369,15 +388,19 @@ }, "DiagnosticsConfig": { "properties": { - "login_detection_capture": { + "capture_on": { + "$ref": "#/$defs/CaptureOnConfig", + "description": "Enable diagnostics capture for specific operations." + }, + "capture_log_copy": { "default": false, - "description": "If true, capture diagnostics artifacts (screenshot + HTML) when login detection returns UNKNOWN.", - "title": "Login Detection Capture", + "description": "If true, copy the entire bot log file when diagnostics are captured (may duplicate log content).", + "title": "Capture Log Copy", "type": "boolean" }, "pause_on_login_detection_failure": { "default": false, - "description": "If true, pause (interactive runs only) after capturing login detection diagnostics so that user can inspect the browser. Requires login_detection_capture to be enabled.", + "description": "If true, pause (interactive runs only) after capturing login detection diagnostics so that user can inspect the browser. Requires capture_on.login_detection to be enabled.", "title": "Pause On Login Detection Failure", "type": "boolean" }, @@ -520,7 +543,7 @@ }, "email_verification": { "default": 4.0, - "description": "Timeout for eMail verification prompts.", + "description": "Timeout for email verification prompts.", "minimum": 0.1, "title": "Email Verification", "type": "number" diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index bc3044d..89df8bf 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: © Sebastian Thomschke and contributors # SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ -import atexit, asyncio, enum, json, os, re, secrets, signal, sys, textwrap # isort: skip +import atexit, asyncio, enum, json, os, re, signal, sys, textwrap # isort: skip import getopt # pylint: disable=deprecated-module import urllib.parse as urllib_parse from datetime import datetime @@ -19,7 +19,7 @@ from ._version import __version__ from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial, Contact, calculate_auto_price from .model.config_model import Config from .update_checker import UpdateChecker -from .utils import dicts, error_handlers, loggers, misc, xdg_paths +from .utils import diagnostics, dicts, error_handlers, loggers, misc, xdg_paths from .utils.exceptions import CaptchaEncountered from .utils.files import abspath from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale @@ -31,6 +31,8 @@ from .utils.web_scraping_mixin import By, Element, Is, WebScrapingMixin LOG:Final[loggers.Logger] = loggers.get_logger(__name__) LOG.setLevel(loggers.INFO) +PUBLISH_MAX_RETRIES:Final[int] = 3 + colorama.just_fix_windows_console() @@ -961,8 +963,8 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 return (Path.cwd() / ".temp" / "diagnostics").resolve() async def _capture_login_detection_diagnostics_if_enabled(self) -> None: - diagnostics = getattr(self.config, "diagnostics", None) - if diagnostics is None or not diagnostics.login_detection_capture: + cfg = getattr(self.config, "diagnostics", None) + if cfg is None or not cfg.capture_on.login_detection: return if self._login_detection_diagnostics_captured: @@ -975,35 +977,79 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 self._login_detection_diagnostics_captured = True try: - out_dir = self._diagnostics_output_dir() - out_dir.mkdir(parents = True, exist_ok = True) - - # Intentionally no username/PII in filename. - ts = misc.now().strftime("%Y%m%dT%H%M%S") - suffix = secrets.token_hex(4) - base = f"login_detection_unknown_{ts}_{suffix}" - screenshot_path = out_dir / f"{base}.png" - html_path = out_dir / f"{base}.html" - - try: - await page.save_screenshot(str(screenshot_path)) - except Exception as exc: # noqa: BLE001 - LOG.debug("Login diagnostics screenshot capture failed: %s", exc) - - try: - html = await page.get_content() - html_path.write_text(html, encoding = "utf-8") - except Exception as exc: # noqa: BLE001 - LOG.debug("Login diagnostics HTML capture failed: %s", exc) + await diagnostics.capture_diagnostics( + output_dir = self._diagnostics_output_dir(), + base_prefix = "login_detection_unknown", + page = page, + ) except Exception as exc: # noqa: BLE001 - LOG.debug("Login diagnostics capture failed: %s", exc) + LOG.debug( + "Login diagnostics capture failed (output_dir=%s, base_prefix=%s): %s", + self._diagnostics_output_dir(), + "login_detection_unknown", + exc, + ) - if getattr(diagnostics, "pause_on_login_detection_failure", False) and getattr(sys.stdin, "isatty", lambda: False)(): + if cfg.pause_on_login_detection_failure and getattr(sys.stdin, "isatty", lambda: False)(): LOG.warning("############################################") LOG.warning("# Login detection returned UNKNOWN. Browser is paused for manual inspection.") LOG.warning("############################################") await ainput(_("Press a key to continue...")) + async def _capture_publish_error_diagnostics_if_enabled( + self, + ad_cfg:Ad, + ad_cfg_orig:dict[str, Any], + ad_file:str, + attempt:int, + exc:Exception, + ) -> None: + """Capture publish failure diagnostics when enabled and a page is available. + + Runs only if cfg.capture_on.publish is enabled and self.page is set. + Uses the ad configuration and publish attempt details to write screenshot, HTML, + JSON payload, and optional log copy for debugging. + """ + cfg = getattr(self.config, "diagnostics", None) + if cfg is None or not cfg.capture_on.publish: + return + + page = getattr(self, "page", None) + if page is None: + return + + # Use the ad filename (without extension) as identifier + ad_file_stem = Path(ad_file).stem + + json_payload = { + "timestamp": misc.now().isoformat(timespec = "seconds"), + "attempt": attempt, + "page_url": getattr(page, "url", None), + "exception": { + "type": exc.__class__.__name__, + "message": str(exc), + "repr": repr(exc), + }, + "ad_file": ad_file, + "ad_title": ad_cfg.title, + "ad_config_effective": ad_cfg.model_dump(mode = "json"), + "ad_config_original": ad_cfg_orig, + } + + try: + await diagnostics.capture_diagnostics( + output_dir = self._diagnostics_output_dir(), + base_prefix = "publish_error", + attempt = attempt, + subject = ad_file_stem, + page = page, + json_payload = json_payload, + log_file_path = self.log_file_path, + copy_log = cfg.capture_log_copy, + ) + except Exception as error: # noqa: BLE001 + LOG.warning("Diagnostics capture failed during publish error handling: %s", error) + async def is_logged_in(self, *, include_probe:bool = True) -> bool: # Use login_detection timeout (10s default) instead of default (5s) # to allow sufficient time for client-side JavaScript rendering after page load. @@ -1298,7 +1344,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: count = 0 failed_count = 0 - max_retries = 3 + max_retries = PUBLISH_MAX_RETRIES published_ads = await self._fetch_published_ads() @@ -1321,6 +1367,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 except asyncio.CancelledError: raise # Respect task cancellation except (TimeoutError, ProtocolException) as ex: + await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex) if attempt < max_retries: LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex) await self.web_sleep(2) # Wait before retry diff --git a/src/kleinanzeigen_bot/model/config_model.py b/src/kleinanzeigen_bot/model/config_model.py index 1881842..1d538fd 100644 --- a/src/kleinanzeigen_bot/model/config_model.py +++ b/src/kleinanzeigen_bot/model/config_model.py @@ -11,10 +11,12 @@ from pydantic import AfterValidator, Field, model_validator from typing_extensions import deprecated from kleinanzeigen_bot.model.update_check_model import UpdateCheckConfig -from kleinanzeigen_bot.utils import dicts +from kleinanzeigen_bot.utils import dicts, loggers from kleinanzeigen_bot.utils.misc import get_attr from kleinanzeigen_bot.utils.pydantics import ContextualModel +LOG:Final[loggers.Logger] = loggers.get_logger(__name__) + _MAX_PERCENTAGE:Final[int] = 100 @@ -195,25 +197,73 @@ class TimeoutConfig(ContextualModel): return base * self.multiplier * backoff -class DiagnosticsConfig(ContextualModel): - login_detection_capture:bool = Field( +class CaptureOnConfig(ContextualModel): + """Configuration for which operations should trigger diagnostics capture.""" + + login_detection:bool = Field( default = False, - description = "If true, capture diagnostics artifacts (screenshot + HTML) when login detection returns UNKNOWN.", + description = "Capture screenshot and HTML when login state detection fails", + ) + publish:bool = Field( + default = False, + description = "Capture screenshot, HTML, and JSON on publish failures", + ) + + +class DiagnosticsConfig(ContextualModel): + capture_on:CaptureOnConfig = Field( + default_factory = CaptureOnConfig, + description = "Enable diagnostics capture for specific operations.", + ) + capture_log_copy:bool = Field( + default = False, + description = "If true, copy the entire bot log file when diagnostics are captured (may duplicate log content).", ) pause_on_login_detection_failure:bool = Field( default = False, description = "If true, pause (interactive runs only) after capturing login detection diagnostics " - "so that user can inspect the browser. Requires login_detection_capture to be enabled.", + "so that user can inspect the browser. Requires capture_on.login_detection to be enabled.", ) output_dir:str | None = Field( default = None, description = "Optional output directory for diagnostics artifacts. If omitted, a safe default is used based on installation mode.", ) + @model_validator(mode = "before") + @classmethod + def migrate_legacy_diagnostics_keys(cls, data:dict[str, Any]) -> dict[str, Any]: + """Migrate legacy login_detection_capture and publish_error_capture keys.""" + + # Migrate legacy login_detection_capture -> capture_on.login_detection + # Only migrate if the new key is not already explicitly set + if "login_detection_capture" in data: + LOG.warning("Deprecated: 'login_detection_capture' is replaced by 'capture_on.login_detection'. Please update your config.") + if "capture_on" not in data or data["capture_on"] is None: + data["capture_on"] = {} + if isinstance(data["capture_on"], dict) and "login_detection" not in data["capture_on"]: + data["capture_on"]["login_detection"] = data.pop("login_detection_capture") + else: + # Remove legacy key but don't overwrite explicit new value + data.pop("login_detection_capture") + + # Migrate legacy publish_error_capture -> capture_on.publish + # Only migrate if the new key is not already explicitly set + if "publish_error_capture" in data: + LOG.warning("Deprecated: 'publish_error_capture' is replaced by 'capture_on.publish'. Please update your config.") + if "capture_on" not in data or data["capture_on"] is None: + data["capture_on"] = {} + if isinstance(data["capture_on"], dict) and "publish" not in data["capture_on"]: + data["capture_on"]["publish"] = data.pop("publish_error_capture") + else: + # Remove legacy key but don't overwrite explicit new value + data.pop("publish_error_capture") + + return data + @model_validator(mode = "after") def _validate_pause_requires_capture(self) -> "DiagnosticsConfig": - if self.pause_on_login_detection_failure and not self.login_detection_capture: - raise ValueError(_("pause_on_login_detection_failure requires login_detection_capture to be enabled")) + if self.pause_on_login_detection_failure and not self.capture_on.login_detection: + raise ValueError(_("pause_on_login_detection_failure requires capture_on.login_detection to be enabled")) return self diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 1f155b6..9189e81 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -75,6 +75,9 @@ kleinanzeigen_bot/__init__.py: "# Login detection returned UNKNOWN. Browser is paused for manual inspection.": "# Login-Erkennung ergab UNKNOWN. Browser ist zur manuellen Prüfung angehalten." "Press a key to continue...": "Eine Taste drücken, um fortzufahren..." + _capture_publish_error_diagnostics_if_enabled: + "Diagnostics capture failed during publish error handling: %s": "Diagnose-Erfassung fehlgeschlagen während der Veröffentlichung-Fehlerbehandlung: %s" + login: "Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..." "Current page URL after opening homepage: %s": "Aktuelle Seiten-URL nach dem Öffnen der Startseite: %s" @@ -619,10 +622,13 @@ kleinanzeigen_bot/model/config_model.py: "amount must be specified when auto_price_reduction is enabled": "amount muss angegeben werden, wenn auto_price_reduction aktiviert ist" "min_price must be specified when auto_price_reduction is enabled": "min_price muss angegeben werden, wenn auto_price_reduction aktiviert ist" "Percentage reduction amount must not exceed %s": "Prozentuale Reduktionsmenge darf %s nicht überschreiten" + migrate_legacy_diagnostics_keys: + "Deprecated: 'login_detection_capture' is replaced by 'capture_on.login_detection'. Please update your config.": "Veraltet: 'login_detection_capture' wurde durch 'capture_on.login_detection' ersetzt. Bitte aktualisieren Sie Ihre Konfiguration." + "Deprecated: 'publish_error_capture' is replaced by 'capture_on.publish'. Please update your config.": "Veraltet: 'publish_error_capture' wurde durch 'capture_on.publish' ersetzt. Bitte aktualisieren Sie Ihre Konfiguration." _validate_glob_pattern: "must be a non-empty, non-blank glob pattern": "muss ein nicht-leeres Glob-Muster sein" _validate_pause_requires_capture: - "pause_on_login_detection_failure requires login_detection_capture to be enabled": "pause_on_login_detection_failure erfordert, dass login_detection_capture aktiviert ist" + "pause_on_login_detection_failure requires capture_on.login_detection to be enabled": "pause_on_login_detection_failure erfordert, dass capture_on.login_detection aktiviert ist" ################################################# kleinanzeigen_bot/model/ad_model.py: @@ -656,6 +662,21 @@ kleinanzeigen_bot/model/update_check_state.py: "Invalid interval format or unsupported unit: %s. Using default interval for this run.": "Ungültiges Intervallformat oder nicht unterstützte Einheit: %s. Es wird das Standardintervall für diesen Durchlauf verwendet." "Negative interval: %s. Minimum interval is 1d. Using default interval for this run.": "Negatives Intervall: %s. Das Mindestintervall beträgt 1 Tag. Es wird das Standardintervall für diesen Durchlauf verwendet." +################################################# +kleinanzeigen_bot/utils/diagnostics.py: +################################################# + _copy_log_sync: + "Log file not found for diagnostics copy: %s": "Logdatei nicht gefunden für Diagnosekopie: %s" + + capture_diagnostics: + "Diagnostics screenshot capture failed: %s": "Diagnose-Screenshot-Erfassung fehlgeschlagen: %s" + "Diagnostics HTML capture failed: %s": "Diagnose-HTML-Erfassung fehlgeschlagen: %s" + "Diagnostics JSON capture failed: %s": "Diagnose-JSON-Erfassung fehlgeschlagen: %s" + "Diagnostics log copy failed: %s": "Diagnose-Log-Kopie fehlgeschlagen: %s" + "Diagnostics saved: %s": "Diagnosedaten gespeichert: %s" + "Diagnostics capture attempted but no artifacts were saved (all captures failed)": "Diagnoseerfassung versucht, aber keine Artefakte gespeichert (alle Erfassungen fehlgeschlagen)" + "Diagnostics capture failed: %s": "Diagnoseerfassung fehlgeschlagen: %s" + ################################################# kleinanzeigen_bot/utils/xdg_paths.py: ################################################# diff --git a/src/kleinanzeigen_bot/utils/diagnostics.py b/src/kleinanzeigen_bot/utils/diagnostics.py new file mode 100644 index 0000000..eb28e3d --- /dev/null +++ b/src/kleinanzeigen_bot/utils/diagnostics.py @@ -0,0 +1,135 @@ +# SPDX-FileCopyrightText: © Jens Bergmann and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +import asyncio, json, re, secrets, shutil # isort: skip +from pathlib import Path +from typing import Any, Final + +from kleinanzeigen_bot.utils import loggers, misc + +LOG:Final[loggers.Logger] = loggers.get_logger(__name__) + + +class CaptureResult: + """Result of a diagnostics capture attempt.""" + + def __init__(self) -> None: + self.saved_artifacts:list[Path] = [] + + def add_saved(self, path:Path) -> None: + """Add a successfully saved artifact.""" + self.saved_artifacts.append(path) + + def has_any(self) -> bool: + """Check if any artifacts were saved.""" + return bool(self.saved_artifacts) + + +def _write_json_sync(json_path:Path, json_payload:dict[str, Any]) -> None: + """Synchronous helper to write JSON to file.""" + with json_path.open("w", encoding = "utf-8") as handle: + json.dump(json_payload, handle, indent = 2, default = str) + handle.write("\n") + + +def _copy_log_sync(log_file_path:str, log_path:Path) -> bool: + """Synchronous helper to copy log file. Returns True if copy succeeded.""" + log_source = Path(log_file_path) + if not log_source.exists(): + LOG.warning("Log file not found for diagnostics copy: %s", log_file_path) + return False + loggers.flush_all_handlers() + shutil.copy2(log_source, log_path) + return True + + +async def capture_diagnostics( + *, + output_dir:Path, + base_prefix:str, + attempt:int | None = None, + subject:str | None = None, + page:Any | None = None, + json_payload:dict[str, Any] | None = None, + log_file_path:str | None = None, + copy_log:bool = False, +) -> CaptureResult: + """Capture diagnostics artifacts for a given operation. + + Args: + output_dir: The output directory for diagnostics artifacts + base_prefix: Base filename prefix (e.g., 'login_detection_unknown', 'publish_error') + attempt: Optional attempt number for retry operations + subject: Optional subject identifier (e.g., ad token) + page: Optional page object with save_screenshot and get_content methods + json_payload: Optional JSON data to save + log_file_path: Optional log file path to copy + copy_log: Whether to copy log file + + Returns: + CaptureResult containing the list of successfully saved artifacts + """ + result = CaptureResult() + + try: + await asyncio.to_thread(output_dir.mkdir, parents = True, exist_ok = True) + + ts = misc.now().strftime("%Y%m%dT%H%M%S") + suffix = secrets.token_hex(4) + base = f"{base_prefix}_{ts}_{suffix}" + + if attempt is not None: + base = f"{base}_attempt{attempt}" + + if subject: + safe_subject = re.sub(r"[^A-Za-z0-9_-]", "_", subject) + base = f"{base}_{safe_subject}" + + screenshot_path = output_dir / f"{base}.png" + html_path = output_dir / f"{base}.html" + json_path = output_dir / f"{base}.json" + log_path = output_dir / f"{base}.log" + + if page: + try: + await page.save_screenshot(str(screenshot_path)) + result.add_saved(screenshot_path) + except Exception as exc: # noqa: BLE001 + LOG.debug("Diagnostics screenshot capture failed: %s", exc) + + try: + html = await page.get_content() + await asyncio.to_thread(html_path.write_text, html, encoding = "utf-8") + result.add_saved(html_path) + except Exception as exc: # noqa: BLE001 + LOG.debug("Diagnostics HTML capture failed: %s", exc) + + if json_payload is not None: + try: + await asyncio.to_thread(_write_json_sync, json_path, json_payload) + result.add_saved(json_path) + except Exception as exc: # noqa: BLE001 + LOG.debug("Diagnostics JSON capture failed: %s", exc) + + if copy_log and log_file_path: + try: + copy_succeeded = await asyncio.to_thread(_copy_log_sync, log_file_path, log_path) + if copy_succeeded: + result.add_saved(log_path) + except Exception as exc: # noqa: BLE001 + LOG.debug("Diagnostics log copy failed: %s", exc) + + # Determine if any capture was actually requested + capture_requested = page is not None or json_payload is not None or (copy_log and log_file_path) + + if result.has_any(): + artifacts_str = " ".join(map(str, result.saved_artifacts)) + LOG.info("Diagnostics saved: %s", artifacts_str) + elif capture_requested: + LOG.warning("Diagnostics capture attempted but no artifacts were saved (all captures failed)") + else: + LOG.debug("No diagnostics capture requested") + except Exception as exc: # noqa: BLE001 + LOG.debug("Diagnostics capture failed: %s", exc) + + return result diff --git a/tests/unit/test_config_model.py b/tests/unit/test_config_model.py index f4fa6dc..6b644da 100644 --- a/tests/unit/test_config_model.py +++ b/tests/unit/test_config_model.py @@ -87,7 +87,7 @@ def test_timeout_config_resolve_falls_back_to_default() -> None: def test_diagnostics_pause_requires_capture_validation() -> None: """ Unit: DiagnosticsConfig validator ensures pause_on_login_detection_failure - requires login_detection_capture to be enabled. + requires capture_on.login_detection to be enabled. """ minimal_cfg = { "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, @@ -95,12 +95,98 @@ def test_diagnostics_pause_requires_capture_validation() -> None: "publishing": {"delete_old_ads": "BEFORE_PUBLISH", "delete_old_ads_by_title": False}, } - valid_cfg = {**minimal_cfg, "diagnostics": {"login_detection_capture": True, "pause_on_login_detection_failure": True}} + valid_cfg = {**minimal_cfg, "diagnostics": {"capture_on": {"login_detection": True}, "pause_on_login_detection_failure": True}} config = Config.model_validate(valid_cfg) assert config.diagnostics is not None assert config.diagnostics.pause_on_login_detection_failure is True - assert config.diagnostics.login_detection_capture is True + assert config.diagnostics.capture_on.login_detection is True - invalid_cfg = {**minimal_cfg, "diagnostics": {"login_detection_capture": False, "pause_on_login_detection_failure": True}} - with pytest.raises(ValueError, match = "pause_on_login_detection_failure requires login_detection_capture to be enabled"): + invalid_cfg = {**minimal_cfg, "diagnostics": {"capture_on": {"login_detection": False}, "pause_on_login_detection_failure": True}} + with pytest.raises(ValueError, match = "pause_on_login_detection_failure requires capture_on.login_detection to be enabled"): Config.model_validate(invalid_cfg) + + +def test_diagnostics_legacy_login_detection_capture_migration_when_capture_on_exists() -> None: + """ + Unit: Test that legacy login_detection_capture is removed but doesn't overwrite explicit capture_on.login_detection. + """ + minimal_cfg = { + "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, + "login": {"username": "dummy", "password": "dummy"}, # noqa: S105 + } + + # When capture_on.login_detection is explicitly set to False, legacy True should be ignored + cfg_with_explicit = { + **minimal_cfg, + "diagnostics": { + "login_detection_capture": True, # legacy key + "capture_on": {"login_detection": False}, # explicit new key set to False + }, + } + config = Config.model_validate(cfg_with_explicit) + assert config.diagnostics is not None + assert config.diagnostics.capture_on.login_detection is False # explicit value preserved + + +def test_diagnostics_legacy_publish_error_capture_migration_when_capture_on_exists() -> None: + """ + Unit: Test that legacy publish_error_capture is removed but doesn't overwrite explicit capture_on.publish. + """ + minimal_cfg = { + "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, + "login": {"username": "dummy", "password": "dummy"}, # noqa: S105 + } + + # When capture_on.publish is explicitly set to False, legacy True should be ignored + cfg_with_explicit = { + **minimal_cfg, + "diagnostics": { + "publish_error_capture": True, # legacy key + "capture_on": {"publish": False}, # explicit new key set to False + }, + } + config = Config.model_validate(cfg_with_explicit) + assert config.diagnostics is not None + assert config.diagnostics.capture_on.publish is False # explicit value preserved + + +def test_diagnostics_legacy_login_detection_capture_migration_when_capture_on_is_none() -> None: + """ + Unit: Test that legacy login_detection_capture is migrated when capture_on is None. + """ + minimal_cfg = { + "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, + "login": {"username": "dummy", "password": "dummy"}, # noqa: S105 + } + + cfg_with_null_capture_on = { + **minimal_cfg, + "diagnostics": { + "login_detection_capture": True, # legacy key + "capture_on": None, # capture_on is explicitly None + }, + } + config = Config.model_validate(cfg_with_null_capture_on) + assert config.diagnostics is not None + assert config.diagnostics.capture_on.login_detection is True # legacy value migrated + + +def test_diagnostics_legacy_publish_error_capture_migration_when_capture_on_is_none() -> None: + """ + Unit: Test that legacy publish_error_capture is migrated when capture_on is None. + """ + minimal_cfg = { + "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, + "login": {"username": "dummy", "password": "dummy"}, # noqa: S105 + } + + cfg_with_null_capture_on = { + **minimal_cfg, + "diagnostics": { + "publish_error_capture": True, # legacy key + "capture_on": None, # capture_on is explicitly None + }, + } + config = Config.model_validate(cfg_with_null_capture_on) + assert config.diagnostics is not None + assert config.diagnostics.capture_on.publish is True # legacy value migrated diff --git a/tests/unit/test_diagnostics.py b/tests/unit/test_diagnostics.py new file mode 100644 index 0000000..0774d3c --- /dev/null +++ b/tests/unit/test_diagnostics.py @@ -0,0 +1,224 @@ +# SPDX-FileCopyrightText: © 2025 Sebastian Thomschke and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from kleinanzeigen_bot.utils import diagnostics as diagnostics_module +from kleinanzeigen_bot.utils.diagnostics import capture_diagnostics + + +@pytest.mark.unit +class TestDiagnosticsCapture: + """Tests for diagnostics capture functionality.""" + + @pytest.mark.asyncio + async def test_capture_diagnostics_creates_output_dir(self, tmp_path:Path) -> None: + """Test that capture_diagnostics creates output directory.""" + mock_page = AsyncMock() + + output_dir = tmp_path / "diagnostics" + _ = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = mock_page, + ) + + # Verify directory was created + assert output_dir.exists() + + @pytest.mark.asyncio + async def test_capture_diagnostics_creates_screenshot(self, tmp_path:Path) -> None: + """Test that capture_diagnostics creates screenshot file.""" + mock_page = AsyncMock() + mock_page.save_screenshot = AsyncMock() + + output_dir = tmp_path / "diagnostics" + result = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = mock_page, + ) + + # Verify screenshot file was created and page method was called + assert len(result.saved_artifacts) == 1 + assert result.saved_artifacts[0].suffix == ".png" + mock_page.save_screenshot.assert_awaited_once() + + @pytest.mark.asyncio + async def test_capture_diagnostics_creates_html(self, tmp_path:Path) -> None: + """Test that capture_diagnostics creates HTML file.""" + mock_page = AsyncMock() + mock_page.get_content = AsyncMock(return_value = "") + + output_dir = tmp_path / "diagnostics" + result = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = mock_page, + ) + + # Verify HTML file was created along with screenshot + assert len(result.saved_artifacts) == 2 + assert any(a.suffix == ".html" for a in result.saved_artifacts) + + @pytest.mark.asyncio + async def test_capture_diagnostics_creates_json(self, tmp_path:Path) -> None: + """Test that capture_diagnostics creates JSON file.""" + mock_page = AsyncMock() + mock_page.get_content = AsyncMock(return_value = "") + + output_dir = tmp_path / "diagnostics" + result = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = mock_page, + json_payload = {"test": "data"}, + ) + + # Verify JSON file was created along with HTML and screenshot + assert len(result.saved_artifacts) == 3 + assert any(a.suffix == ".json" for a in result.saved_artifacts) + + @pytest.mark.asyncio + async def test_capture_diagnostics_copies_log_file(self, tmp_path:Path) -> None: + """Test that capture_diagnostics copies log file when enabled.""" + log_file = tmp_path / "test.log" + log_file.write_text("test log content") + + output_dir = tmp_path / "diagnostics" + result = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = None, # No page to avoid screenshot + log_file_path = str(log_file), + copy_log = True, + ) + + # Verify log was copied + assert len(result.saved_artifacts) == 1 + assert result.saved_artifacts[0].suffix == ".log" + + def test_copy_log_sync_returns_false_when_file_not_found(self, tmp_path:Path) -> None: + """Test _copy_log_sync returns False when log file does not exist.""" + non_existent_log = tmp_path / "non_existent.log" + log_path = tmp_path / "output.log" + + result = diagnostics_module._copy_log_sync(str(non_existent_log), log_path) + + assert result is False + assert not log_path.exists() + + @pytest.mark.asyncio + async def test_capture_diagnostics_handles_screenshot_exception(self, tmp_path:Path, caplog:pytest.LogCaptureFixture) -> None: + """Test that capture_diagnostics handles screenshot capture exceptions gracefully.""" + mock_page = AsyncMock() + mock_page.save_screenshot = AsyncMock(side_effect = Exception("Screenshot failed")) + + output_dir = tmp_path / "diagnostics" + result = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = mock_page, + ) + + # Verify no artifacts were saved due to exception + assert len(result.saved_artifacts) == 0 + assert "Diagnostics screenshot capture failed" in caplog.text + + @pytest.mark.asyncio + async def test_capture_diagnostics_handles_json_exception(self, tmp_path:Path, caplog:pytest.LogCaptureFixture, monkeypatch:pytest.MonkeyPatch) -> None: + """Test that capture_diagnostics handles JSON write exceptions gracefully.""" + mock_page = AsyncMock() + mock_page.get_content = AsyncMock(return_value = "") + + output_dir = tmp_path / "diagnostics" + + # Mock _write_json_sync to raise an exception + monkeypatch.setattr(diagnostics_module, "_write_json_sync", MagicMock(side_effect = Exception("JSON write failed"))) + + result = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = mock_page, + json_payload = {"test": "data"}, + ) + + # Verify screenshot and HTML were saved, but JSON failed + assert len(result.saved_artifacts) == 2 + assert any(a.suffix == ".png" for a in result.saved_artifacts) + assert any(a.suffix == ".html" for a in result.saved_artifacts) + assert not any(a.suffix == ".json" for a in result.saved_artifacts) + assert "Diagnostics JSON capture failed" in caplog.text + + @pytest.mark.asyncio + async def test_capture_diagnostics_handles_log_copy_exception( + self, tmp_path:Path, caplog:pytest.LogCaptureFixture, monkeypatch:pytest.MonkeyPatch + ) -> None: + """Test that capture_diagnostics handles log copy exceptions gracefully.""" + # Create a log file + log_file = tmp_path / "test.log" + log_file.write_text("test log content") + + output_dir = tmp_path / "diagnostics" + + # Mock _copy_log_sync to raise an exception + original_copy_log = diagnostics_module._copy_log_sync + monkeypatch.setattr(diagnostics_module, "_copy_log_sync", MagicMock(side_effect = Exception("Copy failed"))) + + try: + result = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = None, + log_file_path = str(log_file), + copy_log = True, + ) + + # Verify no artifacts were saved due to exception + assert len(result.saved_artifacts) == 0 + assert "Diagnostics log copy failed" in caplog.text + finally: + monkeypatch.setattr(diagnostics_module, "_copy_log_sync", original_copy_log) + + @pytest.mark.asyncio + async def test_capture_diagnostics_logs_warning_when_all_captures_fail( + self, tmp_path:Path, caplog:pytest.LogCaptureFixture, monkeypatch:pytest.MonkeyPatch + ) -> None: + """Test warning is logged when capture is requested but all fail.""" + mock_page = AsyncMock() + mock_page.save_screenshot = AsyncMock(side_effect = Exception("Screenshot failed")) + mock_page.get_content = AsyncMock(side_effect = Exception("HTML failed")) + + # Mock JSON write to also fail + monkeypatch.setattr(diagnostics_module, "_write_json_sync", MagicMock(side_effect = Exception("JSON write failed"))) + + output_dir = tmp_path / "diagnostics" + result = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = mock_page, + json_payload = {"test": "data"}, + ) + + # Verify no artifacts were saved + assert len(result.saved_artifacts) == 0 + assert "Diagnostics capture attempted but no artifacts were saved" in caplog.text + + @pytest.mark.asyncio + async def test_capture_diagnostics_logs_debug_when_no_capture_requested(self, tmp_path:Path, caplog:pytest.LogCaptureFixture) -> None: + """Test debug is logged when no diagnostics capture is requested.""" + output_dir = tmp_path / "diagnostics" + + with caplog.at_level("DEBUG"): + _ = await capture_diagnostics( + output_dir = output_dir, + base_prefix = "test", + page = None, + json_payload = None, + copy_log = False, + ) + + assert "No diagnostics capture requested" in caplog.text diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index 9ffaf76..f16eb43 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: © Jens Bergmann and contributors # SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ -import copy, io, json, logging, os, tempfile # isort: skip +import copy, fnmatch, io, json, logging, os, tempfile # isort: skip from collections.abc import Generator from contextlib import redirect_stdout from datetime import timedelta @@ -12,7 +12,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest from pydantic import ValidationError -from kleinanzeigen_bot import LOG, AdUpdateStrategy, KleinanzeigenBot, LoginState, misc +from kleinanzeigen_bot import LOG, PUBLISH_MAX_RETRIES, AdUpdateStrategy, KleinanzeigenBot, LoginState, misc from kleinanzeigen_bot._version import __version__ from kleinanzeigen_bot.model.ad_model import Ad from kleinanzeigen_bot.model.config_model import AdDefaults, Config, DiagnosticsConfig, PublishingConfig @@ -388,7 +388,7 @@ class TestKleinanzeigenBotAuthentication: @pytest.mark.asyncio async def test_get_login_state_unknown_captures_diagnostics_when_enabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: - test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"login_detection_capture": True, "output_dir": str(tmp_path)}) + test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"login_detection": True}, "output_dir": str(tmp_path)}) page = MagicMock() page.save_screenshot = AsyncMock() @@ -406,7 +406,7 @@ class TestKleinanzeigenBotAuthentication: @pytest.mark.asyncio async def test_get_login_state_unknown_does_not_capture_diagnostics_when_disabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: - test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"login_detection_capture": False, "output_dir": str(tmp_path)}) + test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"login_detection": False}, "output_dir": str(tmp_path)}) page = MagicMock() page.save_screenshot = AsyncMock() @@ -425,7 +425,7 @@ class TestKleinanzeigenBotAuthentication: @pytest.mark.asyncio async def test_get_login_state_unknown_pauses_for_inspection_when_enabled_and_interactive(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: test_bot.config.diagnostics = DiagnosticsConfig.model_validate( - {"login_detection_capture": True, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)} + {"capture_on": {"login_detection": True}, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)} ) page = MagicMock() @@ -453,7 +453,7 @@ class TestKleinanzeigenBotAuthentication: @pytest.mark.asyncio async def test_get_login_state_unknown_does_not_pause_when_non_interactive(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: test_bot.config.diagnostics = DiagnosticsConfig.model_validate( - {"login_detection_capture": True, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)} + {"capture_on": {"login_detection": True}, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)} ) page = MagicMock() @@ -624,6 +624,139 @@ class TestKleinanzeigenBotAuthentication: assert mock_ainput.call_count == 0 +class TestKleinanzeigenBotDiagnostics: + @pytest.fixture + def diagnostics_ad_config(self) -> dict[str, Any]: + return { + "active": True, + "type": "OFFER", + "title": "Test ad title", + "description": "Test description", + "category": "161/176/sonstige", + "price_type": "NEGOTIABLE", + "shipping_type": "PICKUP", + "sell_directly": False, + "contact": { + "name": "Tester", + "zipcode": "12345", + }, + "republication_interval": 7, + } + + @pytest.mark.unit + @pytest.mark.asyncio + async def test_publish_ads_captures_diagnostics_on_failures( + self, + test_bot:KleinanzeigenBot, + tmp_path:Path, + diagnostics_ad_config:dict[str, Any], + ) -> None: + """Ensure publish failures capture diagnostics artifacts.""" + log_file_path = tmp_path / "test.log" + log_file_path.write_text("Test log content\n", encoding = "utf-8") + test_bot.log_file_path = str(log_file_path) + + test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"publish": True}, "output_dir": str(tmp_path)}) + + page = MagicMock() + page.save_screenshot = AsyncMock() + page.get_content = AsyncMock(return_value = "") + page.sleep = AsyncMock() + page.url = "https://example.com/fail" + test_bot.page = page + + ad_cfg = Ad.model_validate(diagnostics_ad_config) + ad_cfg_orig = copy.deepcopy(diagnostics_ad_config) + ad_file = str(tmp_path / "ad_000001_Test.yml") + + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = {"content": json.dumps({"ads": []})}), + patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")), + ): + await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) + + expected_retries = PUBLISH_MAX_RETRIES + assert page.save_screenshot.await_count == expected_retries + assert page.get_content.await_count == expected_retries + entries = os.listdir(tmp_path) + html_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.html")] + json_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.json")] + assert len(html_files) == expected_retries + assert len(json_files) == expected_retries + + @pytest.mark.unit + @pytest.mark.asyncio + async def test_publish_ads_captures_log_copy_when_enabled( + self, + test_bot:KleinanzeigenBot, + tmp_path:Path, + diagnostics_ad_config:dict[str, Any], + ) -> None: + """Ensure publish failures copy log file when capture_log_copy is enabled.""" + log_file_path = tmp_path / "test.log" + log_file_path.write_text("Test log content\n", encoding = "utf-8") + test_bot.log_file_path = str(log_file_path) + + test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"publish": True}, "capture_log_copy": True, "output_dir": str(tmp_path)}) + + page = MagicMock() + page.save_screenshot = AsyncMock() + page.get_content = AsyncMock(return_value = "") + page.sleep = AsyncMock() + page.url = "https://example.com/fail" + test_bot.page = page + + ad_cfg = Ad.model_validate(diagnostics_ad_config) + ad_cfg_orig = copy.deepcopy(diagnostics_ad_config) + ad_file = str(tmp_path / "ad_000001_Test.yml") + + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = {"content": json.dumps({"ads": []})}), + patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")), + ): + await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) + + entries = os.listdir(tmp_path) + log_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.log")] + assert len(log_files) == PUBLISH_MAX_RETRIES + + @pytest.mark.unit + @pytest.mark.asyncio + async def test_publish_ads_does_not_capture_diagnostics_when_disabled( + self, + test_bot:KleinanzeigenBot, + tmp_path:Path, + diagnostics_ad_config:dict[str, Any], + ) -> None: + """Ensure diagnostics are not captured when disabled.""" + test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"capture_on": {"publish": False}, "output_dir": str(tmp_path)}) + + page = MagicMock() + page.save_screenshot = AsyncMock() + page.get_content = AsyncMock(return_value = "") + page.sleep = AsyncMock() + page.url = "https://example.com/fail" + test_bot.page = page + + ad_cfg = Ad.model_validate(diagnostics_ad_config) + ad_cfg_orig = copy.deepcopy(diagnostics_ad_config) + ad_file = str(tmp_path / "ad_000001_Test.yml") + + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = {"content": json.dumps({"ads": []})}), + patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")), + ): + await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) + + page.save_screenshot.assert_not_called() + page.get_content.assert_not_called() + entries = os.listdir(tmp_path) + html_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.html")] + json_files = [name for name in entries if fnmatch.fnmatch(name, "publish_error_*_attempt*_ad_000001_Test.json")] + assert not html_files + assert not json_files + + class TestKleinanzeigenBotLocalization: """Tests for localization and help text."""