feat: capture publish failure diagnostics with screenshot and logs (#802)

This commit is contained in:
Jens
2026-02-01 08:17:14 +01:00
committed by GitHub
parent 96f465d5bc
commit b3d5a4b228
10 changed files with 795 additions and 57 deletions

View File

@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import atexit, asyncio, enum, json, os, re, secrets, signal, sys, textwrap # isort: skip
import atexit, asyncio, enum, json, os, re, signal, sys, textwrap # isort: skip
import getopt # pylint: disable=deprecated-module
import urllib.parse as urllib_parse
from datetime import datetime
@@ -19,7 +19,7 @@ from ._version import __version__
from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial, Contact, calculate_auto_price
from .model.config_model import Config
from .update_checker import UpdateChecker
from .utils import dicts, error_handlers, loggers, misc, xdg_paths
from .utils import diagnostics, dicts, error_handlers, loggers, misc, xdg_paths
from .utils.exceptions import CaptchaEncountered
from .utils.files import abspath
from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale
@@ -31,6 +31,8 @@ from .utils.web_scraping_mixin import By, Element, Is, WebScrapingMixin
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
LOG.setLevel(loggers.INFO)
PUBLISH_MAX_RETRIES:Final[int] = 3
colorama.just_fix_windows_console()
@@ -961,8 +963,8 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
return (Path.cwd() / ".temp" / "diagnostics").resolve()
async def _capture_login_detection_diagnostics_if_enabled(self) -> None:
diagnostics = getattr(self.config, "diagnostics", None)
if diagnostics is None or not diagnostics.login_detection_capture:
cfg = getattr(self.config, "diagnostics", None)
if cfg is None or not cfg.capture_on.login_detection:
return
if self._login_detection_diagnostics_captured:
@@ -975,35 +977,79 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
self._login_detection_diagnostics_captured = True
try:
out_dir = self._diagnostics_output_dir()
out_dir.mkdir(parents = True, exist_ok = True)
# Intentionally no username/PII in filename.
ts = misc.now().strftime("%Y%m%dT%H%M%S")
suffix = secrets.token_hex(4)
base = f"login_detection_unknown_{ts}_{suffix}"
screenshot_path = out_dir / f"{base}.png"
html_path = out_dir / f"{base}.html"
try:
await page.save_screenshot(str(screenshot_path))
except Exception as exc: # noqa: BLE001
LOG.debug("Login diagnostics screenshot capture failed: %s", exc)
try:
html = await page.get_content()
html_path.write_text(html, encoding = "utf-8")
except Exception as exc: # noqa: BLE001
LOG.debug("Login diagnostics HTML capture failed: %s", exc)
await diagnostics.capture_diagnostics(
output_dir = self._diagnostics_output_dir(),
base_prefix = "login_detection_unknown",
page = page,
)
except Exception as exc: # noqa: BLE001
LOG.debug("Login diagnostics capture failed: %s", exc)
LOG.debug(
"Login diagnostics capture failed (output_dir=%s, base_prefix=%s): %s",
self._diagnostics_output_dir(),
"login_detection_unknown",
exc,
)
if getattr(diagnostics, "pause_on_login_detection_failure", False) and getattr(sys.stdin, "isatty", lambda: False)():
if cfg.pause_on_login_detection_failure and getattr(sys.stdin, "isatty", lambda: False)():
LOG.warning("############################################")
LOG.warning("# Login detection returned UNKNOWN. Browser is paused for manual inspection.")
LOG.warning("############################################")
await ainput(_("Press a key to continue..."))
async def _capture_publish_error_diagnostics_if_enabled(
    self,
    ad_cfg:Ad,
    ad_cfg_orig:dict[str, Any],
    ad_file:str,
    attempt:int,
    exc:Exception,
) -> None:
    """Collect diagnostics artifacts for a failed publish attempt.

    No-op unless publish-failure capture is enabled in the diagnostics config
    and a browser page is currently available. Otherwise writes a screenshot,
    the page HTML, a JSON payload describing the failure, and optionally a
    copy of the bot log into the diagnostics output directory.
    """
    diag_settings = getattr(self.config, "diagnostics", None)
    if diag_settings is None or not diag_settings.capture_on.publish:
        return
    current_page = getattr(self, "page", None)
    if current_page is None:
        return
    # The ad file name (without extension) identifies the ad in artifact names.
    file_stem = Path(ad_file).stem
    failure_details = {
        "type": exc.__class__.__name__,
        "message": str(exc),
        "repr": repr(exc),
    }
    payload:dict[str, Any] = {
        "timestamp": misc.now().isoformat(timespec = "seconds"),
        "attempt": attempt,
        "page_url": getattr(current_page, "url", None),
        "exception": failure_details,
        "ad_file": ad_file,
        "ad_title": ad_cfg.title,
        "ad_config_effective": ad_cfg.model_dump(mode = "json"),
        "ad_config_original": ad_cfg_orig,
    }
    try:
        await diagnostics.capture_diagnostics(
            output_dir = self._diagnostics_output_dir(),
            base_prefix = "publish_error",
            attempt = attempt,
            subject = file_stem,
            page = current_page,
            json_payload = payload,
            log_file_path = self.log_file_path,
            copy_log = diag_settings.capture_log_copy,
        )
    except Exception as capture_exc:  # noqa: BLE001
        LOG.warning("Diagnostics capture failed during publish error handling: %s", capture_exc)
async def is_logged_in(self, *, include_probe:bool = True) -> bool:
# Use login_detection timeout (10s default) instead of default (5s)
# to allow sufficient time for client-side JavaScript rendering after page load.
@@ -1298,7 +1344,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
count = 0
failed_count = 0
max_retries = 3
max_retries = PUBLISH_MAX_RETRIES
published_ads = await self._fetch_published_ads()
@@ -1321,6 +1367,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
except asyncio.CancelledError:
raise # Respect task cancellation
except (TimeoutError, ProtocolException) as ex:
await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
if attempt < max_retries:
LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex)
await self.web_sleep(2) # Wait before retry

View File

@@ -11,10 +11,12 @@ from pydantic import AfterValidator, Field, model_validator
from typing_extensions import deprecated
from kleinanzeigen_bot.model.update_check_model import UpdateCheckConfig
from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils import dicts, loggers
from kleinanzeigen_bot.utils.misc import get_attr
from kleinanzeigen_bot.utils.pydantics import ContextualModel
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
_MAX_PERCENTAGE:Final[int] = 100
@@ -195,25 +197,73 @@ class TimeoutConfig(ContextualModel):
return base * self.multiplier * backoff
class DiagnosticsConfig(ContextualModel):
login_detection_capture:bool = Field(
class CaptureOnConfig(ContextualModel):
"""Configuration for which operations should trigger diagnostics capture."""
login_detection:bool = Field(
default = False,
description = "If true, capture diagnostics artifacts (screenshot + HTML) when login detection returns UNKNOWN.",
description = "Capture screenshot and HTML when login state detection fails",
)
publish:bool = Field(
default = False,
description = "Capture screenshot, HTML, and JSON on publish failures",
)
class DiagnosticsConfig(ContextualModel):
capture_on:CaptureOnConfig = Field(
default_factory = CaptureOnConfig,
description = "Enable diagnostics capture for specific operations.",
)
capture_log_copy:bool = Field(
default = False,
description = "If true, copy the entire bot log file when diagnostics are captured (may duplicate log content).",
)
pause_on_login_detection_failure:bool = Field(
default = False,
description = "If true, pause (interactive runs only) after capturing login detection diagnostics "
"so that user can inspect the browser. Requires login_detection_capture to be enabled.",
"so that user can inspect the browser. Requires capture_on.login_detection to be enabled.",
)
output_dir:str | None = Field(
default = None,
description = "Optional output directory for diagnostics artifacts. If omitted, a safe default is used based on installation mode.",
)
@model_validator(mode = "before")
@classmethod
def migrate_legacy_diagnostics_keys(cls, data:Any) -> Any:
    """Migrate legacy login_detection_capture and publish_error_capture keys.

    Maps the deprecated flat keys onto the nested ``capture_on`` section:

    * ``login_detection_capture`` -> ``capture_on.login_detection``
    * ``publish_error_capture``   -> ``capture_on.publish``

    An explicitly set new key always wins; the legacy key is dropped in every
    case so pydantic does not reject it as an unknown field.
    """
    # "before" validators may receive arbitrary input (e.g. an already
    # constructed model instance) - only plain dicts need key migration.
    if not isinstance(data, dict):
        return data
    legacy_mappings = (
        ("login_detection_capture", "login_detection",
            "Deprecated: 'login_detection_capture' is replaced by 'capture_on.login_detection'. Please update your config."),
        ("publish_error_capture", "publish",
            "Deprecated: 'publish_error_capture' is replaced by 'capture_on.publish'. Please update your config."),
    )
    for legacy_key, new_key, warning in legacy_mappings:
        if legacy_key not in data:
            continue
        LOG.warning(warning)
        if "capture_on" not in data or data["capture_on"] is None:
            data["capture_on"] = {}
        if isinstance(data["capture_on"], dict) and new_key not in data["capture_on"]:
            # Only migrate if the new key is not already explicitly set
            data["capture_on"][new_key] = data.pop(legacy_key)
        else:
            # Remove legacy key but don't overwrite an explicit new value
            data.pop(legacy_key)
    return data
@model_validator(mode = "after")
def _validate_pause_requires_capture(self) -> "DiagnosticsConfig":
if self.pause_on_login_detection_failure and not self.login_detection_capture:
raise ValueError(_("pause_on_login_detection_failure requires login_detection_capture to be enabled"))
if self.pause_on_login_detection_failure and not self.capture_on.login_detection:
raise ValueError(_("pause_on_login_detection_failure requires capture_on.login_detection to be enabled"))
return self

View File

@@ -75,6 +75,9 @@ kleinanzeigen_bot/__init__.py:
"# Login detection returned UNKNOWN. Browser is paused for manual inspection.": "# Login-Erkennung ergab UNKNOWN. Browser ist zur manuellen Prüfung angehalten."
"Press a key to continue...": "Eine Taste drücken, um fortzufahren..."
_capture_publish_error_diagnostics_if_enabled:
"Diagnostics capture failed during publish error handling: %s": "Diagnose-Erfassung während der Veröffentlichungs-Fehlerbehandlung fehlgeschlagen: %s"
login:
"Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..."
"Current page URL after opening homepage: %s": "Aktuelle Seiten-URL nach dem Öffnen der Startseite: %s"
@@ -619,10 +622,13 @@ kleinanzeigen_bot/model/config_model.py:
"amount must be specified when auto_price_reduction is enabled": "amount muss angegeben werden, wenn auto_price_reduction aktiviert ist"
"min_price must be specified when auto_price_reduction is enabled": "min_price muss angegeben werden, wenn auto_price_reduction aktiviert ist"
"Percentage reduction amount must not exceed %s": "Prozentuale Reduktionsmenge darf %s nicht überschreiten"
migrate_legacy_diagnostics_keys:
"Deprecated: 'login_detection_capture' is replaced by 'capture_on.login_detection'. Please update your config.": "Veraltet: 'login_detection_capture' wurde durch 'capture_on.login_detection' ersetzt. Bitte aktualisieren Sie Ihre Konfiguration."
"Deprecated: 'publish_error_capture' is replaced by 'capture_on.publish'. Please update your config.": "Veraltet: 'publish_error_capture' wurde durch 'capture_on.publish' ersetzt. Bitte aktualisieren Sie Ihre Konfiguration."
_validate_glob_pattern:
"must be a non-empty, non-blank glob pattern": "muss ein nicht-leeres Glob-Muster sein"
_validate_pause_requires_capture:
"pause_on_login_detection_failure requires login_detection_capture to be enabled": "pause_on_login_detection_failure erfordert, dass login_detection_capture aktiviert ist"
"pause_on_login_detection_failure requires capture_on.login_detection to be enabled": "pause_on_login_detection_failure erfordert, dass capture_on.login_detection aktiviert ist"
#################################################
kleinanzeigen_bot/model/ad_model.py:
@@ -656,6 +662,21 @@ kleinanzeigen_bot/model/update_check_state.py:
"Invalid interval format or unsupported unit: %s. Using default interval for this run.": "Ungültiges Intervallformat oder nicht unterstützte Einheit: %s. Es wird das Standardintervall für diesen Durchlauf verwendet."
"Negative interval: %s. Minimum interval is 1d. Using default interval for this run.": "Negatives Intervall: %s. Das Mindestintervall beträgt 1 Tag. Es wird das Standardintervall für diesen Durchlauf verwendet."
#################################################
kleinanzeigen_bot/utils/diagnostics.py:
#################################################
_copy_log_sync:
"Log file not found for diagnostics copy: %s": "Logdatei nicht gefunden für Diagnosekopie: %s"
capture_diagnostics:
"Diagnostics screenshot capture failed: %s": "Diagnose-Screenshot-Erfassung fehlgeschlagen: %s"
"Diagnostics HTML capture failed: %s": "Diagnose-HTML-Erfassung fehlgeschlagen: %s"
"Diagnostics JSON capture failed: %s": "Diagnose-JSON-Erfassung fehlgeschlagen: %s"
"Diagnostics log copy failed: %s": "Diagnose-Log-Kopie fehlgeschlagen: %s"
"Diagnostics saved: %s": "Diagnosedaten gespeichert: %s"
"Diagnostics capture attempted but no artifacts were saved (all captures failed)": "Diagnoseerfassung versucht, aber keine Artefakte gespeichert (alle Erfassungen fehlgeschlagen)"
"Diagnostics capture failed: %s": "Diagnoseerfassung fehlgeschlagen: %s"
#################################################
kleinanzeigen_bot/utils/xdg_paths.py:
#################################################

View File

@@ -0,0 +1,135 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import asyncio, json, re, secrets, shutil # isort: skip
from pathlib import Path
from typing import Any, Final
from kleinanzeigen_bot.utils import loggers, misc
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
class CaptureResult:
    """Tracks which diagnostics artifacts were successfully written to disk."""

    def __init__(self) -> None:
        # Paths of artifacts that were written without error, in capture order.
        self.saved_artifacts:list[Path] = []

    def add_saved(self, path:Path) -> None:
        """Record *path* as a successfully saved artifact."""
        self.saved_artifacts.append(path)

    def has_any(self) -> bool:
        """Return True if at least one artifact was saved."""
        return len(self.saved_artifacts) > 0
def _write_json_sync(json_path:Path, json_payload:dict[str, Any]) -> None:
    """Write *json_payload* to *json_path* as pretty-printed UTF-8 JSON.

    Non-serializable values are stringified via ``default = str`` so a
    best-effort diagnostics dump never fails on exotic payload types.
    A trailing newline is appended, matching POSIX text-file convention.
    """
    text = json.dumps(json_payload, indent = 2, default = str) + "\n"
    json_path.write_text(text, encoding = "utf-8")
def _copy_log_sync(log_file_path:str, log_path:Path) -> bool:
    """Copy the bot log file to *log_path*, returning True on success.

    Flushes all log handlers first so the copy contains the latest records.
    Returns False (after logging a warning) when the source file is missing.
    """
    source = Path(log_file_path)
    if not source.exists():
        LOG.warning("Log file not found for diagnostics copy: %s", log_file_path)
        return False
    # Make sure buffered records are on disk before copying.
    loggers.flush_all_handlers()
    shutil.copy2(source, log_path)
    return True
async def capture_diagnostics(
    *,
    output_dir:Path,
    base_prefix:str,
    attempt:int | None = None,
    subject:str | None = None,
    page:Any | None = None,
    json_payload:dict[str, Any] | None = None,
    log_file_path:str | None = None,
    copy_log:bool = False,
) -> CaptureResult:
    """Best-effort capture of diagnostics artifacts for a given operation.

    Args:
        output_dir: Directory that receives the diagnostics artifacts
        base_prefix: Base filename prefix (e.g. 'login_detection_unknown', 'publish_error')
        attempt: Optional attempt number for retry operations
        subject: Optional subject identifier (e.g. ad token); sanitized for filenames
        page: Optional page object exposing save_screenshot and get_content
        json_payload: Optional JSON data to save
        log_file_path: Optional log file path to copy
        copy_log: Whether to copy the log file

    Returns:
        CaptureResult listing the artifacts that were successfully saved.

    Each artifact is captured independently; individual failures are logged
    at debug level and never propagated to the caller.
    """
    result = CaptureResult()
    try:
        await asyncio.to_thread(output_dir.mkdir, parents = True, exist_ok = True)
        # Unique, PII-free base filename: prefix + timestamp + random hex,
        # optionally extended by attempt number and sanitized subject.
        name_parts = [base_prefix, misc.now().strftime("%Y%m%dT%H%M%S"), secrets.token_hex(4)]
        if attempt is not None:
            name_parts.append(f"attempt{attempt}")
        if subject:
            name_parts.append(re.sub(r"[^A-Za-z0-9_-]", "_", subject))
        base = "_".join(name_parts)
        if page:
            screenshot_path = output_dir / f"{base}.png"
            try:
                await page.save_screenshot(str(screenshot_path))
                result.add_saved(screenshot_path)
            except Exception as exc:  # noqa: BLE001
                LOG.debug("Diagnostics screenshot capture failed: %s", exc)
            html_path = output_dir / f"{base}.html"
            try:
                page_html = await page.get_content()
                await asyncio.to_thread(html_path.write_text, page_html, encoding = "utf-8")
                result.add_saved(html_path)
            except Exception as exc:  # noqa: BLE001
                LOG.debug("Diagnostics HTML capture failed: %s", exc)
        if json_payload is not None:
            json_path = output_dir / f"{base}.json"
            try:
                await asyncio.to_thread(_write_json_sync, json_path, json_payload)
                result.add_saved(json_path)
            except Exception as exc:  # noqa: BLE001
                LOG.debug("Diagnostics JSON capture failed: %s", exc)
        if copy_log and log_file_path:
            log_path = output_dir / f"{base}.log"
            try:
                if await asyncio.to_thread(_copy_log_sync, log_file_path, log_path):
                    result.add_saved(log_path)
            except Exception as exc:  # noqa: BLE001
                LOG.debug("Diagnostics log copy failed: %s", exc)
        # Distinguish "everything failed" from "nothing was requested at all".
        anything_requested = page is not None or json_payload is not None or (copy_log and log_file_path)
        if result.has_any():
            LOG.info("Diagnostics saved: %s", " ".join(str(p) for p in result.saved_artifacts))
        elif anything_requested:
            LOG.warning("Diagnostics capture attempted but no artifacts were saved (all captures failed)")
        else:
            LOG.debug("No diagnostics capture requested")
    except Exception as exc:  # noqa: BLE001
        LOG.debug("Diagnostics capture failed: %s", exc)
    return result