diff --git a/README.md b/README.md index d763a37..b1e0c94 100644 --- a/README.md +++ b/README.md @@ -318,7 +318,7 @@ timeouts: captcha_detection: 2.0 # Timeout for captcha iframe detection sms_verification: 4.0 # Timeout for SMS verification banners gdpr_prompt: 10.0 # Timeout when handling GDPR dialogs - login_detection: 10.0 # Timeout for detecting existing login session via DOM elements + login_detection: 10.0 # Timeout for DOM-based login detection fallback (auth probe is tried first) publishing_result: 300.0 # Timeout for publishing status checks publishing_confirmation: 20.0 # Timeout for publish confirmation redirect image_upload: 30.0 # Timeout for image upload and server-side processing @@ -383,9 +383,47 @@ update_check: login: username: "" password: "" + +# diagnostics (optional) - see "Login Detection Behavior" section below for usage details +diagnostics: + login_detection_capture: false # Capture screenshot + HTML when login state is UNKNOWN + pause_on_login_detection_failure: false # Pause for manual inspection (interactive only) + output_dir: "" # Custom output directory (default: portable .temp/diagnostics, xdg cache/diagnostics) ``` -Slow networks or sluggish remote browsers often just need a higher `timeouts.multiplier`, while truly problematic selectors can get explicit values directly under `timeouts`. Remember to regenerate the schemas after changing the configuration model so editors stay in sync. +Slow networks or sluggish remote browsers often just need a higher `timeouts.multiplier`, while truly problematic selectors can get explicit values directly under `timeouts`. + +> **Developer Note:** Remember to regenerate the schemas after changing the configuration model so editors stay in sync. + +### Login Detection Behavior + +The bot uses a **server-side auth probe** to detect login state more reliably: + +1. 
**Auth probe (primary method)**: Sends a GET request to `{root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT` + - Returns `LOGGED_IN` if response is HTTP 200 with valid JSON containing `"ads"` key + - Returns `LOGGED_OUT` if response is HTTP 401/403 or HTML contains login markers + - Returns `UNKNOWN` on timeouts, assertion failures, or unexpected response bodies + +2. **DOM fallback**: Only consulted when auth probe returns `UNKNOWN` + - Looks for `.mr-medium` element containing username + - Falls back to `#user-email` ID + - Uses `login_detection` timeout (default: 10.0 seconds) + +This approach reduces unnecessary re-login attempts because the server-side probe is not affected by client-side rendering delays (SPA hydration) or A/B test variations, though it may return UNKNOWN and fall back to DOM-based checks. + +**⚠️ PII Warning:** HTML dumps may contain your account email or other personally identifiable information. Review files in the diagnostics output directory before sharing them publicly. 
+ +**Optional diagnostics** help troubleshoot login detection issues: + +- Enable `login_detection_capture` to capture screenshots and HTML dumps when state is `UNKNOWN` +- Enable `pause_on_login_detection_failure` to pause the bot for manual inspection (interactive sessions only; requires `login_detection_capture=true`) +- Use custom `output_dir` to specify where artifacts are saved + +**Output locations (default):** + +- **Portable mode**: `./.temp/diagnostics/` +- **System-wide mode (XDG)**: `~/.cache/kleinanzeigen-bot/diagnostics/` (Linux) or `~/Library/Caches/kleinanzeigen-bot/diagnostics/` (macOS) +- **Custom**: Path resolved relative to your `config.yaml` if `output_dir` is specified ### 2) Ad configuration diff --git a/docs/BROWSER_TROUBLESHOOTING.md b/docs/BROWSER_TROUBLESHOOTING.md index 7ed5bd6..0d1e03e 100644 --- a/docs/BROWSER_TROUBLESHOOTING.md +++ b/docs/BROWSER_TROUBLESHOOTING.md @@ -78,20 +78,48 @@ The bot will also provide specific instructions on how to fix your configuration - Intermittent (50/50) login detection behavior - More common with profiles unused for 20+ days +**How login detection works:** +The bot checks your login status using a fast server request first, with a fallback to checking page elements if needed. + +The bot uses a **server-side auth probe** as the primary method to detect login state: + +1. **Auth probe (preferred)**: Sends a GET request to `{root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT` + - Returns `LOGGED_IN` if the response is HTTP 200 with valid JSON containing `"ads"` key + - Returns `LOGGED_OUT` if response is HTTP 401/403 or HTML contains login markers + - Returns `UNKNOWN` on timeouts, assertion failures, or unexpected response bodies + +2. **DOM fallback**: Only used when the auth probe returns `UNKNOWN` + - Looks for `.mr-medium` element containing username + - Falls back to `#user-email` ID + - Uses the `login_detection` timeout (default: 10.0 seconds base; the effective timeout also factors in the multiplier and retry/backoff) + +3. 
**Diagnostics capture**: If the state remains `UNKNOWN` and `diagnostics.login_detection_capture` is enabled + - Captures a screenshot and HTML dump for troubleshooting + - Pauses for manual inspection if `diagnostics.pause_on_login_detection_failure` is enabled and running in an interactive terminal + **What `login_detection` controls:** - Maximum time (seconds) to wait for user profile DOM elements when checking if already logged in -- Default: `10.0` seconds (provides ~22.5s total with retry/backoff) +- Default: `10.0` seconds (base value; the effective timeout also factors in the multiplier and retry/backoff) - Used at startup before attempting login +- Note: With the new auth probe, this timeout only applies to the DOM fallback path **When to increase `login_detection`:** - Frequent unnecessary re-logins despite being authenticated - Slow or unstable network connection - Using browser profiles that haven't been active for weeks +> **⚠️ PII Warning:** HTML dumps captured by diagnostics may contain your account email or other personally identifiable information. Review files in the diagnostics output directory before sharing them publicly. 
+ **Example:** ```yaml timeouts: login_detection: 15.0 # For slower networks or old sessions + +# Enable diagnostics when troubleshooting login detection issues +diagnostics: + login_detection_capture: true # Capture artifacts on UNKNOWN state + pause_on_login_detection_failure: true # Pause for inspection (interactive only) + output_dir: "./diagnostics" # Custom output directory (optional) ``` ## Common Issues and Solutions diff --git a/schemas/config.schema.json b/schemas/config.schema.json index e1be95a..b8d8d92 100644 --- a/schemas/config.schema.json +++ b/schemas/config.schema.json @@ -50,7 +50,7 @@ } ], "default": null, - "description": " suffix for the ad description", + "description": "suffix for the ad description", "title": "Description Suffix" }, "price_type": { @@ -367,6 +367,37 @@ "title": "DescriptionAffixes", "type": "object" }, + "DiagnosticsConfig": { + "properties": { + "login_detection_capture": { + "default": false, + "description": "If true, capture diagnostics artifacts (screenshot + HTML) when login detection returns UNKNOWN.", + "title": "Login Detection Capture", + "type": "boolean" + }, + "pause_on_login_detection_failure": { + "default": false, + "description": "If true, pause (interactive runs only) after capturing login detection diagnostics so that user can inspect the browser. Requires login_detection_capture to be enabled.", + "title": "Pause On Login Detection Failure", + "type": "boolean" + }, + "output_dir": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional output directory for diagnostics artifacts. 
If omitted, a safe default is used based on installation mode.", + "title": "Output Dir" + } + }, + "title": "DiagnosticsConfig", + "type": "object" + }, "DownloadConfig": { "properties": { "include_all_matching_shipping_options": { @@ -624,6 +655,9 @@ }, "properties": { "ad_files": { + "default": [ + "./**/ad_*.{json,yml,yaml}" + ], "description": "\nglob (wildcard) patterns to select ad configuration files\nif relative paths are specified, then they are relative to this configuration file\n", "items": { "type": "string" @@ -668,6 +702,18 @@ "timeouts": { "$ref": "#/$defs/TimeoutConfig", "description": "Centralized timeout configuration." + }, + "diagnostics": { + "anyOf": [ + { + "$ref": "#/$defs/DiagnosticsConfig" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional failure-only diagnostics capture." } }, "title": "Config", diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index edf6dd9..50a5cba 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: © Sebastian Thomschke and contributors # SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ -import atexit, asyncio, enum, json, os, re, signal, sys, textwrap # isort: skip +import atexit, asyncio, enum, json, os, re, secrets, signal, sys, textwrap # isort: skip import getopt # pylint: disable=deprecated-module import urllib.parse as urllib_parse from datetime import datetime @@ -39,6 +39,12 @@ class AdUpdateStrategy(enum.Enum): MODIFY = enum.auto() +class LoginState(enum.Enum): + LOGGED_IN = enum.auto() + LOGGED_OUT = enum.auto() + UNKNOWN = enum.auto() + + def _repost_cycle_ready(ad_cfg:Ad, ad_file_relative:str) -> bool: """ Check if the repost cycle delay has been satisfied. 
@@ -55,7 +61,7 @@ def _repost_cycle_ready(ad_cfg:Ad, ad_file_relative:str) -> bool: if total_reposts <= delay_reposts: remaining = (delay_reposts + 1) - total_reposts LOG.info( - "Auto price reduction delayed for [%s]: waiting %s more reposts (completed %s, applied %s reductions)", + _("Auto price reduction delayed for [%s]: waiting %s more reposts (completed %s, applied %s reductions)"), ad_file_relative, max(remaining, 1), # Clamp to 1 to avoid showing "0 more reposts" when at threshold total_reposts, @@ -64,7 +70,9 @@ def _repost_cycle_ready(ad_cfg:Ad, ad_file_relative:str) -> bool: return False if eligible_cycles <= applied_cycles: - LOG.debug("Auto price reduction already applied for [%s]: %s reductions match %s eligible reposts", ad_file_relative, applied_cycles, eligible_cycles) + LOG.debug( + _("Auto price reduction already applied for [%s]: %s reductions match %s eligible reposts"), ad_file_relative, applied_cycles, eligible_cycles + ) return False return True @@ -175,6 +183,8 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 self.ads_selector = "due" self.keep_old_ads = False + self._login_detection_diagnostics_captured:bool = False + def __del__(self) -> None: if self.file_log: self.file_log.close() @@ -802,10 +812,15 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 if getattr(self, "page", None) is not None: LOG.debug("Current page URL after opening homepage: %s", self.page.url) - if await self.is_logged_in(): + state = await self.get_login_state() + if state == LoginState.LOGGED_IN: LOG.info("Already logged in as [%s]. Skipping login.", self.config.login.username) return + if state == LoginState.UNKNOWN: + LOG.warning("Login state is UNKNOWN - cannot determine if already logged in. 
Skipping login attempt.") + return + LOG.info("Opening login page...") await self.web_open(f"{self.root_url}/m-einloggen.html?targetUrl=/") @@ -813,12 +828,18 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 await self.handle_after_login_logic() # Sometimes a second login is required - if not await self.is_logged_in(): + state = await self.get_login_state() + if state == LoginState.UNKNOWN: + LOG.warning("Login state is UNKNOWN after first login attempt - cannot determine login status. Aborting login process.") + return + + if state == LoginState.LOGGED_OUT: LOG.debug("First login attempt did not succeed, trying second login attempt") await self.fill_login_data_and_send() await self.handle_after_login_logic() - if await self.is_logged_in(): + state = await self.get_login_state() + if state == LoginState.LOGGED_IN: LOG.debug("Second login attempt succeeded") else: LOG.warning("Second login attempt also failed - login may not have succeeded") @@ -859,7 +880,120 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 # GDPR banner not shown within timeout. pass - async def is_logged_in(self) -> bool: + async def _auth_probe_login_state(self) -> LoginState: + """Probe an auth-required endpoint to classify login state. + + The probe is non-mutating (GET request). It is used as a primary method by + get_login_state() to classify login state, falling back to DOM checks only when + the probe returns UNKNOWN. 
+ """ + + url = f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT" + try: + response = await self.web_request(url, valid_response_codes = [200, 401, 403]) + except (TimeoutError, AssertionError): + # AssertionError can occur when web_request() fails to parse the response (e.g., unexpected content type) + # Treat both timeout and assertion failures as UNKNOWN to avoid false assumptions about login state + return LoginState.UNKNOWN + + status_code = response.get("statusCode") + if status_code in {401, 403}: + return LoginState.LOGGED_OUT + + content = response.get("content", "") + if not isinstance(content, str): + return LoginState.UNKNOWN + + try: + payload = json.loads(content) + except json.JSONDecodeError: + lowered = content.lower() + if "m-einloggen" in lowered or "login-email" in lowered or "login-password" in lowered or "login-form" in lowered: + return LoginState.LOGGED_OUT + return LoginState.UNKNOWN + + if isinstance(payload, dict) and "ads" in payload: + return LoginState.LOGGED_IN + + return LoginState.UNKNOWN + + async def get_login_state(self) -> LoginState: + """Determine current login state using layered detection. + + Order: + 1) Server-side auth probe via `_auth_probe_login_state` (preferred) + 2) DOM-based check via `is_logged_in(include_probe=False)` + 3) If still inconclusive, capture diagnostics via + `_capture_login_detection_diagnostics_if_enabled` and return `UNKNOWN` + """ + # Prefer the deterministic, server-side auth probe first. + # SPA/hydration delays can cause DOM-based checks to temporarily miss login indicators. + state = await self._auth_probe_login_state() + if state != LoginState.UNKNOWN: + return state + + # Fall back to DOM-based checks only when the probe is inconclusive. 
+ if await self.is_logged_in(include_probe = False): + return LoginState.LOGGED_IN + + await self._capture_login_detection_diagnostics_if_enabled() + return LoginState.UNKNOWN + + def _diagnostics_output_dir(self) -> Path: + diagnostics = getattr(self.config, "diagnostics", None) + if diagnostics is not None and diagnostics.output_dir and diagnostics.output_dir.strip(): + return Path(abspath(diagnostics.output_dir, relative_to = self.config_file_path)).resolve() + + if self.installation_mode_or_portable == "xdg": + return xdg_paths.get_xdg_base_dir("cache") / "diagnostics" + + return (Path.cwd() / ".temp" / "diagnostics").resolve() + + async def _capture_login_detection_diagnostics_if_enabled(self) -> None: + diagnostics = getattr(self.config, "diagnostics", None) + if diagnostics is None or not diagnostics.login_detection_capture: + return + + if self._login_detection_diagnostics_captured: + return + + page = getattr(self, "page", None) + if page is None: + return + + self._login_detection_diagnostics_captured = True + + try: + out_dir = self._diagnostics_output_dir() + out_dir.mkdir(parents = True, exist_ok = True) + + # Intentionally no username/PII in filename. 
+ ts = misc.now().strftime("%Y%m%dT%H%M%S") + suffix = secrets.token_hex(4) + base = f"login_detection_unknown_{ts}_{suffix}" + screenshot_path = out_dir / f"{base}.png" + html_path = out_dir / f"{base}.html" + + try: + await page.save_screenshot(str(screenshot_path)) + except Exception as exc: # noqa: BLE001 + LOG.debug("Login diagnostics screenshot capture failed: %s", exc) + + try: + html = await page.get_content() + html_path.write_text(html, encoding = "utf-8") + except Exception as exc: # noqa: BLE001 + LOG.debug("Login diagnostics HTML capture failed: %s", exc) + except Exception as exc: # noqa: BLE001 + LOG.debug("Login diagnostics capture failed: %s", exc) + + if getattr(diagnostics, "pause_on_login_detection_failure", False) and getattr(sys.stdin, "isatty", lambda: False)(): + LOG.warning("############################################") + LOG.warning("# Login detection returned UNKNOWN. Browser is paused for manual inspection.") + LOG.warning("############################################") + await ainput(_("Press a key to continue...")) + + async def is_logged_in(self, *, include_probe:bool = True) -> bool: # Use login_detection timeout (10s default) instead of default (5s) # to allow sufficient time for client-side JavaScript rendering after page load. 
# This is especially important for older sessions (20+ days) that require @@ -867,7 +1001,11 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 login_check_timeout = self._timeout("login_detection") effective_timeout = self._effective_timeout("login_detection") username = self.config.login.username.lower() - LOG.debug("Starting login detection (timeout: %.1fs base, %.1fs effective with multiplier/backoff)", login_check_timeout, effective_timeout) + LOG.debug( + "Starting login detection (timeout: %.1fs base, %.1fs effective with multiplier/backoff)", + login_check_timeout, + effective_timeout, + ) # Try to find the standard element first try: @@ -887,7 +1025,15 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 except TimeoutError: LOG.debug("Timeout waiting for #user-email element after %.1fs", effective_timeout) - LOG.debug("No login detected - neither .mr-medium nor #user-email found with username") + if not include_probe: + LOG.debug("No login detected - neither .mr-medium nor #user-email found with username") + return False + + state = await self._auth_probe_login_state() + if state == LoginState.LOGGED_IN: + return True + + LOG.debug("No login detected - DOM elements not found and server probe returned %s", state.name) return False async def delete_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: @@ -1384,7 +1530,10 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 await self.web_input(By.ID, "postad-phonenumber", contact.phone) except TimeoutError: LOG.warning( - "Phone number field not present on page. This is expected for many private accounts; commercial accounts may still support phone numbers." + _( + "Phone number field not present on page. This is expected for many private accounts; " + "commercial accounts may still support phone numbers." 
+ ) ) async def update_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: diff --git a/src/kleinanzeigen_bot/model/config_model.py b/src/kleinanzeigen_bot/model/config_model.py index ad803cb..f7ba421 100644 --- a/src/kleinanzeigen_bot/model/config_model.py +++ b/src/kleinanzeigen_bot/model/config_model.py @@ -194,6 +194,28 @@ class TimeoutConfig(ContextualModel): return base * self.multiplier * backoff +class DiagnosticsConfig(ContextualModel): + login_detection_capture:bool = Field( + default = False, + description = "If true, capture diagnostics artifacts (screenshot + HTML) when login detection returns UNKNOWN.", + ) + pause_on_login_detection_failure:bool = Field( + default = False, + description = "If true, pause (interactive runs only) after capturing login detection diagnostics " + "so that user can inspect the browser. Requires login_detection_capture to be enabled.", + ) + output_dir:str | None = Field( + default = None, + description = "Optional output directory for diagnostics artifacts. 
If omitted, a safe default is used based on installation mode.", + ) + + @model_validator(mode = "after") + def _validate_pause_requires_capture(self) -> "DiagnosticsConfig": + if self.pause_on_login_detection_failure and not self.login_detection_capture: + raise ValueError(_("pause_on_login_detection_failure requires login_detection_capture to be enabled")) + return self + + def _validate_glob_pattern(v:str) -> str: if not v.strip(): raise ValueError("must be a non-empty, non-blank glob pattern") @@ -206,12 +228,13 @@ GlobPattern = Annotated[str, AfterValidator(_validate_glob_pattern)] class Config(ContextualModel): ad_files:list[GlobPattern] = Field( default_factory = lambda: ["./**/ad_*.{json,yml,yaml}"], - min_items = 1, + json_schema_extra = {"default": ["./**/ad_*.{json,yml,yaml}"]}, + min_length = 1, description = """ glob (wildcard) patterns to select ad configuration files if relative paths are specified, then they are relative to this configuration file """, - ) # type: ignore[call-overload] + ) ad_defaults:AdDefaults = Field(default_factory = AdDefaults, description = "Default values for ads, can be overwritten in each ad configuration file") @@ -235,6 +258,7 @@ Example: captcha:CaptchaConfig = Field(default_factory = CaptchaConfig) update_check:UpdateCheckConfig = Field(default_factory = UpdateCheckConfig, description = "Update check configuration") timeouts:TimeoutConfig = Field(default_factory = TimeoutConfig, description = "Centralized timeout configuration.") + diagnostics:DiagnosticsConfig | None = Field(default = None, description = "Optional failure-only diagnostics capture.") def with_values(self, values:dict[str, Any]) -> Config: return Config.model_validate(dicts.apply_defaults(copy.deepcopy(values), defaults = self.model_dump())) diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 4a70845..392c404 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ 
b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -59,11 +59,17 @@ kleinanzeigen_bot/__init__.py: "Captcha recognized - auto-restart enabled, abort run...": "Captcha erkannt - Auto-Neustart aktiviert, Durchlauf wird beendet..." "Press a key to continue...": "Eine Taste drücken, um fortzufahren..." + _capture_login_detection_diagnostics_if_enabled: + "# Login detection returned UNKNOWN. Browser is paused for manual inspection.": "# Login-Erkennung ergab UNKNOWN. Browser ist zur manuellen Prüfung angehalten." + "Press a key to continue...": "Eine Taste drücken, um fortzufahren..." + login: "Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..." "Current page URL after opening homepage: %s": "Aktuelle Seiten-URL nach dem Öffnen der Startseite: %s" "Already logged in as [%s]. Skipping login.": "Bereits eingeloggt als [%s]. Überspringe Anmeldung." "Opening login page...": "Öffne Anmeldeseite..." + "Login state is UNKNOWN - cannot determine if already logged in. Skipping login attempt.": "Login-Status ist UNKNOWN - kann nicht bestimmt werden, ob bereits eingeloggt ist. Überspringe Anmeldeversuch." + "Login state is UNKNOWN after first login attempt - cannot determine login status. Aborting login process.": "Login-Status ist UNKNOWN nach dem ersten Anmeldeversuch - kann Login-Status nicht bestimmen. Breche Anmeldeprozess ab." 
"First login attempt did not succeed, trying second login attempt": "Erster Anmeldeversuch war nicht erfolgreich, versuche zweiten Anmeldeversuch" "Second login attempt succeeded": "Zweiter Anmeldeversuch erfolgreich" "Second login attempt also failed - login may not have succeeded": "Zweiter Anmeldeversuch ebenfalls fehlgeschlagen - Anmeldung möglicherweise nicht erfolgreich" @@ -75,6 +81,7 @@ kleinanzeigen_bot/__init__.py: "Login detected via #user-email element": "Login erkannt über #user-email Element" "Timeout waiting for #user-email element after %.1fs": "Timeout beim Warten auf #user-email Element nach %.1fs" "No login detected - neither .mr-medium nor #user-email found with username": "Kein Login erkannt - weder .mr-medium noch #user-email mit Benutzername gefunden" + "No login detected - DOM elements not found and server probe returned %s": "Kein Login erkannt - DOM-Elemente nicht gefunden und Server-Probe ergab %s" handle_after_login_logic: "# Device verification message detected. Please follow the instruction displayed in the Browser.": "# Nachricht zur Geräteverifizierung erkannt. Bitte den Anweisungen im Browser folgen." 
@@ -596,6 +603,8 @@ kleinanzeigen_bot/model/config_model.py: "amount must be specified when auto_price_reduction is enabled": "amount muss angegeben werden, wenn auto_price_reduction aktiviert ist" "min_price must be specified when auto_price_reduction is enabled": "min_price muss angegeben werden, wenn auto_price_reduction aktiviert ist" "Percentage reduction amount must not exceed %s": "Prozentuale Reduktionsmenge darf %s nicht überschreiten" + _validate_pause_requires_capture: + "pause_on_login_detection_failure requires login_detection_capture to be enabled": "pause_on_login_detection_failure erfordert, dass login_detection_capture aktiviert ist" ################################################# kleinanzeigen_bot/model/ad_model.py: diff --git a/tests/unit/test_config_model.py b/tests/unit/test_config_model.py index 0e48ecb..f4fa6dc 100644 --- a/tests/unit/test_config_model.py +++ b/tests/unit/test_config_model.py @@ -7,61 +7,27 @@ from kleinanzeigen_bot.model.config_model import AdDefaults, Config, TimeoutConf def test_migrate_legacy_description_prefix() -> None: - assert AdDefaults.model_validate({ - }).description_prefix is None + assert AdDefaults.model_validate({}).description_prefix is None - assert AdDefaults.model_validate({ - "description_prefix": "Prefix" - }).description_prefix == "Prefix" + assert AdDefaults.model_validate({"description_prefix": "Prefix"}).description_prefix == "Prefix" - assert AdDefaults.model_validate({ - "description_prefix": "Prefix", - "description": { - "prefix": "Legacy Prefix" - } - }).description_prefix == "Prefix" + assert AdDefaults.model_validate({"description_prefix": "Prefix", "description": {"prefix": "Legacy Prefix"}}).description_prefix == "Prefix" - assert AdDefaults.model_validate({ - "description": { - "prefix": "Legacy Prefix" - } - }).description_prefix == "Legacy Prefix" + assert AdDefaults.model_validate({"description": {"prefix": "Legacy Prefix"}}).description_prefix == "Legacy Prefix" - assert 
AdDefaults.model_validate({ - "description_prefix": "", - "description": { - "prefix": "Legacy Prefix" - } - }).description_prefix == "Legacy Prefix" + assert AdDefaults.model_validate({"description_prefix": "", "description": {"prefix": "Legacy Prefix"}}).description_prefix == "Legacy Prefix" def test_migrate_legacy_description_suffix() -> None: - assert AdDefaults.model_validate({ - }).description_suffix is None + assert AdDefaults.model_validate({}).description_suffix is None - assert AdDefaults.model_validate({ - "description_suffix": "Suffix" - }).description_suffix == "Suffix" + assert AdDefaults.model_validate({"description_suffix": "Suffix"}).description_suffix == "Suffix" - assert AdDefaults.model_validate({ - "description_suffix": "Suffix", - "description": { - "suffix": "Legacy Suffix" - } - }).description_suffix == "Suffix" + assert AdDefaults.model_validate({"description_suffix": "Suffix", "description": {"suffix": "Legacy Suffix"}}).description_suffix == "Suffix" - assert AdDefaults.model_validate({ - "description": { - "suffix": "Legacy Suffix" - } - }).description_suffix == "Legacy Suffix" + assert AdDefaults.model_validate({"description": {"suffix": "Legacy Suffix"}}).description_suffix == "Legacy Suffix" - assert AdDefaults.model_validate({ - "description_suffix": "", - "description": { - "suffix": "Legacy Suffix" - } - }).description_suffix == "Legacy Suffix" + assert AdDefaults.model_validate({"description_suffix": "", "description": {"suffix": "Legacy Suffix"}}).description_suffix == "Legacy Suffix" def test_minimal_config_validation() -> None: @@ -79,39 +45,32 @@ def test_minimal_config_validation() -> None: def test_timeout_config_defaults_and_effective_values() -> None: - cfg = Config.model_validate({ - "login": {"username": "dummy", "password": "dummy"}, # noqa: S105 - "timeouts": { - "multiplier": 2.0, - "pagination_initial": 12.0, - "retry_max_attempts": 3, - "retry_backoff_factor": 2.0 + cfg = Config.model_validate( + { + "login": 
{"username": "dummy", "password": "dummy"}, # noqa: S105 + "timeouts": {"multiplier": 2.0, "pagination_initial": 12.0, "retry_max_attempts": 3, "retry_backoff_factor": 2.0}, } - }) + ) timeouts = cfg.timeouts base = timeouts.resolve("pagination_initial") multiplier = timeouts.multiplier backoff = timeouts.retry_backoff_factor assert base == 12.0 - assert timeouts.effective("pagination_initial") == base * multiplier * (backoff ** 0) + assert timeouts.effective("pagination_initial") == base * multiplier * (backoff**0) # attempt 1 should apply backoff factor once in addition to multiplier - assert timeouts.effective("pagination_initial", attempt = 1) == base * multiplier * (backoff ** 1) + assert timeouts.effective("pagination_initial", attempt = 1) == base * multiplier * (backoff**1) def test_validate_glob_pattern_rejects_blank_strings() -> None: with pytest.raises(ValueError, match = "must be a non-empty, non-blank glob pattern"): - Config.model_validate({ - "ad_files": [" "], - "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, - "login": {"username": "dummy", "password": "dummy"} - }) + Config.model_validate( + {"ad_files": [" "], "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, "login": {"username": "dummy", "password": "dummy"}} + ) - cfg = Config.model_validate({ - "ad_files": ["*.yaml"], - "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, - "login": {"username": "dummy", "password": "dummy"} - }) + cfg = Config.model_validate( + {"ad_files": ["*.yaml"], "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, "login": {"username": "dummy", "password": "dummy"}} + ) assert cfg.ad_files == ["*.yaml"] @@ -123,3 +82,25 @@ def test_timeout_config_resolve_returns_specific_value() -> None: def test_timeout_config_resolve_falls_back_to_default() -> None: timeouts = TimeoutConfig(default = 3.0) assert timeouts.resolve("nonexistent_key") == 3.0 + + +def test_diagnostics_pause_requires_capture_validation() -> 
None: + """ + Unit: DiagnosticsConfig validator ensures pause_on_login_detection_failure + requires login_detection_capture to be enabled. + """ + minimal_cfg = { + "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}}, + "login": {"username": "dummy", "password": "dummy"}, # noqa: S105 + "publishing": {"delete_old_ads": "BEFORE_PUBLISH", "delete_old_ads_by_title": False}, + } + + valid_cfg = {**minimal_cfg, "diagnostics": {"login_detection_capture": True, "pause_on_login_detection_failure": True}} + config = Config.model_validate(valid_cfg) + assert config.diagnostics is not None + assert config.diagnostics.pause_on_login_detection_failure is True + assert config.diagnostics.login_detection_capture is True + + invalid_cfg = {**minimal_cfg, "diagnostics": {"login_detection_capture": False, "pause_on_login_detection_failure": True}} + with pytest.raises(ValueError, match = "pause_on_login_detection_failure requires login_detection_capture to be enabled"): + Config.model_validate(invalid_cfg) diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index 662ee99..b66405a 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -12,10 +12,10 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest from pydantic import ValidationError -from kleinanzeigen_bot import LOG, AdUpdateStrategy, KleinanzeigenBot, misc +from kleinanzeigen_bot import LOG, AdUpdateStrategy, KleinanzeigenBot, LoginState, misc from kleinanzeigen_bot._version import __version__ from kleinanzeigen_bot.model.ad_model import Ad -from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig +from kleinanzeigen_bot.model.config_model import AdDefaults, Config, DiagnosticsConfig, PublishingConfig from kleinanzeigen_bot.utils import dicts, loggers from kleinanzeigen_bot.utils.web_scraping_mixin import By, Element @@ -333,15 +333,151 @@ class TestKleinanzeigenBotAuthentication: @pytest.mark.asyncio async def 
test_is_logged_in_returns_false_when_not_logged_in(self, test_bot:KleinanzeigenBot) -> None: """Verify that login check returns false when not logged in.""" - with patch.object(test_bot, "web_text", side_effect = TimeoutError): + with ( + patch.object(test_bot, "web_text", side_effect = TimeoutError), + patch.object( + test_bot, + "web_request", + new_callable = AsyncMock, + return_value = {"statusCode": 200, "content": "login"}, + ), + ): assert await test_bot.is_logged_in() is False + @pytest.mark.asyncio + async def test_get_login_state_prefers_auth_probe_over_dom(self, test_bot:KleinanzeigenBot) -> None: + with ( + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_IN) as probe, + patch.object(test_bot, "web_text", side_effect = AssertionError("DOM check must not run when probe is deterministic")) as web_text, + ): + assert await test_bot.get_login_state() == LoginState.LOGGED_IN + probe.assert_awaited_once() + web_text.assert_not_called() + + @pytest.mark.asyncio + async def test_get_login_state_falls_back_to_dom_when_probe_unknown(self, test_bot:KleinanzeigenBot) -> None: + with ( + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN) as probe, + patch.object(test_bot, "web_text", new_callable = AsyncMock, return_value = "Welcome dummy_user") as web_text, + ): + assert await test_bot.get_login_state() == LoginState.LOGGED_IN + probe.assert_awaited_once() + web_text.assert_awaited_once() + + @pytest.mark.asyncio + async def test_get_login_state_prefers_logged_out_from_probe_over_dom(self, test_bot:KleinanzeigenBot) -> None: + with ( + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_OUT) as probe, + patch.object(test_bot, "web_text", side_effect = AssertionError("DOM check must not run when probe is deterministic")) as web_text, + ): + assert await test_bot.get_login_state() == 
LoginState.LOGGED_OUT + probe.assert_awaited_once() + web_text.assert_not_called() + + @pytest.mark.asyncio + async def test_get_login_state_returns_unknown_when_probe_unknown_and_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None: + with ( + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN) as probe, + patch.object(test_bot, "web_text", side_effect = TimeoutError) as web_text, + ): + assert await test_bot.get_login_state() == LoginState.UNKNOWN + probe.assert_awaited_once() + assert web_text.call_count == 2 + + @pytest.mark.asyncio + async def test_get_login_state_unknown_captures_diagnostics_when_enabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: + test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"login_detection_capture": True, "output_dir": str(tmp_path)}) + + page = MagicMock() + page.save_screenshot = AsyncMock() + page.get_content = AsyncMock(return_value = "") + test_bot.page = page + + with ( + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN), + patch.object(test_bot, "web_text", side_effect = TimeoutError), + ): + assert await test_bot.get_login_state() == LoginState.UNKNOWN + + page.save_screenshot.assert_awaited_once() + page.get_content.assert_awaited_once() + + @pytest.mark.asyncio + async def test_get_login_state_unknown_does_not_capture_diagnostics_when_disabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: + test_bot.config.diagnostics = DiagnosticsConfig.model_validate({"login_detection_capture": False, "output_dir": str(tmp_path)}) + + page = MagicMock() + page.save_screenshot = AsyncMock() + page.get_content = AsyncMock(return_value = "") + test_bot.page = page + + with ( + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN), + patch.object(test_bot, "web_text", side_effect = TimeoutError), + ): + assert await 
test_bot.get_login_state() == LoginState.UNKNOWN + + page.save_screenshot.assert_not_called() + page.get_content.assert_not_called() + + @pytest.mark.asyncio + async def test_get_login_state_unknown_pauses_for_inspection_when_enabled_and_interactive(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: + test_bot.config.diagnostics = DiagnosticsConfig.model_validate( + {"login_detection_capture": True, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)} + ) + + page = MagicMock() + page.save_screenshot = AsyncMock() + page.get_content = AsyncMock(return_value = "") + test_bot.page = page + + stdin_mock = MagicMock() + stdin_mock.isatty.return_value = True + + with ( + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN), + patch.object(test_bot, "web_text", side_effect = TimeoutError), + patch("kleinanzeigen_bot.sys.stdin", stdin_mock), + patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput, + ): + assert await test_bot.get_login_state() == LoginState.UNKNOWN + # Call twice to ensure the capture/pause guard triggers only once per process. 
+ assert await test_bot.get_login_state() == LoginState.UNKNOWN + + page.save_screenshot.assert_awaited_once() + page.get_content.assert_awaited_once() + mock_ainput.assert_awaited_once() + + @pytest.mark.asyncio + async def test_get_login_state_unknown_does_not_pause_when_non_interactive(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: + test_bot.config.diagnostics = DiagnosticsConfig.model_validate( + {"login_detection_capture": True, "pause_on_login_detection_failure": True, "output_dir": str(tmp_path)} + ) + + page = MagicMock() + page.save_screenshot = AsyncMock() + page.get_content = AsyncMock(return_value = "") + test_bot.page = page + + stdin_mock = MagicMock() + stdin_mock.isatty.return_value = False + + with ( + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN), + patch.object(test_bot, "web_text", side_effect = TimeoutError), + patch("kleinanzeigen_bot.sys.stdin", stdin_mock), + patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput, + ): + assert await test_bot.get_login_state() == LoginState.UNKNOWN + + mock_ainput.assert_not_called() + @pytest.mark.asyncio async def test_login_flow_completes_successfully(self, test_bot:KleinanzeigenBot) -> None: """Verify that normal login flow completes successfully.""" with ( patch.object(test_bot, "web_open") as mock_open, - patch.object(test_bot, "is_logged_in", side_effect = [False, True]) as mock_logged_in, + patch.object(test_bot, "get_login_state", new_callable = AsyncMock, side_effect = [LoginState.LOGGED_OUT, LoginState.LOGGED_IN]) as mock_logged_in, patch.object(test_bot, "web_find", side_effect = TimeoutError), patch.object(test_bot, "web_input") as mock_input, patch.object(test_bot, "web_click") as mock_click, @@ -358,7 +494,12 @@ class TestKleinanzeigenBotAuthentication: """Verify that login flow handles captcha correctly.""" with ( patch.object(test_bot, "web_open"), - patch.object(test_bot, "is_logged_in", 
side_effect = [False, False, True]), + patch.object( + test_bot, + "get_login_state", + new_callable = AsyncMock, + side_effect = [LoginState.LOGGED_OUT, LoginState.LOGGED_OUT, LoginState.LOGGED_IN], + ), patch.object(test_bot, "web_find") as mock_find, patch.object(test_bot, "web_input") as mock_input, patch.object(test_bot, "web_click") as mock_click,