mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 18:41:50 +01:00
fix: auth probe + diagnostics for UNKNOWN states (#791)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
import atexit, asyncio, enum, json, os, re, signal, sys, textwrap # isort: skip
|
||||
import atexit, asyncio, enum, json, os, re, secrets, signal, sys, textwrap # isort: skip
|
||||
import getopt # pylint: disable=deprecated-module
|
||||
import urllib.parse as urllib_parse
|
||||
from datetime import datetime
|
||||
@@ -39,6 +39,12 @@ class AdUpdateStrategy(enum.Enum):
|
||||
MODIFY = enum.auto()
|
||||
|
||||
|
||||
@enum.unique
class LoginState(enum.Enum):
    """Tri-state outcome of login detection.

    A plain boolean cannot express "could not tell", so detection code
    reports one of three explicit states instead.
    """

    # The session was positively identified as authenticated.
    LOGGED_IN = enum.auto()
    # The session was positively identified as not authenticated.
    LOGGED_OUT = enum.auto()
    # Detection was inconclusive; callers must not assume either state.
    UNKNOWN = enum.auto()
|
||||
|
||||
|
||||
def _repost_cycle_ready(ad_cfg:Ad, ad_file_relative:str) -> bool:
|
||||
"""
|
||||
Check if the repost cycle delay has been satisfied.
|
||||
@@ -55,7 +61,7 @@ def _repost_cycle_ready(ad_cfg:Ad, ad_file_relative:str) -> bool:
|
||||
if total_reposts <= delay_reposts:
|
||||
remaining = (delay_reposts + 1) - total_reposts
|
||||
LOG.info(
|
||||
"Auto price reduction delayed for [%s]: waiting %s more reposts (completed %s, applied %s reductions)",
|
||||
_("Auto price reduction delayed for [%s]: waiting %s more reposts (completed %s, applied %s reductions)"),
|
||||
ad_file_relative,
|
||||
max(remaining, 1), # Clamp to 1 to avoid showing "0 more reposts" when at threshold
|
||||
total_reposts,
|
||||
@@ -64,7 +70,9 @@ def _repost_cycle_ready(ad_cfg:Ad, ad_file_relative:str) -> bool:
|
||||
return False
|
||||
|
||||
if eligible_cycles <= applied_cycles:
|
||||
LOG.debug("Auto price reduction already applied for [%s]: %s reductions match %s eligible reposts", ad_file_relative, applied_cycles, eligible_cycles)
|
||||
LOG.debug(
|
||||
_("Auto price reduction already applied for [%s]: %s reductions match %s eligible reposts"), ad_file_relative, applied_cycles, eligible_cycles
|
||||
)
|
||||
return False
|
||||
|
||||
return True
|
||||
@@ -175,6 +183,8 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
self.ads_selector = "due"
|
||||
self.keep_old_ads = False
|
||||
|
||||
self._login_detection_diagnostics_captured:bool = False
|
||||
|
||||
def __del__(self) -> None:
|
||||
if self.file_log:
|
||||
self.file_log.close()
|
||||
@@ -802,10 +812,15 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
if getattr(self, "page", None) is not None:
|
||||
LOG.debug("Current page URL after opening homepage: %s", self.page.url)
|
||||
|
||||
if await self.is_logged_in():
|
||||
state = await self.get_login_state()
|
||||
if state == LoginState.LOGGED_IN:
|
||||
LOG.info("Already logged in as [%s]. Skipping login.", self.config.login.username)
|
||||
return
|
||||
|
||||
if state == LoginState.UNKNOWN:
|
||||
LOG.warning("Login state is UNKNOWN - cannot determine if already logged in. Skipping login attempt.")
|
||||
return
|
||||
|
||||
LOG.info("Opening login page...")
|
||||
await self.web_open(f"{self.root_url}/m-einloggen.html?targetUrl=/")
|
||||
|
||||
@@ -813,12 +828,18 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
await self.handle_after_login_logic()
|
||||
|
||||
# Sometimes a second login is required
|
||||
if not await self.is_logged_in():
|
||||
state = await self.get_login_state()
|
||||
if state == LoginState.UNKNOWN:
|
||||
LOG.warning("Login state is UNKNOWN after first login attempt - cannot determine login status. Aborting login process.")
|
||||
return
|
||||
|
||||
if state == LoginState.LOGGED_OUT:
|
||||
LOG.debug("First login attempt did not succeed, trying second login attempt")
|
||||
await self.fill_login_data_and_send()
|
||||
await self.handle_after_login_logic()
|
||||
|
||||
if await self.is_logged_in():
|
||||
state = await self.get_login_state()
|
||||
if state == LoginState.LOGGED_IN:
|
||||
LOG.debug("Second login attempt succeeded")
|
||||
else:
|
||||
LOG.warning("Second login attempt also failed - login may not have succeeded")
|
||||
@@ -859,7 +880,120 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
# GDPR banner not shown within timeout.
|
||||
pass
|
||||
|
||||
async def is_logged_in(self) -> bool:
|
||||
async def _auth_probe_login_state(self) -> LoginState:
    """Classify the login state by requesting an auth-required endpoint.

    The probe is non-mutating (GET request). get_login_state() uses it as the
    primary signal and only falls back to DOM checks when this returns UNKNOWN.

    Classification rules:
      * HTTP 401 / 403                                  -> LOGGED_OUT
      * JSON object that contains an "ads" key          -> LOGGED_IN
      * non-JSON body containing a login-page marker    -> LOGGED_OUT
      * timeout, assertion failure, non-string body or
        any other unrecognized response                 -> UNKNOWN
    """
    probe_url = f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"

    try:
        probe_response = await self.web_request(probe_url, valid_response_codes = [200, 401, 403])
    except (TimeoutError, AssertionError):
        # AssertionError can occur when web_request() fails to parse the response (e.g., unexpected content type).
        # Neither failure says anything reliable about the session, so report UNKNOWN.
        return LoginState.UNKNOWN

    if probe_response.get("statusCode") in {401, 403}:
        return LoginState.LOGGED_OUT

    body = probe_response.get("content", "")
    if not isinstance(body, str):
        return LoginState.UNKNOWN

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError:
        # Not JSON: check whether the body looks like the login page before giving up.
        body_lower = body.lower()
        login_markers = ("m-einloggen", "login-email", "login-password", "login-form")
        if any(marker in body_lower for marker in login_markers):
            return LoginState.LOGGED_OUT
        return LoginState.UNKNOWN

    looks_like_ad_listing = isinstance(parsed, dict) and "ads" in parsed
    return LoginState.LOGGED_IN if looks_like_ad_listing else LoginState.UNKNOWN
|
||||
|
||||
async def get_login_state(self) -> LoginState:
    """Determine the current login state using layered detection.

    Detection order:
      1. Server-side auth probe (`_auth_probe_login_state`); any conclusive
         answer (LOGGED_IN or LOGGED_OUT) is returned as-is.
      2. DOM-based check (`is_logged_in(include_probe=False)`); a positive
         result yields LOGGED_IN.
      3. Otherwise diagnostics are captured via
         `_capture_login_detection_diagnostics_if_enabled` and UNKNOWN is returned.
    """
    # The server-side probe goes first because it is deterministic, whereas
    # SPA/hydration delays can make DOM checks temporarily miss login indicators.
    probe_state = await self._auth_probe_login_state()
    if probe_state != LoginState.UNKNOWN:
        return probe_state

    # Probe was inconclusive: consult the DOM, without re-running the probe.
    dom_says_logged_in = await self.is_logged_in(include_probe = False)
    if not dom_says_logged_in:
        await self._capture_login_detection_diagnostics_if_enabled()
        return LoginState.UNKNOWN

    return LoginState.LOGGED_IN
|
||||
|
||||
def _diagnostics_output_dir(self) -> Path:
    """Return the directory into which diagnostics artifacts are written.

    Precedence:
      1. A non-blank `diagnostics.output_dir` from the config, passed through
         `abspath(..., relative_to=self.config_file_path)` and resolved.
      2. `<xdg cache base dir>/diagnostics` when the installation mode is "xdg".
      3. `<current working directory>/.temp/diagnostics` otherwise.
    """
    diag_cfg = getattr(self.config, "diagnostics", None)
    configured_dir = None if diag_cfg is None else diag_cfg.output_dir

    # A blank or whitespace-only value is treated the same as "not configured".
    if configured_dir and configured_dir.strip():
        return Path(abspath(configured_dir, relative_to = self.config_file_path)).resolve()

    if self.installation_mode_or_portable == "xdg":
        return xdg_paths.get_xdg_base_dir("cache") / "diagnostics"

    return (Path.cwd() / ".temp" / "diagnostics").resolve()
|
||||
|
||||
async def _capture_login_detection_diagnostics_if_enabled(self) -> None:
    """Best-effort capture of a screenshot and the page HTML for an UNKNOWN login state.

    Does nothing when diagnostics are not configured, when
    `login_detection_capture` is off, when a capture was already attempted on
    this instance, or when no page is available. Capture errors are logged at
    debug level and never propagated. If `pause_on_login_detection_failure` is
    set and stdin is a TTY, the method afterwards waits for user input.
    """
    diag_cfg = getattr(self.config, "diagnostics", None)
    if diag_cfg is None or not diag_cfg.login_detection_capture or self._login_detection_diagnostics_captured:
        return

    page = getattr(self, "page", None)
    if page is None:
        return

    # Mark as captured up front so that a failing capture is not retried.
    self._login_detection_diagnostics_captured = True

    try:
        target_dir = self._diagnostics_output_dir()
        target_dir.mkdir(parents = True, exist_ok = True)

        # Intentionally no username/PII in filename.
        timestamp = misc.now().strftime("%Y%m%dT%H%M%S")
        random_suffix = secrets.token_hex(4)
        stem = f"login_detection_unknown_{timestamp}_{random_suffix}"
        png_file = target_dir / f"{stem}.png"
        html_file = target_dir / f"{stem}.html"

        # The two artifacts are captured independently: one failing must not prevent the other.
        try:
            await page.save_screenshot(str(png_file))
        except Exception as exc:  # noqa: BLE001
            LOG.debug("Login diagnostics screenshot capture failed: %s", exc)

        try:
            page_html = await page.get_content()
            html_file.write_text(page_html, encoding = "utf-8")
        except Exception as exc:  # noqa: BLE001
            LOG.debug("Login diagnostics HTML capture failed: %s", exc)
    except Exception as exc:  # noqa: BLE001
        LOG.debug("Login diagnostics capture failed: %s", exc)

    if not getattr(diag_cfg, "pause_on_login_detection_failure", False):
        return

    # Only pause in interactive runs; a stdin object without isatty() counts as non-interactive.
    stdin_isatty = getattr(sys.stdin, "isatty", lambda: False)
    if not stdin_isatty():
        return

    LOG.warning("############################################")
    LOG.warning("# Login detection returned UNKNOWN. Browser is paused for manual inspection.")
    LOG.warning("############################################")
    await ainput(_("Press a key to continue..."))
|
||||
|
||||
async def is_logged_in(self, *, include_probe:bool = True) -> bool:
|
||||
# Use login_detection timeout (10s default) instead of default (5s)
|
||||
# to allow sufficient time for client-side JavaScript rendering after page load.
|
||||
# This is especially important for older sessions (20+ days) that require
|
||||
@@ -867,7 +1001,11 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
login_check_timeout = self._timeout("login_detection")
|
||||
effective_timeout = self._effective_timeout("login_detection")
|
||||
username = self.config.login.username.lower()
|
||||
LOG.debug("Starting login detection (timeout: %.1fs base, %.1fs effective with multiplier/backoff)", login_check_timeout, effective_timeout)
|
||||
LOG.debug(
|
||||
"Starting login detection (timeout: %.1fs base, %.1fs effective with multiplier/backoff)",
|
||||
login_check_timeout,
|
||||
effective_timeout,
|
||||
)
|
||||
|
||||
# Try to find the standard element first
|
||||
try:
|
||||
@@ -887,7 +1025,15 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
except TimeoutError:
|
||||
LOG.debug("Timeout waiting for #user-email element after %.1fs", effective_timeout)
|
||||
|
||||
LOG.debug("No login detected - neither .mr-medium nor #user-email found with username")
|
||||
if not include_probe:
|
||||
LOG.debug("No login detected - neither .mr-medium nor #user-email found with username")
|
||||
return False
|
||||
|
||||
state = await self._auth_probe_login_state()
|
||||
if state == LoginState.LOGGED_IN:
|
||||
return True
|
||||
|
||||
LOG.debug("No login detected - DOM elements not found and server probe returned %s", state.name)
|
||||
return False
|
||||
|
||||
async def delete_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
|
||||
@@ -1384,7 +1530,10 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
await self.web_input(By.ID, "postad-phonenumber", contact.phone)
|
||||
except TimeoutError:
|
||||
LOG.warning(
|
||||
"Phone number field not present on page. This is expected for many private accounts; commercial accounts may still support phone numbers."
|
||||
_(
|
||||
"Phone number field not present on page. This is expected for many private accounts; "
|
||||
"commercial accounts may still support phone numbers."
|
||||
)
|
||||
)
|
||||
|
||||
async def update_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
|
||||
|
||||
@@ -194,6 +194,28 @@ class TimeoutConfig(ContextualModel):
|
||||
return base * self.multiplier * backoff
|
||||
|
||||
|
||||
# Opt-in settings for the diagnostics captured when login detection returns UNKNOWN.
# (Documented with comments rather than a class docstring so the generated model schema stays unchanged.)
class DiagnosticsConfig(ContextualModel):
    # Master switch: write screenshot + HTML artifacts when login detection is inconclusive.
    login_detection_capture:bool = Field(
        default = False,
        description = "If true, capture diagnostics artifacts (screenshot + HTML) when login detection returns UNKNOWN.",
    )

    # Optional interactive pause after capturing; only valid together with login_detection_capture.
    pause_on_login_detection_failure:bool = Field(
        default = False,
        description = "If true, pause (interactive runs only) after capturing login detection diagnostics "
        "so that user can inspect the browser. Requires login_detection_capture to be enabled.",
    )

    # Where artifacts are written; None means a default location is chosen by the caller.
    output_dir:str | None = Field(
        default = None,
        description = "Optional output directory for diagnostics artifacts. If omitted, a safe default is used based on installation mode.",
    )

    @model_validator(mode = "after")
    def _validate_pause_requires_capture(self) -> "DiagnosticsConfig":
        # Pausing is only meaningful after a capture, so reject the inconsistent combination.
        pause_without_capture = self.pause_on_login_detection_failure and not self.login_detection_capture
        if pause_without_capture:
            raise ValueError(_("pause_on_login_detection_failure requires login_detection_capture to be enabled"))
        return self
|
||||
|
||||
|
||||
def _validate_glob_pattern(v:str) -> str:
|
||||
if not v.strip():
|
||||
raise ValueError("must be a non-empty, non-blank glob pattern")
|
||||
@@ -206,12 +228,13 @@ GlobPattern = Annotated[str, AfterValidator(_validate_glob_pattern)]
|
||||
class Config(ContextualModel):
|
||||
ad_files:list[GlobPattern] = Field(
|
||||
default_factory = lambda: ["./**/ad_*.{json,yml,yaml}"],
|
||||
min_items = 1,
|
||||
json_schema_extra = {"default": ["./**/ad_*.{json,yml,yaml}"]},
|
||||
min_length = 1,
|
||||
description = """
|
||||
glob (wildcard) patterns to select ad configuration files
|
||||
if relative paths are specified, then they are relative to this configuration file
|
||||
""",
|
||||
) # type: ignore[call-overload]
|
||||
)
|
||||
|
||||
ad_defaults:AdDefaults = Field(default_factory = AdDefaults, description = "Default values for ads, can be overwritten in each ad configuration file")
|
||||
|
||||
@@ -235,6 +258,7 @@ Example:
|
||||
captcha:CaptchaConfig = Field(default_factory = CaptchaConfig)
|
||||
update_check:UpdateCheckConfig = Field(default_factory = UpdateCheckConfig, description = "Update check configuration")
|
||||
timeouts:TimeoutConfig = Field(default_factory = TimeoutConfig, description = "Centralized timeout configuration.")
|
||||
diagnostics:DiagnosticsConfig | None = Field(default = None, description = "Optional failure-only diagnostics capture.")
|
||||
|
||||
def with_values(self, values:dict[str, Any]) -> Config:
|
||||
return Config.model_validate(dicts.apply_defaults(copy.deepcopy(values), defaults = self.model_dump()))
|
||||
|
||||
@@ -59,11 +59,17 @@ kleinanzeigen_bot/__init__.py:
|
||||
"Captcha recognized - auto-restart enabled, abort run...": "Captcha erkannt - Auto-Neustart aktiviert, Durchlauf wird beendet..."
|
||||
"Press a key to continue...": "Eine Taste drücken, um fortzufahren..."
|
||||
|
||||
_capture_login_detection_diagnostics_if_enabled:
|
||||
"# Login detection returned UNKNOWN. Browser is paused for manual inspection.": "# Login-Erkennung ergab UNKNOWN. Browser ist zur manuellen Prüfung angehalten."
|
||||
"Press a key to continue...": "Eine Taste drücken, um fortzufahren..."
|
||||
|
||||
login:
|
||||
"Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..."
|
||||
"Current page URL after opening homepage: %s": "Aktuelle Seiten-URL nach dem Öffnen der Startseite: %s"
|
||||
"Already logged in as [%s]. Skipping login.": "Bereits eingeloggt als [%s]. Überspringe Anmeldung."
|
||||
"Opening login page...": "Öffne Anmeldeseite..."
|
||||
"Login state is UNKNOWN - cannot determine if already logged in. Skipping login attempt.": "Login-Status ist UNKNOWN - kann nicht bestimmt werden, ob bereits eingeloggt ist. Überspringe Anmeldeversuch."
|
||||
"Login state is UNKNOWN after first login attempt - cannot determine login status. Aborting login process.": "Login-Status ist UNKNOWN nach dem ersten Anmeldeversuch - kann Login-Status nicht bestimmen. Breche Anmeldeprozess ab."
|
||||
"First login attempt did not succeed, trying second login attempt": "Erster Anmeldeversuch war nicht erfolgreich, versuche zweiten Anmeldeversuch"
|
||||
"Second login attempt succeeded": "Zweiter Anmeldeversuch erfolgreich"
|
||||
"Second login attempt also failed - login may not have succeeded": "Zweiter Anmeldeversuch ebenfalls fehlgeschlagen - Anmeldung möglicherweise nicht erfolgreich"
|
||||
@@ -75,6 +81,7 @@ kleinanzeigen_bot/__init__.py:
|
||||
"Login detected via #user-email element": "Login erkannt über #user-email Element"
|
||||
"Timeout waiting for #user-email element after %.1fs": "Timeout beim Warten auf #user-email Element nach %.1fs"
|
||||
"No login detected - neither .mr-medium nor #user-email found with username": "Kein Login erkannt - weder .mr-medium noch #user-email mit Benutzername gefunden"
|
||||
"No login detected - DOM elements not found and server probe returned %s": "Kein Login erkannt - DOM-Elemente nicht gefunden und Server-Probe ergab %s"
|
||||
|
||||
handle_after_login_logic:
|
||||
"# Device verification message detected. Please follow the instruction displayed in the Browser.": "# Nachricht zur Geräteverifizierung erkannt. Bitte den Anweisungen im Browser folgen."
|
||||
@@ -596,6 +603,8 @@ kleinanzeigen_bot/model/config_model.py:
|
||||
"amount must be specified when auto_price_reduction is enabled": "amount muss angegeben werden, wenn auto_price_reduction aktiviert ist"
|
||||
"min_price must be specified when auto_price_reduction is enabled": "min_price muss angegeben werden, wenn auto_price_reduction aktiviert ist"
|
||||
"Percentage reduction amount must not exceed %s": "Prozentuale Reduktionsmenge darf %s nicht überschreiten"
|
||||
_validate_pause_requires_capture:
|
||||
"pause_on_login_detection_failure requires login_detection_capture to be enabled": "pause_on_login_detection_failure erfordert, dass login_detection_capture aktiviert ist"
|
||||
|
||||
#################################################
|
||||
kleinanzeigen_bot/model/ad_model.py:
|
||||
|
||||
Reference in New Issue
Block a user