fix: Auth0-Login-Migration und GDPR-Banner-Fix (#870)

This commit is contained in:
klangborste
2026-03-15 07:55:52 +01:00
committed by GitHub
parent 62fd5f6003
commit 6e562164b8
4 changed files with 887 additions and 348 deletions

View File

@@ -38,7 +38,10 @@ _LOGIN_DETECTION_SELECTORS:Final[list[tuple["By", str]]] = [
(By.CLASS_NAME, "mr-medium"), (By.CLASS_NAME, "mr-medium"),
(By.ID, "user-email"), (By.ID, "user-email"),
] ]
_LOGIN_DETECTION_SELECTOR_LABELS:Final[tuple[str, ...]] = ("user_info_primary", "user_info_secondary") _LOGGED_OUT_CTA_SELECTORS:Final[list[tuple["By", str]]] = [
(By.CSS_SELECTOR, 'a[href*="einloggen"]'),
(By.CSS_SELECTOR, 'a[href*="/m-einloggen"]'),
]
colorama.just_fix_windows_console() colorama.just_fix_windows_console()
@@ -997,95 +1000,203 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
await ainput(_("Press a key to continue...")) await ainput(_("Press a key to continue..."))
except TimeoutError: except TimeoutError:
# No captcha detected within timeout. page_context = "login page" if is_login_page else "publish flow"
pass LOG.debug("No captcha detected within timeout on %s", page_context)
async def login(self) -> None: async def login(self) -> None:
sso_navigation_timeout = self._timeout("page_load")
pre_login_gdpr_timeout = self._timeout("quick_dom")
LOG.info("Checking if already logged in...") LOG.info("Checking if already logged in...")
await self.web_open(f"{self.root_url}") await self.web_open(f"{self.root_url}")
if getattr(self, "page", None) is not None: try:
LOG.debug("Current page URL after opening homepage: %s", self.page.url) await self._click_gdpr_banner(timeout = pre_login_gdpr_timeout)
except TimeoutError:
LOG.debug("No GDPR banner detected before login")
state = await self.get_login_state(capture_diagnostics = False)
if state == LoginState.LOGGED_IN:
LOG.info("Already logged in. Skipping login.")
return
LOG.debug("Navigating to SSO login page (Auth0)...")
# m-einloggen-sso.html triggers immediate server-side redirect to Auth0
# This avoids waiting for JS on m-einloggen.html which may not execute in headless mode
try:
await self.web_open(f"{self.root_url}/m-einloggen-sso.html", timeout = sso_navigation_timeout)
except TimeoutError:
LOG.warning("Timeout navigating to SSO login page after %.1fs", sso_navigation_timeout)
await self._capture_login_detection_diagnostics_if_enabled()
raise
self._login_detection_diagnostics_captured = False
try:
await self.fill_login_data_and_send()
await self.handle_after_login_logic()
except (AssertionError, TimeoutError):
# AssertionError is intentionally part of auth-boundary control flow so
# diagnostics are captured before the original error is re-raised.
await self._capture_login_detection_diagnostics_if_enabled()
raise
await self._dismiss_consent_banner() await self._dismiss_consent_banner()
state = await self.get_login_state() state = await self.get_login_state()
if state == LoginState.LOGGED_IN: if state == LoginState.LOGGED_IN:
LOG.info("Already logged in as [%s]. Skipping login.", self.config.login.username) LOG.info("Login confirmed.")
return return
if state == LoginState.UNKNOWN: current_url = self._current_page_url()
LOG.warning("Login state is UNKNOWN - cannot determine if already logged in. Skipping login attempt.") LOG.warning("Login state after attempt is %s (url=%s)", state.name, current_url)
await self._capture_login_detection_diagnostics_if_enabled()
raise AssertionError(_("Login could not be confirmed after Auth0 flow (state=%s, url=%s)") % (state.name, current_url))
def _current_page_url(self) -> str:
    """Return the current page URL reduced to scheme, host, port and path.

    Userinfo, query string and fragment are dropped, so the result can be
    embedded in log lines and exception messages without leaking them.

    Returns:
        The sanitized URL, or ``"unknown"`` when no page is attached, the page
        exposes no non-empty string URL, or the URL cannot be parsed.
    """
    page = getattr(self, "page", None)
    if page is None:
        return "unknown"

    raw_url = getattr(page, "url", None)
    if not isinstance(raw_url, str) or not raw_url:
        return "unknown"

    try:
        parsed = urllib_parse.urlparse(raw_url)
        # The port is validated lazily on attribute access; a non-numeric or
        # out-of-range port raises ValueError here rather than in urlparse().
        port = parsed.port
    except ValueError:
        # This helper feeds diagnostics and wait predicates; it must not raise.
        return "unknown"

    hostname = parsed.hostname
    if hostname:
        # `hostname` strips the brackets of IPv6 literals; restore them so the
        # rebuilt netloc stays unambiguous once a port is appended.
        host = f"[{hostname}]" if ":" in hostname else hostname
    else:
        host = parsed.netloc.split("@")[-1]

    netloc = f"{host}:{port}" if port is not None and host else host
    sanitized = urllib_parse.urlunparse((parsed.scheme, netloc, parsed.path, "", "", ""))
    return sanitized or "unknown"
async def _wait_for_auth0_login_context(self) -> None:
    """Wait until the browser has entered the Auth0 login flow.

    The redirect counts as started once the sanitized page URL contains either
    ``login.kleinanzeigen.de`` or ``/u/login``.

    Raises:
        AssertionError: If the wait times out before either marker appears;
            the original ``TimeoutError`` is chained as the cause.
    """
    auth0_markers = ("login.kleinanzeigen.de", "/u/login")
    wait_seconds = self._timeout("login_detection")

    def redirect_started() -> bool:
        # The URL is re-read for each marker, in order, stopping at the first hit.
        return any(marker in self._current_page_url() for marker in auth0_markers)

    try:
        await self.web_await(
            redirect_started,
            timeout = wait_seconds,
            timeout_error_message = f"Auth0 redirect did not start within {wait_seconds} seconds",
            apply_multiplier = False,
        )
    except TimeoutError as timeout_ex:
        raise AssertionError(_("Auth0 redirect not detected (url=%s)") % self._current_page_url()) from timeout_ex
async def _wait_for_auth0_password_step(self) -> None:
    """Wait until the Auth0 flow has advanced to its password page.

    The step counts as reached once the sanitized page URL contains
    ``/u/login/password``.

    Raises:
        AssertionError: If the wait times out before the password URL appears;
            the original ``TimeoutError`` is chained as the cause.
    """
    wait_seconds = self._timeout("login_detection")

    def on_password_page() -> bool:
        return "/u/login/password" in self._current_page_url()

    try:
        await self.web_await(
            on_password_page,
            timeout = wait_seconds,
            timeout_error_message = f"Auth0 password page not reached within {wait_seconds} seconds",
            apply_multiplier = False,
        )
    except TimeoutError as timeout_ex:
        raise AssertionError(_("Auth0 password step not reached (url=%s)") % self._current_page_url()) from timeout_ex
async def _wait_for_post_auth0_submit_transition(self) -> None:
post_submit_timeout = self._timeout("login_detection")
quick_dom_timeout = self._timeout("quick_dom")
fallback_max_ms = max(700, int(quick_dom_timeout * 1_000))
fallback_min_ms = max(300, fallback_max_ms // 2)
try:
await self.web_await(
lambda: self._is_valid_post_auth0_destination(self._current_page_url()),
timeout = post_submit_timeout,
timeout_error_message = f"Auth0 post-submit transition did not complete within {post_submit_timeout} seconds",
apply_multiplier = False,
)
return
except TimeoutError:
LOG.debug("Post-submit transition not detected via URL, checking logged-in selectors")
login_confirmed = False
try:
login_confirmed = await asyncio.wait_for(self.is_logged_in(include_probe = False), timeout = post_submit_timeout)
except (TimeoutError, asyncio.TimeoutError):
LOG.debug("Post-submit login verification did not complete within %.1fs", post_submit_timeout)
if login_confirmed:
return return
LOG.info("Opening login page...") LOG.debug("Auth0 post-submit verification remained inconclusive; applying bounded fallback pause")
await self.web_open(f"{self.root_url}/m-einloggen.html?targetUrl=/") await self.web_sleep(min_ms = fallback_min_ms, max_ms = fallback_max_ms)
await self.fill_login_data_and_send() try:
await self.handle_after_login_logic() if await asyncio.wait_for(self.is_logged_in(include_probe = False), timeout = quick_dom_timeout):
# Sometimes a second login is required
state = await self.get_login_state()
if state == LoginState.UNKNOWN:
LOG.warning("Login state is UNKNOWN after first login attempt - cannot determine login status. Aborting login process.")
return return
except (TimeoutError, asyncio.TimeoutError):
LOG.debug("Final post-submit login confirmation did not complete within %.1fs", quick_dom_timeout)
if state == LoginState.LOGGED_OUT: current_url = self._current_page_url()
LOG.debug("First login attempt did not succeed, trying second login attempt") raise TimeoutError(_("Auth0 post-submit verification remained inconclusive (url=%s)") % current_url)
await self.fill_login_data_and_send()
await self.handle_after_login_logic()
state = await self.get_login_state() def _is_valid_post_auth0_destination(self, url:str) -> bool:
if state == LoginState.LOGGED_IN: if not url or url in {"unknown", "about:blank"}:
LOG.debug("Second login attempt succeeded") return False
else:
LOG.warning("Second login attempt also failed - login may not have succeeded") parsed = urllib_parse.urlparse(url)
host = (parsed.hostname or "").lower()
path = parsed.path.lower()
if host != "kleinanzeigen.de" and not host.endswith(".kleinanzeigen.de"):
return False
if host == "login.kleinanzeigen.de":
return False
if path.startswith("/u/login"):
return False
return "error" not in path
async def fill_login_data_and_send(self) -> None: async def fill_login_data_and_send(self) -> None:
LOG.info("Logging in as [%s]...", self.config.login.username) """Auth0 2-step login via m-einloggen-sso.html (server-side redirect, no JS needed).
await self.web_input(By.ID, "login-email", self.config.login.username)
# clearing password input in case browser has stored login data set Step 1: /u/login/identifier - email
await self.web_input(By.ID, "login-password", "") Step 2: /u/login/password - password
await self.web_input(By.ID, "login-password", self.config.login.password) """
LOG.info("Logging in...")
await self._wait_for_auth0_login_context()
# Step 1: email identifier
LOG.debug("Auth0 Step 1: entering email...")
await self.web_input(By.ID, "username", self.config.login.username)
await self.web_click(By.CSS_SELECTOR, "button[type='submit']")
# Step 2: wait for password page then enter password
LOG.debug("Waiting for Auth0 password page...")
await self._wait_for_auth0_password_step()
LOG.debug("Auth0 Step 2: entering password...")
await self.web_input(By.CSS_SELECTOR, "input[type='password']", self.config.login.password)
await self.check_and_wait_for_captcha(is_login_page = True) await self.check_and_wait_for_captcha(is_login_page = True)
await self.web_click(By.CSS_SELECTOR, "button[type='submit']")
await self.web_click(By.CSS_SELECTOR, "form#login-form button[type='submit']") await self._wait_for_post_auth0_submit_transition()
LOG.debug("Auth0 login submitted.")
async def handle_after_login_logic(self) -> None: async def handle_after_login_logic(self) -> None:
try: try:
await self._check_sms_verification()
except TimeoutError:
LOG.debug("No SMS verification prompt detected after login")
try:
await self._check_email_verification()
except TimeoutError:
LOG.debug("No email verification prompt detected after login")
try:
LOG.debug("Handling GDPR disclaimer...")
await self._click_gdpr_banner()
except TimeoutError:
LOG.debug("GDPR banner not found or timed out")
async def _check_sms_verification(self) -> None:
sms_timeout = self._timeout("sms_verification") sms_timeout = self._timeout("sms_verification")
await self.web_find(By.TEXT, "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer", timeout = sms_timeout) await self.web_find(By.TEXT, "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer", timeout = sms_timeout)
LOG.warning("############################################") LOG.warning("############################################")
LOG.warning("# Device verification message detected. Please follow the instruction displayed in the Browser.") LOG.warning("# Device verification message detected. Please follow the instruction displayed in the Browser.")
LOG.warning("############################################") LOG.warning("############################################")
await ainput(_("Press ENTER when done...")) await ainput(_("Press ENTER when done..."))
except TimeoutError:
# No SMS verification prompt detected.
pass
try:
email_timeout = self._timeout("email_verification")
await self.web_find(By.TEXT, "Um dein Konto zu schützen haben wir dir eine E-Mail geschickt", timeout = email_timeout)
LOG.warning("############################################")
LOG.warning("# Device verification message detected. Please follow the instruction displayed in the Browser.")
LOG.warning("############################################")
await ainput(_("Press ENTER when done..."))
except TimeoutError:
# No email verification prompt detected.
pass
try:
LOG.info("Handling GDPR disclaimer...")
gdpr_timeout = self._timeout("gdpr_prompt")
await self.web_find(By.ID, "gdpr-banner-accept", timeout = gdpr_timeout)
await self.web_click(By.ID, "gdpr-banner-cmp-button")
await self.web_click(
By.XPATH, "//div[@id='ConsentManagementPage']//*//button//*[contains(., 'Alle ablehnen und fortfahren')]", timeout = gdpr_timeout
)
except TimeoutError:
# GDPR banner not shown within timeout.
pass
async def _dismiss_consent_banner(self) -> None: async def _dismiss_consent_banner(self) -> None:
"""Dismiss the GDPR/TCF consent banner if it is present. """Dismiss the GDPR/TCF consent banner if it is present.
@@ -1100,64 +1211,38 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
LOG.debug("Consent banner detected, clicking 'Alle akzeptieren'...") LOG.debug("Consent banner detected, clicking 'Alle akzeptieren'...")
await self.web_click(By.ID, "gdpr-banner-accept") await self.web_click(By.ID, "gdpr-banner-accept")
except TimeoutError: except TimeoutError:
pass # Banner not present; nothing to dismiss LOG.debug("Consent banner not present; continuing without dismissal")
async def _auth_probe_login_state(self) -> LoginState: async def _check_email_verification(self) -> None:
"""Probe an auth-required endpoint to classify login state. email_timeout = self._timeout("email_verification")
await self.web_find(By.TEXT, "Um dein Konto zu schützen haben wir dir eine E-Mail geschickt", timeout = email_timeout)
LOG.warning("############################################")
LOG.warning("# Device verification message detected. Please follow the instruction displayed in the Browser.")
LOG.warning("############################################")
await ainput(_("Press ENTER when done..."))
The probe is non-mutating (GET request). It is used as a fallback method by async def _click_gdpr_banner(self, *, timeout:float | None = None) -> None:
get_login_state() when DOM-based checks are inconclusive. gdpr_timeout = self._timeout("quick_dom") if timeout is None else timeout
""" await self.web_find(By.ID, "gdpr-banner-accept", timeout = gdpr_timeout)
await self.web_click(By.ID, "gdpr-banner-accept", timeout = gdpr_timeout)
url = f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT" async def get_login_state(self, *, capture_diagnostics:bool = True) -> LoginState:
try: """Determine current login state using DOM - first detection.
response = await self.web_request(url, valid_response_codes = [200, 401, 403])
except (TimeoutError, AssertionError):
# AssertionError can occur when web_request() fails to parse the response (e.g., unexpected content type)
# Treat both timeout and assertion failures as UNKNOWN to avoid false assumptions about login state
return LoginState.UNKNOWN
status_code = response.get("statusCode")
if status_code in {401, 403}:
return LoginState.LOGGED_OUT
content = response.get("content", "")
if not isinstance(content, str):
return LoginState.UNKNOWN
try:
payload = json.loads(content)
except json.JSONDecodeError:
lowered = content.lower()
if "m-einloggen" in lowered or "login-email" in lowered or "login-password" in lowered or "login-form" in lowered:
return LoginState.LOGGED_OUT
return LoginState.UNKNOWN
if isinstance(payload, dict) and "ads" in payload:
return LoginState.LOGGED_IN
return LoginState.UNKNOWN
async def get_login_state(self) -> LoginState:
"""Determine current login state using layered detection.
Order: Order:
1) DOM-based check via `is_logged_in(include_probe=False)` (preferred - stealthy) 1) DOM - based logged - in check via `is_logged_in(include_probe=False)`
2) Server-side auth probe via `_auth_probe_login_state` (fallback - more reliable) 2) Logged - out CTA check
3) If still inconclusive, capture diagnostics via 3) If inconclusive, optionally capture diagnostics and return `UNKNOWN`
`_capture_login_detection_diagnostics_if_enabled` and return `UNKNOWN`
""" """
# Prefer DOM-based checks first to minimize bot-like behavior. # Prefer DOM-based checks first to minimize bot-like behavior and avoid
# The auth probe makes a JSON API request that normal users wouldn't trigger. # fragile API probing side effects. Server-side auth probing was removed.
if await self.is_logged_in(include_probe = False): if await self.is_logged_in(include_probe = False):
return LoginState.LOGGED_IN return LoginState.LOGGED_IN
# Fall back to the more reliable server-side auth probe. if await self._has_logged_out_cta(log_timeout = False):
# SPA/hydration delays can cause DOM-based checks to temporarily miss login indicators. return LoginState.LOGGED_OUT
state = await self._auth_probe_login_state()
if state != LoginState.UNKNOWN:
return state
if capture_diagnostics:
await self._capture_login_detection_diagnostics_if_enabled() await self._capture_login_detection_diagnostics_if_enabled()
return LoginState.UNKNOWN return LoginState.UNKNOWN
@@ -1271,8 +1356,27 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
login_check_timeout, login_check_timeout,
effective_timeout, effective_timeout,
) )
quick_dom_timeout = self._timeout("quick_dom")
tried_login_selectors = _format_login_detection_selectors(_LOGIN_DETECTION_SELECTORS) tried_login_selectors = _format_login_detection_selectors(_LOGIN_DETECTION_SELECTORS)
try:
user_info, matched_selector = await self.web_text_first_available(
_LOGIN_DETECTION_SELECTORS,
timeout = quick_dom_timeout,
key = "quick_dom",
description = "login_detection(quick_logged_in)",
)
if username in user_info.lower():
matched_selector_display = (
f"{_LOGIN_DETECTION_SELECTORS[matched_selector][0].name}={_LOGIN_DETECTION_SELECTORS[matched_selector][1]}"
if 0 <= matched_selector < len(_LOGIN_DETECTION_SELECTORS)
else f"selector_index_{matched_selector}"
)
LOG.debug("Login detected via login detection selector '%s'", matched_selector_display)
return True
except TimeoutError:
LOG.debug("No login detected via configured login detection selectors (%s)", tried_login_selectors)
try: try:
user_info, matched_selector = await self.web_text_first_available( user_info, matched_selector = await self.web_text_first_available(
_LOGIN_DETECTION_SELECTORS, _LOGIN_DETECTION_SELECTORS,
@@ -1281,32 +1385,60 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
description = "login_detection(selector_group)", description = "login_detection(selector_group)",
) )
if username in user_info.lower(): if username in user_info.lower():
matched_selector_label = ( matched_selector_display = (
_LOGIN_DETECTION_SELECTOR_LABELS[matched_selector] f"{_LOGIN_DETECTION_SELECTORS[matched_selector][0].name}={_LOGIN_DETECTION_SELECTORS[matched_selector][1]}"
if 0 <= matched_selector < len(_LOGIN_DETECTION_SELECTOR_LABELS) if 0 <= matched_selector < len(_LOGIN_DETECTION_SELECTORS)
else f"selector_index_{matched_selector}" else f"selector_index_{matched_selector}"
) )
LOG.debug("Login detected via login detection selector '%s'", matched_selector_label) LOG.debug("Login detected via login detection selector '%s'", matched_selector_display)
return True return True
except TimeoutError: except TimeoutError:
LOG.debug("Timeout waiting for login detection selector group after %.1fs", effective_timeout) LOG.debug("Timeout waiting for login detection selector group after %.1fs", effective_timeout)
if not include_probe: if await self._has_logged_out_cta():
return False
if include_probe:
LOG.debug("No login detected via configured login detection selectors (%s); auth probe is disabled", tried_login_selectors)
return False
LOG.debug("No login detected via configured login detection selectors (%s)", tried_login_selectors) LOG.debug("No login detected via configured login detection selectors (%s)", tried_login_selectors)
return False return False
state = await self._auth_probe_login_state() async def _has_logged_out_cta(self, *, log_timeout:bool = True) -> bool:
if state == LoginState.LOGGED_IN: quick_dom_timeout = self._timeout("quick_dom")
return True tried_logged_out_selectors = _format_login_detection_selectors(_LOGGED_OUT_CTA_SELECTORS)
LOG.debug( try:
"No login detected - DOM login detection selectors (%s) did not confirm login and server probe returned %s", cta_element, cta_index = await self.web_find_first_available(
tried_login_selectors, _LOGGED_OUT_CTA_SELECTORS,
state.name, timeout = quick_dom_timeout,
key = "quick_dom",
description = "login_detection(logged_out_cta)",
) )
cta_text = await self._extract_visible_text(cta_element)
if cta_text.strip():
matched_selector_display = (
f"{_LOGGED_OUT_CTA_SELECTORS[cta_index][0].name}={_LOGGED_OUT_CTA_SELECTORS[cta_index][1]}"
if 0 <= cta_index < len(_LOGGED_OUT_CTA_SELECTORS)
else f"selector_index_{cta_index}"
)
if 0 <= cta_index < len(_LOGGED_OUT_CTA_SELECTORS):
LOG.debug("Fast logged-out pre-check matched selector '%s'", matched_selector_display)
return True
LOG.debug("Fast logged-out pre-check got unexpected selector index '%s'; failing closed", cta_index)
return False
except TimeoutError:
if log_timeout:
LOG.debug(
"Fast logged-out pre-check found no login CTA (%s) within %.1fs",
tried_logged_out_selectors,
quick_dom_timeout,
)
return False return False
async def _fetch_published_ads(self) -> list[dict[str, Any]]: async def _fetch_published_ads(self, *, strict:bool = False) -> list[dict[str, Any]]:
"""Fetch all published ads, handling API pagination. """Fetch all published ads, handling API pagination.
Returns: Returns:
@@ -1326,37 +1458,84 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
try: try:
response = await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}") response = await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
except TimeoutError as ex: except TimeoutError as ex:
LOG.warning("Pagination request timed out on page %s: %s", page, ex) if strict:
raise
LOG.warning("Pagination request failed on page %s: %s", page, ex)
break
if not isinstance(response, dict):
if strict:
raise TypeError(f"Unexpected pagination response type on page {page}: {type(response).__name__}")
LOG.warning("Unexpected pagination response type on page %s: %s", page, type(response).__name__)
break break
content = response.get("content", "") content = response.get("content", "")
if isinstance(content, bytearray):
content = bytes(content)
if isinstance(content, bytes):
content = content.decode("utf-8", errors = "replace")
if not isinstance(content, str):
if strict:
raise TypeError(f"Unexpected response content type on page {page}: {type(content).__name__}")
LOG.warning("Unexpected response content type on page %s: %s", page, type(content).__name__)
break
try: try:
json_data = json.loads(content) json_data = json.loads(content)
except json.JSONDecodeError as ex: except (json.JSONDecodeError, TypeError) as ex:
if not content: if not content:
if strict:
raise ValueError(f"Empty JSON response content on page {page}") from ex
LOG.warning("Empty JSON response content on page %s", page) LOG.warning("Empty JSON response content on page %s", page)
break break
if strict:
raise ValueError(f"Failed to parse JSON response on page {page}: {ex}") from ex
snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "") snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "")
LOG.warning("Failed to parse JSON response on page %s: %s (content: %s)", page, ex, snippet) LOG.warning("Failed to parse JSON response on page %s: %s (content: %s)", page, ex, snippet)
break break
if not isinstance(json_data, dict): if not isinstance(json_data, dict):
if strict:
raise TypeError(f"Unexpected JSON payload type on page {page}: {type(json_data).__name__}")
snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "") snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "")
LOG.warning("Unexpected JSON payload on page %s (content: %s)", page, snippet) LOG.warning("Unexpected JSON payload on page %s (content: %s)", page, snippet)
break break
page_ads = json_data.get("ads", []) page_ads = json_data.get("ads", [])
if not isinstance(page_ads, list): if not isinstance(page_ads, list):
if strict:
raise TypeError(f"Unexpected 'ads' type on page {page}: {type(page_ads).__name__}")
preview = str(page_ads) preview = str(page_ads)
if len(preview) > SNIPPET_LIMIT: if len(preview) > SNIPPET_LIMIT:
preview = preview[:SNIPPET_LIMIT] + "..." preview = preview[:SNIPPET_LIMIT] + "..."
LOG.warning("Unexpected 'ads' type on page %s: %s value: %s", page, type(page_ads).__name__, preview) LOG.warning("Unexpected 'ads' type on page %s: %s value: %s", page, type(page_ads).__name__, preview)
break break
ads.extend(page_ads) filtered_page_ads:list[dict[str, Any]] = []
rejected_count = 0
rejected_preview:str | None = None
for entry in page_ads:
if isinstance(entry, dict):
filtered_page_ads.append(entry)
continue
rejected_count += 1
if strict:
raise TypeError(f"Unexpected ad entry type on page {page}: {type(entry).__name__}")
if rejected_preview is None:
rejected_preview = repr(entry)
if rejected_count > 0:
preview = rejected_preview or "<none>"
if len(preview) > SNIPPET_LIMIT:
preview = preview[:SNIPPET_LIMIT] + "..."
LOG.warning("Filtered %s malformed ad entries on page %s (sample: %s)", rejected_count, page, preview)
ads.extend(filtered_page_ads)
paging = json_data.get("paging") paging = json_data.get("paging")
if not isinstance(paging, dict): if not isinstance(paging, dict):
if strict:
raise ValueError(f"Missing or invalid paging info on page {page}: {type(paging).__name__}")
LOG.debug("No paging dict found on page %s, assuming single page", page) LOG.debug("No paging dict found on page %s, assuming single page", page)
break break
@@ -1365,10 +1544,14 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
total_pages = misc.coerce_page_number(paging.get("last")) total_pages = misc.coerce_page_number(paging.get("last"))
if current_page_num is None: if current_page_num is None:
if strict:
raise ValueError(f"Invalid 'pageNum' in paging info: {paging.get('pageNum')}")
LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum")) LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
break break
if total_pages is None: if total_pages is None:
if strict:
raise ValueError("No pagination info found")
LOG.debug("No pagination info found, assuming single page") LOG.debug("No pagination info found, assuming single page")
break break
@@ -1387,6 +1570,8 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
# Use API's next field for navigation (more robust than our counter) # Use API's next field for navigation (more robust than our counter)
next_page = misc.coerce_page_number(paging.get("next")) next_page = misc.coerce_page_number(paging.get("next"))
if next_page is None: if next_page is None:
if strict:
raise ValueError(f"Invalid 'next' page value in paging info: {paging.get('next')}")
LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next")) LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
break break
page = next_page page = next_page
@@ -1554,6 +1739,28 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
# Check for success messages # Check for success messages
return await self.web_check(By.ID, "checking-done", Is.DISPLAYED) or await self.web_check(By.ID, "not-completed", Is.DISPLAYED) return await self.web_check(By.ID, "checking-done", Is.DISPLAYED) or await self.web_check(By.ID, "not-completed", Is.DISPLAYED)
async def _detect_new_published_ad_ids(self, ads_before_publish:set[str], ad_title:str) -> set[str] | None:
    """Return the IDs of published ads that were not present before publishing.

    Args:
        ads_before_publish: Ad IDs recorded before the publish attempt.
        ad_title: Title of the ad being published; used only in the warning log.

    Returns:
        The set of ad IDs found now but absent from ``ads_before_publish``, or
        ``None`` when fetching or validating the published ads failed.
    """
    try:
        published_now = await self._fetch_published_ads(strict = True)
        seen_ids:set[str] = set()
        for entry in published_now:
            if isinstance(entry, dict):
                if entry.get("id"):
                    seen_ids.add(str(entry["id"]))
                continue
            # Fail closed: a malformed entry must abort the verification (and with
            # it any retry) rather than risk creating a duplicate listing.
            size = len(entry) if hasattr(entry, "__len__") else None
            LOG.debug("Malformed ad entry in strict duplicate verification: type=%s length=%s", type(entry).__name__, size)
            raise TypeError(f"Unexpected ad entry type: {type(entry).__name__}")
    except Exception as ex:  # noqa: BLE001
        LOG.warning(
            "Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.",
            ad_title,
            ex,
        )
        return None
    return seen_ids - ads_before_publish
async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
count = 0 count = 0
failed_count = 0 failed_count = 0
@@ -1589,34 +1796,33 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
raise # Respect task cancellation raise # Respect task cancellation
except (TimeoutError, ProtocolException) as ex: except (TimeoutError, ProtocolException) as ex:
await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex) await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
if attempt < max_retries: if attempt >= max_retries:
LOG.error("All %s attempts failed for '%s': %s. Skipping ad.", max_retries, ad_cfg.title, ex)
failed_count += 1
continue
# Before retrying, check if the ad was already created despite the error. # Before retrying, check if the ad was already created despite the error.
# A partially successful submission followed by a retry would create a duplicate listing, # A partially successful submission followed by a retry would create a duplicate listing,
# which violates kleinanzeigen.de terms of service and can lead to account suspension. # which violates kleinanzeigen.de terms of service and can lead to account suspension.
try: new_ad_ids = await self._detect_new_published_ad_ids(ads_before_publish, ad_cfg.title)
current_ads = await self._fetch_published_ads() if new_ad_ids is None:
current_ad_ids = {str(x["id"]) for x in current_ads if x.get("id")} failed_count += 1
new_ad_ids = current_ad_ids - ads_before_publish break
if new_ad_ids: if new_ad_ids:
LOG.warning( LOG.warning(
"Attempt %s/%s failed for '%s': %s. " "Attempt %s/%s failed for '%s': %s. "
"However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.", "However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.",
attempt, max_retries, ad_cfg.title, ex, ", ".join(new_ad_ids) attempt,
) max_retries,
failed_count += 1 ad_cfg.title,
break ex,
except Exception as verify_ex: # noqa: BLE001 ", ".join(new_ad_ids),
LOG.warning(
"Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.",
ad_cfg.title, verify_ex,
) )
failed_count += 1 failed_count += 1
break break
LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex) LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex)
await self.web_sleep(2) # Wait before retry await self.web_sleep(2_000) # Wait before retry
else:
LOG.error("All %s attempts failed for '%s': %s. Skipping ad.", max_retries, ad_cfg.title, ex)
failed_count += 1
# Check publishing result separately (no retry - ad is already submitted) # Check publishing result separately (no retry - ad is already submitted)
if success: if success:

View File

@@ -37,9 +37,12 @@ kleinanzeigen_bot/__init__.py:
"Empty JSON response content on page %s": "Leerer JSON-Antwortinhalt auf Seite %s" "Empty JSON response content on page %s": "Leerer JSON-Antwortinhalt auf Seite %s"
"Failed to parse JSON response on page %s: %s (content: %s)": "Fehler beim Parsen der JSON-Antwort auf Seite %s: %s (Inhalt: %s)" "Failed to parse JSON response on page %s: %s (content: %s)": "Fehler beim Parsen der JSON-Antwort auf Seite %s: %s (Inhalt: %s)"
"Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Seitenaufschaltung nach %s Seiten, um eine Endlosschleife zu vermeiden" "Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Seitenaufschaltung nach %s Seiten, um eine Endlosschleife zu vermeiden"
"Pagination request timed out on page %s: %s": "Zeitueberschreitung bei der Seitenabfrage auf Seite %s: %s" "Pagination request failed on page %s: %s": "Seitenabfrage auf Seite %s fehlgeschlagen: %s"
"Unexpected pagination response type on page %s: %s": "Unerwarteter Typ der Paginierungsantwort auf Seite %s: %s"
"Unexpected response content type on page %s: %s": "Unerwarteter Antwortinhalt-Typ auf Seite %s: %s"
"Unexpected JSON payload on page %s (content: %s)": "Unerwartete JSON-Antwort auf Seite %s (Inhalt: %s)" "Unexpected JSON payload on page %s (content: %s)": "Unerwartete JSON-Antwort auf Seite %s (Inhalt: %s)"
"Unexpected 'ads' type on page %s: %s value: %s": "Unerwarteter 'ads'-Typ auf Seite %s: %s Wert: %s" "Unexpected 'ads' type on page %s: %s value: %s": "Unerwarteter 'ads'-Typ auf Seite %s: %s Wert: %s"
"Filtered %s malformed ad entries on page %s (sample: %s)": "%s fehlerhafte Anzeigen-Einträge auf Seite %s gefiltert (Beispiel: %s)"
"Reached last page %s of %s, stopping pagination": "Letzte Seite %s von %s erreicht, beende Paginierung" "Reached last page %s of %s, stopping pagination": "Letzte Seite %s von %s erreicht, beende Paginierung"
"No ads found on page %s, stopping pagination": "Keine Anzeigen auf Seite %s gefunden, beende Paginierung" "No ads found on page %s, stopping pagination": "Keine Anzeigen auf Seite %s gefunden, beende Paginierung"
"Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung" "Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung"
@@ -86,14 +89,36 @@ kleinanzeigen_bot/__init__.py:
login: login:
"Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..." "Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..."
"Current page URL after opening homepage: %s": "Aktuelle Seiten-URL nach dem Öffnen der Startseite: %s" "Already logged in. Skipping login.": "Bereits eingeloggt. Überspringe Anmeldung."
"Already logged in as [%s]. Skipping login.": "Bereits eingeloggt als [%s]. Überspringe Anmeldung." "Navigating to SSO login page (Auth0)...": "Navigiere zur SSO-Anmeldeseite (Auth0)..."
"Opening login page...": "Öffne Anmeldeseite..." "Timeout navigating to SSO login page after %.1fs": "Zeitüberschreitung beim Navigieren zur SSO-Anmeldeseite nach %.1fs"
"Login state is UNKNOWN - cannot determine if already logged in. Skipping login attempt.": "Login-Status ist UNKNOWN - kann nicht bestimmt werden, ob bereits eingeloggt ist. Überspringe Anmeldeversuch." "Login confirmed.": "Anmeldung bestätigt."
"Login state is UNKNOWN after first login attempt - cannot determine login status. Aborting login process.": "Login-Status ist UNKNOWN nach dem ersten Anmeldeversuch - kann Login-Status nicht bestimmen. Breche Anmeldeprozess ab." "Login state after attempt is %s (url=%s)": "Login-Status nach dem Versuch ist %s (URL=%s)"
"First login attempt did not succeed, trying second login attempt": "Erster Anmeldeversuch war nicht erfolgreich, versuche zweiten Anmeldeversuch" "Login could not be confirmed after Auth0 flow (state=%s, url=%s)": "Anmeldung nach Auth0-Flow konnte nicht bestätigt werden (Status=%s, URL=%s)"
"Second login attempt succeeded": "Zweiter Anmeldeversuch erfolgreich"
"Second login attempt also failed - login may not have succeeded": "Zweiter Anmeldeversuch ebenfalls fehlgeschlagen - Anmeldung möglicherweise nicht erfolgreich" _wait_for_auth0_login_context:
"Auth0 redirect not detected (url=%s)": "Auth0-Weiterleitung nicht erkannt (URL=%s)"
_wait_for_auth0_password_step:
"Auth0 password step not reached (url=%s)": "Auth0-Passwortschritt nicht erreicht (URL=%s)"
_wait_for_post_auth0_submit_transition:
"Auth0 post-submit verification remained inconclusive (url=%s)": "Auth0-Verifikation nach Absenden blieb unklar (URL=%s)"
fill_login_data_and_send:
"Logging in...": "Anmeldung..."
"Auth0 Step 1: entering email...": "Auth0 Schritt 1: E-Mail wird eingegeben..."
"Waiting for Auth0 password page...": "Warte auf Auth0-Passwortseite..."
"Auth0 Step 2: entering password...": "Auth0 Schritt 2: Passwort wird eingegeben..."
"Auth0 login submitted.": "Auth0-Anmeldung abgesendet."
_check_sms_verification:
"# Device verification message detected. Please follow the instruction displayed in the Browser.": "# Nachricht zur Geräteverifizierung erkannt. Bitte den Anweisungen im Browser folgen."
"Press ENTER when done...": "EINGABETASTE drücken, wenn erledigt..."
_check_email_verification:
"# Device verification message detected. Please follow the instruction displayed in the Browser.": "# Nachricht zur Geräteverifizierung erkannt. Bitte den Anweisungen im Browser folgen."
"Press ENTER when done...": "EINGABETASTE drücken, wenn erledigt..."
is_logged_in: is_logged_in:
"Starting login detection (timeout: %.1fs base, %.1fs effective with multiplier/backoff)": "Starte Login-Erkennung (Timeout: %.1fs Basis, %.1fs effektiv mit Multiplikator/Backoff)" "Starting login detection (timeout: %.1fs base, %.1fs effective with multiplier/backoff)": "Starte Login-Erkennung (Timeout: %.1fs Basis, %.1fs effektiv mit Multiplikator/Backoff)"
@@ -101,8 +126,6 @@ kleinanzeigen_bot/__init__.py:
"Timeout waiting for login detection selector group after %.1fs": "Timeout beim Warten auf die Login-Erkennungs-Selektorgruppe nach %.1fs" "Timeout waiting for login detection selector group after %.1fs": "Timeout beim Warten auf die Login-Erkennungs-Selektorgruppe nach %.1fs"
handle_after_login_logic: handle_after_login_logic:
"# Device verification message detected. Please follow the instruction displayed in the Browser.": "# Nachricht zur Geräteverifizierung erkannt. Bitte den Anweisungen im Browser folgen."
"Press ENTER when done...": "EINGABETASTE drücken, wenn erledigt..."
"Handling GDPR disclaimer...": "Verarbeite DSGVO-Hinweis..." "Handling GDPR disclaimer...": "Verarbeite DSGVO-Hinweis..."
delete_ads: delete_ads:
@@ -156,11 +179,14 @@ kleinanzeigen_bot/__init__.py:
"Attempt %s/%s failed for '%s': %s. Retrying...": "Versuch %s/%s fehlgeschlagen für '%s': %s. Erneuter Versuch..." "Attempt %s/%s failed for '%s': %s. Retrying...": "Versuch %s/%s fehlgeschlagen für '%s': %s. Erneuter Versuch..."
"Attempt %s/%s failed for '%s': %s. However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.": "Versuch %s/%s fehlgeschlagen für '%s': %s. Jedoch wurde eine neue Anzeige erkannt (ID: %s) -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden." "Attempt %s/%s failed for '%s': %s. However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.": "Versuch %s/%s fehlgeschlagen für '%s': %s. Jedoch wurde eine neue Anzeige erkannt (ID: %s) -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden."
"Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.": "Konnte keine aktuelle Anzeigen-Baseline für '%s' abrufen: %s. Verwende initialen Snapshot." "Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.": "Konnte keine aktuelle Anzeigen-Baseline für '%s' abrufen: %s. Verwende initialen Snapshot."
"Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.": "Veröffentlichte Anzeigen konnten nach fehlgeschlagenem Versuch für '%s' nicht geprüft werden: %s -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden."
"All %s attempts failed for '%s': %s. Skipping ad.": "Alle %s Versuche fehlgeschlagen für '%s': %s. Überspringe Anzeige." "All %s attempts failed for '%s': %s. Skipping ad.": "Alle %s Versuche fehlgeschlagen für '%s': %s. Überspringe Anzeige."
"DONE: (Re-)published %s (%s failed after retries)": "FERTIG: %s (erneut) veröffentlicht (%s fehlgeschlagen nach Wiederholungen)" "DONE: (Re-)published %s (%s failed after retries)": "FERTIG: %s (erneut) veröffentlicht (%s fehlgeschlagen nach Wiederholungen)"
"DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht" "DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht"
"ad": "Anzeige" "ad": "Anzeige"
_detect_new_published_ad_ids:
"Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.": "Veröffentlichte Anzeigen konnten nach fehlgeschlagenem Versuch für '%s' nicht geprüft werden: %s -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden."
apply_auto_price_reduction: apply_auto_price_reduction:
"Auto price reduction is enabled for [%s] but no price is configured.": "Automatische Preisreduzierung ist für [%s] aktiviert, aber es wurde kein Preis konfiguriert." "Auto price reduction is enabled for [%s] but no price is configured.": "Automatische Preisreduzierung ist für [%s] aktiviert, aber es wurde kein Preis konfiguriert."
"Auto price reduction is enabled for [%s] but min_price equals price (%s) - no reductions will occur.": "Automatische Preisreduzierung ist für [%s] aktiviert, aber min_price entspricht dem Preis (%s) - es werden keine Reduktionen auftreten." "Auto price reduction is enabled for [%s] but min_price equals price (%s) - no reductions will occur.": "Automatische Preisreduzierung ist für [%s] aktiviert, aber min_price entspricht dem Preis (%s) - es werden keine Reduktionen auftreten."
@@ -264,9 +290,6 @@ kleinanzeigen_bot/__init__.py:
"Unknown command: %s": "Unbekannter Befehl: %s" "Unknown command: %s": "Unbekannter Befehl: %s"
"Timing collector flush failed: %s": "Zeitmessdaten konnten nicht gespeichert werden: %s" "Timing collector flush failed: %s": "Zeitmessdaten konnten nicht gespeichert werden: %s"
fill_login_data_and_send:
"Logging in as [%s]...": "Anmeldung als [%s]..."
__set_shipping: __set_shipping:
"Unable to close shipping dialog!": "Versanddialog konnte nicht geschlossen werden!" "Unable to close shipping dialog!": "Versanddialog konnte nicht geschlossen werden!"

View File

@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors # SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import copy, fnmatch, io, json, logging, os, tempfile # isort: skip import asyncio, copy, fnmatch, io, json, logging, os, tempfile # isort: skip
from collections.abc import Callable, Generator from collections.abc import Callable, Generator
from contextlib import redirect_stdout from contextlib import redirect_stdout
from datetime import timedelta from datetime import timedelta
@@ -442,7 +442,12 @@ class TestKleinanzeigenBotAuthentication:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_is_logged_in_returns_true_when_logged_in(self, test_bot:KleinanzeigenBot) -> None: async def test_is_logged_in_returns_true_when_logged_in(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that login check returns true when logged in.""" """Verify that login check returns true when logged in."""
with patch.object(test_bot, "web_text_first_available", new_callable = AsyncMock, return_value = ("Welcome dummy_user", 0)): with patch.object(
test_bot,
"web_text_first_available",
new_callable = AsyncMock,
return_value = ("Welcome dummy_user", 0),
):
assert await test_bot.is_logged_in() is True assert await test_bot.is_logged_in() is True
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -460,45 +465,96 @@ class TestKleinanzeigenBotAuthentication:
async def test_is_logged_in_returns_false_when_not_logged_in(self, test_bot:KleinanzeigenBot) -> None: async def test_is_logged_in_returns_false_when_not_logged_in(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that login check returns false when not logged in.""" """Verify that login check returns false when not logged in."""
with ( with (
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError),
patch.object( patch.object(
test_bot, test_bot,
"web_request", "web_text_first_available",
new_callable = AsyncMock, new_callable = AsyncMock,
return_value = {"statusCode": 200, "content": "<html><a href='/m-einloggen.html'>login</a></html>"}, side_effect = [("nicht-eingeloggt", 0), ("kein user signal", 0)],
), ),
patch.object(test_bot, "_has_logged_out_cta", new_callable = AsyncMock, return_value = False),
): ):
assert await test_bot.is_logged_in() is False assert await test_bot.is_logged_in() is False
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_is_logged_in_uses_selector_group_timeout_key(self, test_bot:KleinanzeigenBot) -> None: async def test_has_logged_out_cta_requires_visible_candidate(self, test_bot:KleinanzeigenBot) -> None:
"""Verify login detection uses selector-group lookup with login_detection timeout key.""" matched_element = MagicMock(spec = Element)
with patch.object(test_bot, "web_text_first_available", new_callable = AsyncMock, return_value = ("Welcome dummy_user", 0)) as group_text: with (
assert await test_bot.is_logged_in(include_probe = False) is True patch.object(test_bot, "web_find_first_available", new_callable = AsyncMock, return_value = (matched_element, 0)),
patch.object(test_bot, "_extract_visible_text", new_callable = AsyncMock, return_value = ""),
group_text.assert_awaited_once() ):
call_args = group_text.await_args assert await test_bot._has_logged_out_cta() is False
assert call_args is not None
assert call_args.args[0] == [(By.CLASS_NAME, "mr-medium"), (By.ID, "user-email")]
assert call_args.kwargs["key"] == "login_detection"
assert call_args.kwargs["timeout"] == test_bot._timeout("login_detection")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_is_logged_in_logs_selector_label_without_raw_selector_literals( async def test_has_logged_out_cta_accepts_visible_candidate(self, test_bot:KleinanzeigenBot) -> None:
matched_element = MagicMock(spec = Element)
with (
patch.object(test_bot, "web_find_first_available", new_callable = AsyncMock, return_value = (matched_element, 0)),
patch.object(test_bot, "_extract_visible_text", new_callable = AsyncMock, return_value = "Einloggen"),
):
assert await test_bot._has_logged_out_cta() is True
@pytest.mark.asyncio
async def test_is_logged_in_uses_selector_group_timeout_key(self, test_bot:KleinanzeigenBot) -> None:
"""Verify login detection uses selector-group lookup with login_detection timeout key."""
with patch.object(
test_bot,
"web_text_first_available",
new_callable = AsyncMock,
side_effect = [TimeoutError(), ("Welcome dummy_user", 0)],
) as group_text:
assert await test_bot.is_logged_in(include_probe = False) is True
group_text.assert_awaited()
assert any(call.kwargs.get("timeout") == test_bot._timeout("login_detection") for call in group_text.await_args_list)
@pytest.mark.asyncio
async def test_is_logged_in_runs_full_selector_group_before_cta_precheck(self, test_bot:KleinanzeigenBot) -> None:
"""Quick CTA checks must not short-circuit before full logged-in selector checks."""
with patch.object(
test_bot,
"web_text_first_available",
new_callable = AsyncMock,
side_effect = [TimeoutError(), ("Welcome dummy_user", 0)],
) as group_text:
assert await test_bot.is_logged_in(include_probe = False) is True
group_text.assert_awaited()
assert group_text.await_count >= 1
@pytest.mark.asyncio
async def test_is_logged_in_short_circuits_before_cta_check_when_quick_user_signal_matches(self, test_bot:KleinanzeigenBot) -> None:
"""Logged-in quick pre-check should win even if incidental login links exist elsewhere."""
with patch.object(
test_bot,
"web_text_first_available",
new_callable = AsyncMock,
return_value = ("angemeldet als: dummy_user", 0),
) as group_text:
assert await test_bot.is_logged_in(include_probe = False) is True
group_text.assert_awaited()
assert group_text.await_count >= 1
@pytest.mark.asyncio
async def test_is_logged_in_logs_matched_raw_selector(
self, test_bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture self, test_bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture
) -> None: ) -> None:
"""Login detection logs should reference stable labels, not raw selector values.""" """Login detection logs should show the matched raw selector."""
caplog.set_level("DEBUG") caplog.set_level("DEBUG")
with ( with (
caplog.at_level("DEBUG"), caplog.at_level("DEBUG"),
patch.object(test_bot, "web_text_first_available", new_callable = AsyncMock, return_value = ("angemeldet als: dummy_user", 1)), patch.object(
test_bot,
"web_text_first_available",
new_callable = AsyncMock,
return_value = ("angemeldet als: dummy_user", 0),
),
): ):
assert await test_bot.is_logged_in(include_probe = False) is True assert await test_bot.is_logged_in(include_probe = False) is True
assert "Login detected via login detection selector 'user_info_secondary'" in caplog.text assert "Login detected via login detection selector" in caplog.text
for forbidden in (".mr-medium", "#user-email", "mr-medium", "user-email"): assert "CLASS_NAME=mr-medium" in caplog.text
assert forbidden not in caplog.text
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_is_logged_in_logs_generic_message_when_selector_group_does_not_match( async def test_is_logged_in_logs_generic_message_when_selector_group_does_not_match(
@@ -509,78 +565,87 @@ class TestKleinanzeigenBotAuthentication:
with ( with (
caplog.at_level("DEBUG"), caplog.at_level("DEBUG"),
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError), patch.object(test_bot, "web_text_first_available", side_effect = [TimeoutError(), TimeoutError()]),
patch.object(test_bot, "_has_logged_out_cta", new_callable = AsyncMock, return_value = False),
): ):
assert await test_bot.is_logged_in(include_probe = False) is False assert await test_bot.is_logged_in(include_probe = False) is False
assert any( assert "No login detected via configured login detection selectors" in caplog.text
record.message == "No login detected via configured login detection selectors (CLASS_NAME=mr-medium, ID=user-email)" assert "CLASS_NAME=mr-medium" in caplog.text
for record in caplog.records assert "ID=user-email" in caplog.text
)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_is_logged_in_logs_raw_selectors_when_probe_reports_logged_out( async def test_is_logged_in_logs_raw_selectors_when_dom_checks_fail_and_probe_disabled(
self, test_bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture self, test_bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture
) -> None: ) -> None:
"""Probe-based final failure should include the tried raw selectors for debugging.""" """Final failure should report selectors and disabled-probe state."""
caplog.set_level("DEBUG") caplog.set_level("DEBUG")
with ( with (
caplog.at_level("DEBUG"), caplog.at_level("DEBUG"),
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError), patch.object(test_bot, "web_text_first_available", side_effect = [TimeoutError(), TimeoutError()]),
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_OUT), patch.object(test_bot, "_has_logged_out_cta", new_callable = AsyncMock, return_value = False),
): ):
assert await test_bot.is_logged_in() is False assert await test_bot.is_logged_in() is False
assert any( assert "No login detected via configured login detection selectors" in caplog.text
record.message == ( assert "auth probe is disabled" in caplog.text
"No login detected - DOM login detection selectors (CLASS_NAME=mr-medium, ID=user-email) "
"did not confirm login and server probe returned LOGGED_OUT"
)
for record in caplog.records
)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_get_login_state_prefers_dom_over_auth_probe(self, test_bot:KleinanzeigenBot) -> None: async def test_get_login_state_prefers_dom_checks(self, test_bot:KleinanzeigenBot) -> None:
with ( with (
patch.object(test_bot, "web_text_first_available", new_callable = AsyncMock, return_value = ("Welcome dummy_user", 0)) as web_text,
patch.object( patch.object(
test_bot, "_auth_probe_login_state", new_callable = AsyncMock, side_effect = AssertionError("Probe must not run when DOM is deterministic") test_bot,
) as probe, "web_text_first_available",
new_callable = AsyncMock,
return_value = ("Welcome dummy_user", 0),
) as web_text,
): ):
assert await test_bot.get_login_state() == LoginState.LOGGED_IN assert await test_bot.get_login_state() == LoginState.LOGGED_IN
web_text.assert_awaited_once() web_text.assert_awaited_once()
probe.assert_not_called()
def test_current_page_url_strips_query_and_fragment(self, test_bot:KleinanzeigenBot) -> None:
page = MagicMock()
page.url = "https://login.kleinanzeigen.de/u/login/password?state=secret&code=abc#frag"
test_bot.page = page
assert test_bot._current_page_url() == "https://login.kleinanzeigen.de/u/login/password"
def test_is_valid_post_auth0_destination_filters_invalid_urls(self, test_bot:KleinanzeigenBot) -> None:
assert test_bot._is_valid_post_auth0_destination("https://www.kleinanzeigen.de/") is True
assert test_bot._is_valid_post_auth0_destination("https://www.kleinanzeigen.de/m-meine-anzeigen.html") is True
assert test_bot._is_valid_post_auth0_destination("https://foo.kleinanzeigen.de/") is True
assert test_bot._is_valid_post_auth0_destination("unknown") is False
assert test_bot._is_valid_post_auth0_destination("about:blank") is False
assert test_bot._is_valid_post_auth0_destination("https://evilkleinanzeigen.de/") is False
assert test_bot._is_valid_post_auth0_destination("https://kleinanzeigen.de.evil.com/") is False
assert test_bot._is_valid_post_auth0_destination("https://login.kleinanzeigen.de/u/login/password") is False
assert test_bot._is_valid_post_auth0_destination("https://www.kleinanzeigen.de/login-error-500") is False
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_get_login_state_falls_back_to_auth_probe_when_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None: async def test_get_login_state_returns_unknown_when_dom_checks_are_inconclusive(self, test_bot:KleinanzeigenBot) -> None:
with ( with (
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError) as web_text, patch.object(test_bot, "web_text_first_available", side_effect = [TimeoutError(), TimeoutError()]) as web_text,
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_IN) as probe, patch.object(test_bot, "web_find_first_available", side_effect = TimeoutError()) as cta_find,
):
assert await test_bot.get_login_state() == LoginState.LOGGED_IN
web_text.assert_awaited_once()
probe.assert_awaited_once()
@pytest.mark.asyncio
async def test_get_login_state_falls_back_to_auth_probe_when_dom_logged_out(self, test_bot:KleinanzeigenBot) -> None:
with (
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError) as web_text,
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_OUT) as probe,
):
assert await test_bot.get_login_state() == LoginState.LOGGED_OUT
web_text.assert_awaited_once()
probe.assert_awaited_once()
@pytest.mark.asyncio
async def test_get_login_state_returns_unknown_when_probe_unknown_and_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None:
with (
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN) as probe,
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError) as web_text,
): ):
assert await test_bot.get_login_state() == LoginState.UNKNOWN assert await test_bot.get_login_state() == LoginState.UNKNOWN
probe.assert_awaited_once() assert web_text.await_count == 2
web_text.assert_awaited_once() assert cta_find.await_count == 2
@pytest.mark.asyncio
async def test_get_login_state_returns_logged_out_when_cta_detected(self, test_bot:KleinanzeigenBot) -> None:
matched_element = MagicMock(spec = Element)
with (
patch.object(
test_bot,
"web_text_first_available",
side_effect = [TimeoutError(), TimeoutError()],
) as web_text,
patch.object(test_bot, "web_find_first_available", new_callable = AsyncMock, return_value = (matched_element, 0)),
patch.object(test_bot, "_extract_visible_text", new_callable = AsyncMock, return_value = "Hier einloggen"),
):
assert await test_bot.get_login_state() == LoginState.LOGGED_OUT
assert web_text.await_count == 2
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_get_login_state_unknown_captures_diagnostics_when_enabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None: async def test_get_login_state_unknown_captures_diagnostics_when_enabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None:
@@ -592,8 +657,8 @@ class TestKleinanzeigenBotAuthentication:
test_bot.page = page test_bot.page = page
with ( with (
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN), patch.object(test_bot, "web_text_first_available", side_effect = [TimeoutError(), TimeoutError(), TimeoutError(), TimeoutError()]),
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError), patch.object(test_bot, "web_find_first_available", side_effect = TimeoutError()),
): ):
assert await test_bot.get_login_state() == LoginState.UNKNOWN assert await test_bot.get_login_state() == LoginState.UNKNOWN
@@ -610,8 +675,8 @@ class TestKleinanzeigenBotAuthentication:
test_bot.page = page test_bot.page = page
with ( with (
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN), patch.object(test_bot, "web_text_first_available", side_effect = [TimeoutError(), TimeoutError(), TimeoutError(), TimeoutError()]),
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError), patch.object(test_bot, "web_find_first_available", side_effect = TimeoutError()),
): ):
assert await test_bot.get_login_state() == LoginState.UNKNOWN assert await test_bot.get_login_state() == LoginState.UNKNOWN
@@ -633,8 +698,21 @@ class TestKleinanzeigenBotAuthentication:
stdin_mock.isatty.return_value = True stdin_mock.isatty.return_value = True
with ( with (
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN), patch.object(
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError), test_bot,
"web_text_first_available",
side_effect = [
TimeoutError(),
TimeoutError(),
TimeoutError(),
TimeoutError(),
TimeoutError(),
TimeoutError(),
TimeoutError(),
TimeoutError(),
],
),
patch.object(test_bot, "web_find_first_available", side_effect = TimeoutError()),
patch("kleinanzeigen_bot.sys.stdin", stdin_mock), patch("kleinanzeigen_bot.sys.stdin", stdin_mock),
patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput, patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput,
): ):
@@ -661,8 +739,8 @@ class TestKleinanzeigenBotAuthentication:
stdin_mock.isatty.return_value = False stdin_mock.isatty.return_value = False
with ( with (
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN), patch.object(test_bot, "web_text_first_available", side_effect = [TimeoutError(), TimeoutError(), TimeoutError(), TimeoutError()]),
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError), patch.object(test_bot, "web_find_first_available", side_effect = TimeoutError()),
patch("kleinanzeigen_bot.sys.stdin", stdin_mock), patch("kleinanzeigen_bot.sys.stdin", stdin_mock),
patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput, patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput,
): ):
@@ -676,67 +754,71 @@ class TestKleinanzeigenBotAuthentication:
with ( with (
patch.object(test_bot, "web_open") as mock_open, patch.object(test_bot, "web_open") as mock_open,
patch.object(test_bot, "get_login_state", new_callable = AsyncMock, side_effect = [LoginState.LOGGED_OUT, LoginState.LOGGED_IN]) as mock_logged_in, patch.object(test_bot, "get_login_state", new_callable = AsyncMock, side_effect = [LoginState.LOGGED_OUT, LoginState.LOGGED_IN]) as mock_logged_in,
patch.object(test_bot, "web_find", side_effect = TimeoutError), patch.object(test_bot, "_click_gdpr_banner", new_callable = AsyncMock),
patch.object(test_bot, "web_input") as mock_input, patch.object(test_bot, "fill_login_data_and_send", new_callable = AsyncMock) as mock_fill,
patch.object(test_bot, "web_click") as mock_click, patch.object(test_bot, "handle_after_login_logic", new_callable = AsyncMock) as mock_after_login,
patch.object(test_bot, "_dismiss_consent_banner", new_callable = AsyncMock),
): ):
await test_bot.login() await test_bot.login()
mock_open.assert_called() opened_urls = [call.args[0] for call in mock_open.call_args_list]
mock_logged_in.assert_called() assert any(url.startswith(test_bot.root_url) for url in opened_urls)
mock_input.assert_called() assert any(url.endswith("/m-einloggen-sso.html") for url in opened_urls)
mock_click.assert_called() mock_logged_in.assert_awaited()
mock_fill.assert_awaited_once()
mock_after_login.assert_awaited_once()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_login_flow_handles_captcha(self, test_bot:KleinanzeigenBot) -> None: async def test_login_flow_returns_early_when_already_logged_in(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that login flow handles captcha correctly.""" """Login should return early when state is already LOGGED_IN."""
with ( with (
patch.object(test_bot, "web_open"), patch.object(test_bot, "web_open") as mock_open,
patch.object( patch.object(test_bot, "get_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_IN) as mock_state,
test_bot, patch.object(test_bot, "_click_gdpr_banner", new_callable = AsyncMock),
"get_login_state", patch.object(test_bot, "fill_login_data_and_send", new_callable = AsyncMock) as mock_fill,
new_callable = AsyncMock, patch.object(test_bot, "handle_after_login_logic", new_callable = AsyncMock) as mock_after_login,
side_effect = [LoginState.LOGGED_OUT, LoginState.LOGGED_OUT, LoginState.LOGGED_IN],
),
patch.object(test_bot, "web_find") as mock_find,
patch.object(test_bot, "web_input") as mock_input,
patch.object(test_bot, "web_click") as mock_click,
patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput,
): ):
# Mock the sequence of web_find calls:
# 0. Consent banner not found (in _dismiss_consent_banner, before login state check)
# First login attempt:
# 1. Captcha iframe found (in check_and_wait_for_captcha)
# 2. Phone verification not found (in handle_after_login_logic)
# 3. Email verification not found (in handle_after_login_logic)
# 4. GDPR banner not found (in handle_after_login_logic)
# Second login attempt:
# 5. Captcha iframe found (in check_and_wait_for_captcha)
# 6. Phone verification not found (in handle_after_login_logic)
# 7. Email verification not found (in handle_after_login_logic)
# 8. GDPR banner not found (in handle_after_login_logic)
mock_find.side_effect = [
TimeoutError(), # Consent banner (before login state check)
AsyncMock(), # Captcha iframe (first login)
TimeoutError(), # Phone verification (first login)
TimeoutError(), # Email verification (first login)
TimeoutError(), # GDPR banner (first login)
AsyncMock(), # Captcha iframe (second login)
TimeoutError(), # Phone verification (second login)
TimeoutError(), # Email verification (second login)
TimeoutError(), # GDPR banner (second login)
]
mock_ainput.return_value = ""
mock_input.return_value = AsyncMock()
mock_click.return_value = AsyncMock()
await test_bot.login() await test_bot.login()
# Verify the complete flow mock_open.assert_awaited_once()
assert mock_find.call_count == 9 # 1 consent banner + 8 original web_find calls assert mock_open.await_args is not None
assert mock_ainput.call_count == 2 # Two captcha prompts assert mock_open.await_args.args[0] == test_bot.root_url
assert mock_input.call_count == 6 # Two login attempts with username, clear password, and set password mock_state.assert_awaited_once()
assert mock_click.call_count == 2 # Two submit button clicks mock_fill.assert_not_called()
mock_after_login.assert_not_called()
@pytest.mark.asyncio
async def test_login_flow_raises_when_state_remains_unknown(self, test_bot:KleinanzeigenBot) -> None:
"""Post-login UNKNOWN state should fail fast with diagnostics."""
with (
patch.object(test_bot, "web_open"),
patch.object(test_bot, "get_login_state", new_callable = AsyncMock, side_effect = [LoginState.LOGGED_OUT, LoginState.UNKNOWN]) as mock_state,
patch.object(test_bot, "_click_gdpr_banner", new_callable = AsyncMock),
patch.object(test_bot, "fill_login_data_and_send", new_callable = AsyncMock),
patch.object(test_bot, "handle_after_login_logic", new_callable = AsyncMock),
patch.object(test_bot, "_dismiss_consent_banner", new_callable = AsyncMock),
patch.object(test_bot, "_capture_login_detection_diagnostics_if_enabled", new_callable = AsyncMock) as mock_diagnostics,
):
with pytest.raises(AssertionError, match = "Login could not be confirmed"):
await test_bot.login()
mock_diagnostics.assert_awaited_once()
mock_state.assert_awaited()
@pytest.mark.asyncio
async def test_login_flow_raises_when_sso_navigation_times_out(self, test_bot:KleinanzeigenBot) -> None:
    """A TimeoutError from the second web_open call must be re-raised after diagnostics were captured."""
    # First web_open call succeeds, the second one raises the timeout under test.
    navigation_outcomes = [None, TimeoutError("sso timeout")]
    with (
        patch.object(test_bot, "web_open", new_callable = AsyncMock, side_effect = navigation_outcomes),
        patch.object(test_bot, "get_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_OUT) as state_probe,
        patch.object(test_bot, "_click_gdpr_banner", new_callable = AsyncMock),
        patch.object(test_bot, "_capture_login_detection_diagnostics_if_enabled", new_callable = AsyncMock) as diagnostics_hook,
    ):
        with pytest.raises(TimeoutError, match = "sso timeout"):
            await test_bot.login()

        diagnostics_hook.assert_awaited_once()
        # Only one state check is expected before the failing navigation.
        state_probe.assert_awaited_once()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_check_and_wait_for_captcha(self, test_bot:KleinanzeigenBot) -> None: async def test_check_and_wait_for_captcha(self, test_bot:KleinanzeigenBot) -> None:
@@ -764,62 +846,142 @@ class TestKleinanzeigenBotAuthentication:
async def test_fill_login_data_and_send(self, test_bot:KleinanzeigenBot) -> None: async def test_fill_login_data_and_send(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that login form filling works correctly.""" """Verify that login form filling works correctly."""
with ( with (
patch.object(test_bot, "_wait_for_auth0_login_context", new_callable = AsyncMock) as wait_context,
patch.object(test_bot, "_wait_for_auth0_password_step", new_callable = AsyncMock) as wait_password,
patch.object(test_bot, "_wait_for_post_auth0_submit_transition", new_callable = AsyncMock) as wait_transition,
patch.object(test_bot, "web_input") as mock_input, patch.object(test_bot, "web_input") as mock_input,
patch.object(test_bot, "web_click") as mock_click, patch.object(test_bot, "web_click") as mock_click,
patch.object(test_bot, "check_and_wait_for_captcha", new_callable = AsyncMock) as mock_captcha, patch.object(test_bot, "check_and_wait_for_captcha", new_callable = AsyncMock) as mock_captcha,
): ):
# Mock successful login form interaction
mock_input.return_value = AsyncMock()
mock_click.return_value = AsyncMock()
await test_bot.fill_login_data_and_send() await test_bot.fill_login_data_and_send()
wait_context.assert_awaited_once()
wait_password.assert_awaited_once()
wait_transition.assert_awaited_once()
assert mock_captcha.call_count == 1 assert mock_captcha.call_count == 1
assert mock_input.call_count == 3 # Username, clear password, set password assert mock_input.call_count == 2
assert mock_click.call_count == 1 # Submit button assert mock_click.call_count == 2
@pytest.mark.asyncio
async def test_fill_login_data_and_send_logs_generic_start_message(
    self, test_bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture
) -> None:
    """The login start message must be generic and must not contain the configured username."""
    with (
        caplog.at_level("INFO"),
        # Stub all Auth0 wait steps and browser interactions; only the log output is checked.
        patch.object(test_bot, "_wait_for_auth0_login_context", new_callable = AsyncMock),
        patch.object(test_bot, "_wait_for_auth0_password_step", new_callable = AsyncMock),
        patch.object(test_bot, "_wait_for_post_auth0_submit_transition", new_callable = AsyncMock),
        patch.object(test_bot, "web_input"),
        patch.object(test_bot, "web_click"),
        patch.object(test_bot, "check_and_wait_for_captcha", new_callable = AsyncMock),
    ):
        await test_bot.fill_login_data_and_send()

        captured_log = caplog.text
        assert "Logging in..." in captured_log
        assert test_bot.config.login.username not in captured_log
@pytest.mark.asyncio
async def test_fill_login_data_and_send_fails_when_password_step_missing(self, test_bot:KleinanzeigenBot) -> None:
    """An AssertionError from the Auth0 password-step wait must propagate to the caller."""
    password_step_failure = AssertionError("missing password")
    with (
        patch.object(test_bot, "_wait_for_auth0_login_context", new_callable = AsyncMock),
        patch.object(test_bot, "_wait_for_auth0_password_step", new_callable = AsyncMock, side_effect = password_step_failure),
        patch.object(test_bot, "web_input") as input_mock,
        patch.object(test_bot, "web_click") as click_mock,
    ):
        with pytest.raises(AssertionError, match = "missing password"):
            await test_bot.fill_login_data_and_send()

        # Exactly one input and one click happened before the failure surfaced.
        assert input_mock.call_count == 1
        assert click_mock.call_count == 1
@pytest.mark.asyncio
async def test_wait_for_post_auth0_submit_transition_url_branch(self, test_bot:KleinanzeigenBot) -> None:
    """When web_await succeeds, the helper returns without calling web_sleep."""
    with (
        patch.object(test_bot, "web_await", new_callable = AsyncMock, return_value = True) as await_mock,
        patch.object(test_bot, "web_sleep", new_callable = AsyncMock) as sleep_mock,
    ):
        await test_bot._wait_for_post_auth0_submit_transition()

        await_mock.assert_awaited_once()
        sleep_mock.assert_not_called()
@pytest.mark.asyncio
async def test_wait_for_post_auth0_submit_transition_dom_fallback_branch(self, test_bot:KleinanzeigenBot) -> None:
    """When web_await times out, a single positive is_logged_in check is enough and no sleep happens."""
    url_wait_outcomes = [TimeoutError()]
    with (
        patch.object(test_bot, "web_await", new_callable = AsyncMock, side_effect = url_wait_outcomes) as await_mock,
        patch.object(test_bot, "is_logged_in", new_callable = AsyncMock, return_value = True) as logged_in_mock,
        patch.object(test_bot, "web_sleep", new_callable = AsyncMock) as sleep_mock,
    ):
        await test_bot._wait_for_post_auth0_submit_transition()

        await_mock.assert_awaited_once()
        logged_in_mock.assert_awaited_once()
        sleep_mock.assert_not_called()
@pytest.mark.asyncio
async def test_wait_for_post_auth0_submit_transition_sleep_fallback_branch(self, test_bot:KleinanzeigenBot) -> None:
    """When web_await and every is_logged_in check time out, the helper sleeps once and raises TimeoutError."""
    url_wait_outcomes = [TimeoutError()]
    with (
        patch.object(test_bot, "web_await", new_callable = AsyncMock, side_effect = url_wait_outcomes) as await_mock,
        patch.object(test_bot, "is_logged_in", new_callable = AsyncMock, side_effect = asyncio.TimeoutError) as logged_in_mock,
        patch.object(test_bot, "web_sleep", new_callable = AsyncMock) as sleep_mock,
    ):
        with pytest.raises(TimeoutError, match = "Auth0 post-submit verification remained inconclusive"):
            await test_bot._wait_for_post_auth0_submit_transition()

        await_mock.assert_awaited_once()
        assert logged_in_mock.await_count == 2
        sleep_mock.assert_awaited_once()

        # The single sleep must be called with a real range (min strictly below max).
        assert sleep_mock.await_args is not None
        delay_bounds = cast(Any, sleep_mock.await_args).kwargs
        assert delay_bounds["min_ms"] < delay_bounds["max_ms"]
@pytest.mark.asyncio
async def test_wait_for_post_auth0_submit_transition_sleep_fallback_when_login_not_confirmed(
    self, test_bot:KleinanzeigenBot
) -> None:
    """When web_await times out and is_logged_in keeps returning False, the helper sleeps once and raises."""
    url_wait_outcomes = [TimeoutError()]
    with (
        patch.object(test_bot, "web_await", new_callable = AsyncMock, side_effect = url_wait_outcomes) as await_mock,
        patch.object(test_bot, "is_logged_in", new_callable = AsyncMock, return_value = False) as logged_in_mock,
        patch.object(test_bot, "web_sleep", new_callable = AsyncMock) as sleep_mock,
    ):
        with pytest.raises(TimeoutError, match = "Auth0 post-submit verification remained inconclusive"):
            await test_bot._wait_for_post_auth0_submit_transition()

        await_mock.assert_awaited_once()
        assert logged_in_mock.await_count == 2
        sleep_mock.assert_awaited_once()
@pytest.mark.asyncio
async def test_click_gdpr_banner_uses_quick_dom_timeout_and_passes_click_timeout(self, test_bot:KleinanzeigenBot) -> None:
    """Without an explicit timeout, _click_gdpr_banner resolves "quick_dom" and forwards it to find and click."""
    with (
        patch.object(test_bot, "_timeout", return_value = 1.25) as timeout_mock,
        patch.object(test_bot, "web_find", new_callable = AsyncMock) as find_mock,
        patch.object(test_bot, "web_click", new_callable = AsyncMock) as click_mock,
    ):
        await test_bot._click_gdpr_banner()

        timeout_mock.assert_called_once_with("quick_dom")
        # Both the lookup and the click must target the accept button with the resolved timeout.
        find_mock.assert_awaited_once_with(By.ID, "gdpr-banner-accept", timeout = 1.25)
        click_mock.assert_awaited_once_with(By.ID, "gdpr-banner-accept", timeout = 1.25)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_handle_after_login_logic(self, test_bot:KleinanzeigenBot) -> None: async def test_handle_after_login_logic(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that post-login handling works correctly.""" """Verify that post-login handling works correctly."""
with ( with (
patch.object(test_bot, "web_find") as mock_find, patch.object(test_bot, "_check_sms_verification", new_callable = AsyncMock, side_effect = TimeoutError()) as mock_sms,
patch.object(test_bot, "web_click") as mock_click, patch.object(test_bot, "_check_email_verification", new_callable = AsyncMock, side_effect = TimeoutError()) as mock_email,
patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput, patch.object(test_bot, "_click_gdpr_banner", new_callable = AsyncMock, side_effect = TimeoutError()) as mock_gdpr,
): ):
# Test case 1: No special handling needed
mock_find.side_effect = [TimeoutError(), TimeoutError(), TimeoutError()] # No phone verification, no email verification, no GDPR
mock_click.return_value = AsyncMock()
mock_ainput.return_value = ""
await test_bot.handle_after_login_logic() await test_bot.handle_after_login_logic()
assert mock_find.call_count == 3 mock_sms.assert_awaited_once()
assert mock_click.call_count == 0 mock_email.assert_awaited_once()
assert mock_ainput.call_count == 0 mock_gdpr.assert_awaited_once()
# Test case 2: Phone verification needed
mock_find.reset_mock()
mock_click.reset_mock()
mock_ainput.reset_mock()
mock_find.side_effect = [AsyncMock(), TimeoutError(), TimeoutError()] # Phone verification found, no email verification, no GDPR
await test_bot.handle_after_login_logic()
assert mock_find.call_count == 3
assert mock_click.call_count == 0 # No click needed, just wait for user
assert mock_ainput.call_count == 1 # Wait for user to complete verification
# Test case 3: GDPR banner present
mock_find.reset_mock()
mock_click.reset_mock()
mock_ainput.reset_mock()
mock_find.side_effect = [TimeoutError(), TimeoutError(), AsyncMock()] # No phone verification, no email verification, GDPR found
await test_bot.handle_after_login_logic()
assert mock_find.call_count == 3
assert mock_click.call_count == 2 # Click to accept GDPR and continue
assert mock_ainput.call_count == 0
class TestKleinanzeigenBotDiagnostics: class TestKleinanzeigenBotDiagnostics:
@@ -866,9 +1028,10 @@ class TestKleinanzeigenBotDiagnostics:
ad_cfg = Ad.model_validate(diagnostics_ad_config) ad_cfg = Ad.model_validate(diagnostics_ad_config)
ad_cfg_orig = copy.deepcopy(diagnostics_ad_config) ad_cfg_orig = copy.deepcopy(diagnostics_ad_config)
ad_file = str(tmp_path / "ad_000001_Test.yml") ad_file = str(tmp_path / "ad_000001_Test.yml")
ads_response = {"content": json.dumps({"ads": [], "paging": {"pageNum": 1, "last": 1}})}
with ( with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = {"content": json.dumps({"ads": []})}), patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = ads_response),
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")), patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")),
): ):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)])
@@ -907,9 +1070,10 @@ class TestKleinanzeigenBotDiagnostics:
ad_cfg = Ad.model_validate(diagnostics_ad_config) ad_cfg = Ad.model_validate(diagnostics_ad_config)
ad_cfg_orig = copy.deepcopy(diagnostics_ad_config) ad_cfg_orig = copy.deepcopy(diagnostics_ad_config)
ad_file = str(tmp_path / "ad_000001_Test.yml") ad_file = str(tmp_path / "ad_000001_Test.yml")
ads_response = {"content": json.dumps({"ads": [], "paging": {"pageNum": 1, "last": 1}})}
with ( with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = {"content": json.dumps({"ads": []})}), patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = ads_response),
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")), patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("boom")),
): ):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)])
@@ -1015,6 +1179,35 @@ class TestKleinanzeigenBotBasics:
web_await_mock.assert_awaited_once() web_await_mock.assert_awaited_once()
delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False) delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False)
@pytest.mark.asyncio
async def test_publish_ads_uses_millisecond_retry_delay_on_retryable_failure(
    self,
    test_bot:KleinanzeigenBot,
    base_ad_config:dict[str, Any],
    mock_page:MagicMock,
) -> None:
    """After one failed publish attempt, publish_ads awaits web_sleep(2_000) once and publishes again."""
    test_bot.page = mock_page
    test_bot.keep_old_ads = True

    ad_model = Ad.model_validate(base_ad_config)
    ad_raw = copy.deepcopy(base_ad_config)
    ad_path = "ad.yaml"

    # Empty published-ads listing with complete paging metadata.
    empty_listing = {"content": json.dumps({"ads": [], "paging": {"pageNum": 1, "last": 1}})}
    # First publish attempt times out, the second one succeeds.
    publish_outcomes = [TimeoutError("transient"), None]

    with (
        patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = empty_listing),
        patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = publish_outcomes) as publish_mock,
        patch.object(test_bot, "_detect_new_published_ad_ids", new_callable = AsyncMock, return_value = set()) as detect_mock,
        patch.object(test_bot, "web_sleep", new_callable = AsyncMock) as sleep_mock,
        patch.object(test_bot, "web_await", new_callable = AsyncMock, return_value = True),
    ):
        await test_bot.publish_ads([(ad_path, ad_model, ad_raw)])

        assert publish_mock.await_count == 2
        detect_mock.assert_awaited_once()
        sleep_mock.assert_awaited_once_with(2_000)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_publish_ads_aborts_retry_on_duplicate_detection( async def test_publish_ads_aborts_retry_on_duplicate_detection(
self, self,
@@ -1047,6 +1240,62 @@ class TestKleinanzeigenBotBasics:
# publish_ad should have been called only once — retry was aborted due to duplicate detection # publish_ad should have been called only once — retry was aborted due to duplicate detection
assert publish_mock.await_count == 1 assert publish_mock.await_count == 1
@pytest.mark.asyncio
async def test_publish_ads_aborts_retry_when_duplicate_verification_fetch_is_malformed(
    self,
    test_bot:KleinanzeigenBot,
    base_ad_config:dict[str, Any],
    mock_page:MagicMock,
) -> None:
    """publish_ad must be attempted only once when the third web_request response is malformed."""
    test_bot.page = mock_page

    ad_model = Ad.model_validate(base_ad_config)
    ad_raw = copy.deepcopy(base_ad_config)
    ad_path = "ad.yaml"

    # Two listing payloads followed by a response that is a bare list instead of a dict.
    listing_payload = json.dumps({"ads": []})
    request_outcomes = [
        {"content": listing_payload},
        {"content": listing_payload},
        [],
    ]

    with (
        patch.object(test_bot, "web_request", new_callable = AsyncMock, side_effect = request_outcomes),
        patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout")) as publish_mock,
    ):
        await test_bot.publish_ads([(ad_path, ad_model, ad_raw)])

        # No retry: the failing publish attempt stays the only one.
        assert publish_mock.await_count == 1
@pytest.mark.asyncio
async def test_publish_ads_aborts_retry_when_duplicate_verification_ads_entries_are_malformed(
    self,
    test_bot:KleinanzeigenBot,
    base_ad_config:dict[str, Any],
    mock_page:MagicMock,
) -> None:
    """publish_ad must be attempted only once when the third listing contains a non-dict ad entry."""
    test_bot.page = mock_page

    ad_model = Ad.model_validate(base_ad_config)
    ad_raw = copy.deepcopy(base_ad_config)
    ad_path = "ad.yaml"

    # Two well-formed empty listings followed by one whose "ads" list holds an int.
    well_formed_listing = json.dumps({"ads": [], "paging": {"pageNum": 1, "last": 1}})
    malformed_listing = json.dumps({"ads": [42], "paging": {"pageNum": 1, "last": 1}})
    request_outcomes = [
        {"content": well_formed_listing},
        {"content": well_formed_listing},
        {"content": malformed_listing},
    ]

    with (
        patch.object(test_bot, "web_request", new_callable = AsyncMock, side_effect = request_outcomes),
        patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout")) as publish_mock,
    ):
        await test_bot.publish_ads([(ad_path, ad_model, ad_raw)])

        # No retry: the failing publish attempt stays the only one.
        assert publish_mock.await_count == 1
def test_get_root_url(self, test_bot:KleinanzeigenBot) -> None: def test_get_root_url(self, test_bot:KleinanzeigenBot) -> None:
"""Test root URL retrieval.""" """Test root URL retrieval."""
assert test_bot.root_url == "https://www.kleinanzeigen.de" assert test_bot.root_url == "https://www.kleinanzeigen.de"

View File

@@ -187,6 +187,17 @@ class TestJSONPagination:
pytest.fail(f"expected 2 ads, got {len(result)}") pytest.fail(f"expected 2 ads, got {len(result)}")
mock_request.assert_awaited_once() mock_request.assert_awaited_once()
@pytest.mark.asyncio
async def test_fetch_published_ads_strict_raises_on_missing_paging_dict(self, bot:KleinanzeigenBot) -> None:
    """In strict mode a response without a "paging" entry must raise ValueError."""
    payload_without_paging = {"content": json.dumps({"ads": [{"id": 1}, {"id": 2}]})}

    with (
        patch.object(bot, "web_request", new_callable = AsyncMock, return_value = payload_without_paging),
        pytest.raises(ValueError, match = "Missing or invalid paging info on page 1: NoneType"),
    ):
        await bot._fetch_published_ads(strict = True)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_fetch_published_ads_non_integer_paging_values(self, bot:KleinanzeigenBot) -> None: async def test_fetch_published_ads_non_integer_paging_values(self, bot:KleinanzeigenBot) -> None:
"""Test handling of non-integer paging values.""" """Test handling of non-integer paging values."""
@@ -219,6 +230,33 @@ class TestJSONPagination:
if len(result) != 0: if len(result) != 0:
pytest.fail(f"expected empty list when 'ads' is not a list, got: {result}") pytest.fail(f"expected empty list when 'ads' is not a list, got: {result}")
@pytest.mark.asyncio
async def test_fetch_published_ads_strict_rejects_non_dict_entries(self, bot:KleinanzeigenBot) -> None:
    """In strict mode a non-dict element inside the "ads" list must raise TypeError."""
    listing = {"ads": [42, {"id": 1}], "paging": {"pageNum": 1, "last": 1}}
    mocked_response = {"content": json.dumps(listing)}

    with (
        patch.object(bot, "web_request", new_callable = AsyncMock, return_value = mocked_response),
        pytest.raises(TypeError, match = "Unexpected ad entry type on page 1: int"),
    ):
        await bot._fetch_published_ads(strict = True)
@pytest.mark.asyncio
async def test_fetch_published_ads_non_strict_filters_non_dict_entries(self, bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None:
    """In non-strict mode non-dict "ads" elements are dropped and a warning reports how many."""
    listing = {"ads": [42, {"id": 1}, "broken"], "paging": {"pageNum": 1, "last": 1}}
    mocked_response = {"content": json.dumps(listing)}

    with (
        patch.object(bot, "web_request", new_callable = AsyncMock, return_value = mocked_response),
        caplog.at_level("WARNING"),
    ):
        result = await bot._fetch_published_ads(strict = False)

    # Only the single dict entry survives; the int and the str are filtered out.
    if result != [{"id": 1}]:
        pytest.fail(f"expected malformed entries to be filtered out, got: {result}")
    if "Filtered 2 malformed ad entries on page 1" not in caplog.text:
        pytest.fail(f"expected malformed-entry warning in logs, got: {caplog.text}")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_fetch_published_ads_timeout(self, bot:KleinanzeigenBot) -> None: async def test_fetch_published_ads_timeout(self, bot:KleinanzeigenBot) -> None:
"""Test handling of timeout during pagination.""" """Test handling of timeout during pagination."""
@@ -229,3 +267,26 @@ class TestJSONPagination:
if result != []: if result != []:
pytest.fail(f"Expected empty list on timeout, got {result}") pytest.fail(f"Expected empty list on timeout, got {result}")
@pytest.mark.asyncio
async def test_fetch_published_ads_non_strict_handles_non_string_content_type(self, bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None:
    """In non-strict mode a response whose "content" is None yields an empty list and a warning."""
    mocked_response = {"content": None}

    with (
        patch.object(bot, "web_request", new_callable = AsyncMock, return_value = mocked_response),
        caplog.at_level("WARNING"),
    ):
        result = await bot._fetch_published_ads(strict = False)

    if result != []:
        pytest.fail(f"expected empty result on non-string content in non-strict mode, got: {result}")
    if "Unexpected response content type on page 1: NoneType" not in caplog.text:
        pytest.fail(f"expected non-string content warning in logs, got: {caplog.text}")
@pytest.mark.asyncio
async def test_fetch_published_ads_strict_raises_on_non_string_content_type(self, bot:KleinanzeigenBot) -> None:
    """In strict mode a response whose "content" is None must raise TypeError."""
    mocked_response = {"content": None}

    with (
        patch.object(bot, "web_request", new_callable = AsyncMock, return_value = mocked_response),
        pytest.raises(TypeError, match = "Unexpected response content type on page 1: NoneType"),
    ):
        await bot._fetch_published_ads(strict = True)