From 49e44b9a204ae86b150396d18691c939147b2e4d Mon Sep 17 00:00:00 2001 From: Jens <1742418+1cu@users.noreply.github.com> Date: Fri, 30 Jan 2026 06:03:39 +0100 Subject: [PATCH] fix: prioritize DOM-based login detection over auth probe for stealth (#798) --- src/kleinanzeigen_bot/__init__.py | 20 +++++++++--------- tests/unit/test_init.py | 34 ++++++++++++++++--------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 20ad5ad..63891c6 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -894,9 +894,8 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 async def _auth_probe_login_state(self) -> LoginState: """Probe an auth-required endpoint to classify login state. - The probe is non-mutating (GET request). It is used as a primary method by - get_login_state() to classify login state, falling back to DOM checks only when - the probe returns UNKNOWN. + The probe is non-mutating (GET request). It is used as a fallback method by + get_login_state() when DOM-based checks are inconclusive. """ url = f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT" @@ -932,21 +931,22 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 """Determine current login state using layered detection. Order: - 1) Server-side auth probe via `_auth_probe_login_state` (preferred) - 2) DOM-based check via `is_logged_in(include_probe=False)` + 1) DOM-based check via `is_logged_in(include_probe=False)` (preferred - stealthy) + 2) Server-side auth probe via `_auth_probe_login_state` (fallback - more reliable) 3) If still inconclusive, capture diagnostics via `_capture_login_detection_diagnostics_if_enabled` and return `UNKNOWN` """ - # Prefer the deterministic, server-side auth probe first. + # Prefer DOM-based checks first to minimize bot-like behavior. + # The auth probe makes a JSON API request that normal users wouldn't trigger. + if await self.is_logged_in(include_probe = False): + return LoginState.LOGGED_IN + + # Fall back to the more reliable server-side auth probe. # SPA/hydration delays can cause DOM-based checks to temporarily miss login indicators. state = await self._auth_probe_login_state() if state != LoginState.UNKNOWN: return state - # Fall back to DOM-based checks only when the probe is inconclusive. - if await self.is_logged_in(include_probe = False): - return LoginState.LOGGED_IN - await self._capture_login_detection_diagnostics_if_enabled() return LoginState.UNKNOWN diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index 1956bac..6cfedb8 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -345,34 +345,36 @@ class TestKleinanzeigenBotAuthentication: assert await test_bot.is_logged_in() is False @pytest.mark.asyncio - async def test_get_login_state_prefers_auth_probe_over_dom(self, test_bot:KleinanzeigenBot) -> None: + async def test_get_login_state_prefers_dom_over_auth_probe(self, test_bot:KleinanzeigenBot) -> None: with ( - patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_IN) as probe, - patch.object(test_bot, "web_text", side_effect = AssertionError("DOM check must not run when probe is deterministic")) as web_text, - ): - assert await test_bot.get_login_state() == LoginState.LOGGED_IN - probe.assert_awaited_once() - web_text.assert_not_called() - - @pytest.mark.asyncio - async def test_get_login_state_falls_back_to_dom_when_probe_unknown(self, test_bot:KleinanzeigenBot) -> None: - with ( - patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN) as probe, patch.object(test_bot, "web_text", new_callable = AsyncMock, return_value = "Welcome dummy_user") as web_text, + patch.object( + test_bot, "_auth_probe_login_state", new_callable = AsyncMock, side_effect = AssertionError("Probe must not run when DOM is deterministic") + ) as probe, ): assert await test_bot.get_login_state() == LoginState.LOGGED_IN - probe.assert_awaited_once() web_text.assert_awaited_once() + probe.assert_not_called() @pytest.mark.asyncio - async def test_get_login_state_prefers_logged_out_from_probe_over_dom(self, test_bot:KleinanzeigenBot) -> None: + async def test_get_login_state_falls_back_to_auth_probe_when_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None: with ( + patch.object(test_bot, "web_text", side_effect = TimeoutError) as web_text, + patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_IN) as probe, + ): + assert await test_bot.get_login_state() == LoginState.LOGGED_IN + assert web_text.call_count == 2 + probe.assert_awaited_once() + + @pytest.mark.asyncio + async def test_get_login_state_falls_back_to_auth_probe_when_dom_logged_out(self, test_bot:KleinanzeigenBot) -> None: + with ( + patch.object(test_bot, "web_text", side_effect = TimeoutError) as web_text, patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_OUT) as probe, - patch.object(test_bot, "web_text", side_effect = AssertionError("DOM check must not run when probe is deterministic")) as web_text, ): assert await test_bot.get_login_state() == LoginState.LOGGED_OUT + assert web_text.call_count == 2 probe.assert_awaited_once() - web_text.assert_not_called() @pytest.mark.asyncio async def test_get_login_state_returns_unknown_when_probe_unknown_and_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None: