fix: prioritize DOM-based login detection over auth probe for stealth (#798)

This commit is contained in:
Jens
2026-01-30 06:03:39 +01:00
committed by GitHub
parent c0378412d1
commit 49e44b9a20
2 changed files with 28 additions and 26 deletions

View File

@@ -894,9 +894,8 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
async def _auth_probe_login_state(self) -> LoginState: async def _auth_probe_login_state(self) -> LoginState:
"""Probe an auth-required endpoint to classify login state. """Probe an auth-required endpoint to classify login state.
The probe is non-mutating (GET request). It is used as a primary method by The probe is non-mutating (GET request). It is used as a fallback method by
get_login_state() to classify login state, falling back to DOM checks only when get_login_state() when DOM-based checks are inconclusive.
the probe returns UNKNOWN.
""" """
url = f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT" url = f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"
@@ -932,21 +931,22 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
"""Determine current login state using layered detection. """Determine current login state using layered detection.
Order: Order:
1) Server-side auth probe via `_auth_probe_login_state` (preferred) 1) DOM-based check via `is_logged_in(include_probe=False)` (preferred - stealthy)
2) DOM-based check via `is_logged_in(include_probe=False)` 2) Server-side auth probe via `_auth_probe_login_state` (fallback - more reliable)
3) If still inconclusive, capture diagnostics via 3) If still inconclusive, capture diagnostics via
`_capture_login_detection_diagnostics_if_enabled` and return `UNKNOWN` `_capture_login_detection_diagnostics_if_enabled` and return `UNKNOWN`
""" """
# Prefer the deterministic, server-side auth probe first. # Prefer DOM-based checks first to minimize bot-like behavior.
# The auth probe makes a JSON API request that normal users wouldn't trigger.
if await self.is_logged_in(include_probe = False):
return LoginState.LOGGED_IN
# Fall back to the more reliable server-side auth probe.
# SPA/hydration delays can cause DOM-based checks to temporarily miss login indicators. # SPA/hydration delays can cause DOM-based checks to temporarily miss login indicators.
state = await self._auth_probe_login_state() state = await self._auth_probe_login_state()
if state != LoginState.UNKNOWN: if state != LoginState.UNKNOWN:
return state return state
# Fall back to DOM-based checks only when the probe is inconclusive.
if await self.is_logged_in(include_probe = False):
return LoginState.LOGGED_IN
await self._capture_login_detection_diagnostics_if_enabled() await self._capture_login_detection_diagnostics_if_enabled()
return LoginState.UNKNOWN return LoginState.UNKNOWN

View File

@@ -345,34 +345,36 @@ class TestKleinanzeigenBotAuthentication:
assert await test_bot.is_logged_in() is False assert await test_bot.is_logged_in() is False
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_get_login_state_prefers_auth_probe_over_dom(self, test_bot:KleinanzeigenBot) -> None: async def test_get_login_state_prefers_dom_over_auth_probe(self, test_bot:KleinanzeigenBot) -> None:
with ( with (
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_IN) as probe,
patch.object(test_bot, "web_text", side_effect = AssertionError("DOM check must not run when probe is deterministic")) as web_text,
):
assert await test_bot.get_login_state() == LoginState.LOGGED_IN
probe.assert_awaited_once()
web_text.assert_not_called()
@pytest.mark.asyncio
async def test_get_login_state_falls_back_to_dom_when_probe_unknown(self, test_bot:KleinanzeigenBot) -> None:
with (
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN) as probe,
patch.object(test_bot, "web_text", new_callable = AsyncMock, return_value = "Welcome dummy_user") as web_text, patch.object(test_bot, "web_text", new_callable = AsyncMock, return_value = "Welcome dummy_user") as web_text,
patch.object(
test_bot, "_auth_probe_login_state", new_callable = AsyncMock, side_effect = AssertionError("Probe must not run when DOM is deterministic")
) as probe,
): ):
assert await test_bot.get_login_state() == LoginState.LOGGED_IN assert await test_bot.get_login_state() == LoginState.LOGGED_IN
probe.assert_awaited_once()
web_text.assert_awaited_once() web_text.assert_awaited_once()
probe.assert_not_called()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_get_login_state_prefers_logged_out_from_probe_over_dom(self, test_bot:KleinanzeigenBot) -> None: async def test_get_login_state_falls_back_to_auth_probe_when_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None:
with ( with (
patch.object(test_bot, "web_text", side_effect = TimeoutError) as web_text,
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_IN) as probe,
):
assert await test_bot.get_login_state() == LoginState.LOGGED_IN
assert web_text.call_count == 2
probe.assert_awaited_once()
@pytest.mark.asyncio
async def test_get_login_state_falls_back_to_auth_probe_when_dom_logged_out(self, test_bot:KleinanzeigenBot) -> None:
with (
patch.object(test_bot, "web_text", side_effect = TimeoutError) as web_text,
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_OUT) as probe, patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_OUT) as probe,
patch.object(test_bot, "web_text", side_effect = AssertionError("DOM check must not run when probe is deterministic")) as web_text,
): ):
assert await test_bot.get_login_state() == LoginState.LOGGED_OUT assert await test_bot.get_login_state() == LoginState.LOGGED_OUT
assert web_text.call_count == 2
probe.assert_awaited_once() probe.assert_awaited_once()
web_text.assert_not_called()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_get_login_state_returns_unknown_when_probe_unknown_and_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None: async def test_get_login_state_returns_unknown_when_probe_unknown_and_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None: