mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
feat: add grouped selector timeout fallback for login detection (#843)
This commit is contained in:
@@ -442,7 +442,7 @@ class TestKleinanzeigenBotAuthentication:
|
||||
@pytest.mark.asyncio
|
||||
async def test_is_logged_in_returns_true_when_logged_in(self, test_bot:KleinanzeigenBot) -> None:
|
||||
"""Verify that login check returns true when logged in."""
|
||||
with patch.object(test_bot, "web_text", return_value = "Welcome dummy_user"):
|
||||
with patch.object(test_bot, "web_text_first_available", new_callable = AsyncMock, return_value = ("Welcome dummy_user", 0)):
|
||||
assert await test_bot.is_logged_in() is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -450,11 +450,9 @@ class TestKleinanzeigenBotAuthentication:
|
||||
"""Verify that login check returns true when logged in with alternative element."""
|
||||
with patch.object(
|
||||
test_bot,
|
||||
"web_text",
|
||||
side_effect = [
|
||||
TimeoutError(), # First try with mr-medium fails
|
||||
"angemeldet als: dummy_user", # Second try with user-email succeeds
|
||||
],
|
||||
"web_text_first_available",
|
||||
new_callable = AsyncMock,
|
||||
return_value = ("angemeldet als: dummy_user", 1),
|
||||
):
|
||||
assert await test_bot.is_logged_in() is True
|
||||
|
||||
@@ -462,7 +460,7 @@ class TestKleinanzeigenBotAuthentication:
|
||||
async def test_is_logged_in_returns_false_when_not_logged_in(self, test_bot:KleinanzeigenBot) -> None:
|
||||
"""Verify that login check returns false when not logged in."""
|
||||
with (
|
||||
patch.object(test_bot, "web_text", side_effect = TimeoutError),
|
||||
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError),
|
||||
patch.object(
|
||||
test_bot,
|
||||
"web_request",
|
||||
@@ -472,10 +470,23 @@ class TestKleinanzeigenBotAuthentication:
|
||||
):
|
||||
assert await test_bot.is_logged_in() is False
|
||||
|
||||
@pytest.mark.asyncio
async def test_is_logged_in_uses_selector_group_timeout_key(self, test_bot:KleinanzeigenBot) -> None:
    """Login detection must go through the grouped selector lookup.

    The grouped text helper is mocked to report a match on the first selector.
    The test then inspects the single awaited call and checks the selector list,
    the ``login_detection`` key and the timeout resolved for that key.
    """
    expected_selectors = [(By.CLASS_NAME, "mr-medium"), (By.ID, "user-email")]
    timeout_key = "login_detection"

    with patch.object(
        test_bot,
        "web_text_first_available",
        new_callable = AsyncMock,
        return_value = ("Welcome dummy_user", 0),
    ) as grouped_lookup:
        logged_in = await test_bot.is_logged_in(include_probe = False)
        assert logged_in is True

    grouped_lookup.assert_awaited_once()
    awaited_call = grouped_lookup.await_args
    assert awaited_call is not None
    # The selector group is passed positionally, key and timeout by keyword.
    assert awaited_call.args[0] == expected_selectors
    assert awaited_call.kwargs["key"] == timeout_key
    assert awaited_call.kwargs["timeout"] == test_bot._timeout(timeout_key)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_login_state_prefers_dom_over_auth_probe(self, test_bot:KleinanzeigenBot) -> None:
|
||||
with (
|
||||
patch.object(test_bot, "web_text", new_callable = AsyncMock, return_value = "Welcome dummy_user") as web_text,
|
||||
patch.object(test_bot, "web_text_first_available", new_callable = AsyncMock, return_value = ("Welcome dummy_user", 0)) as web_text,
|
||||
patch.object(
|
||||
test_bot, "_auth_probe_login_state", new_callable = AsyncMock, side_effect = AssertionError("Probe must not run when DOM is deterministic")
|
||||
) as probe,
|
||||
@@ -487,32 +498,32 @@ class TestKleinanzeigenBotAuthentication:
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_login_state_falls_back_to_auth_probe_when_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None:
|
||||
with (
|
||||
patch.object(test_bot, "web_text", side_effect = TimeoutError) as web_text,
|
||||
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError) as web_text,
|
||||
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_IN) as probe,
|
||||
):
|
||||
assert await test_bot.get_login_state() == LoginState.LOGGED_IN
|
||||
assert web_text.call_count == 2
|
||||
web_text.assert_awaited_once()
|
||||
probe.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_login_state_falls_back_to_auth_probe_when_dom_logged_out(self, test_bot:KleinanzeigenBot) -> None:
|
||||
with (
|
||||
patch.object(test_bot, "web_text", side_effect = TimeoutError) as web_text,
|
||||
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError) as web_text,
|
||||
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.LOGGED_OUT) as probe,
|
||||
):
|
||||
assert await test_bot.get_login_state() == LoginState.LOGGED_OUT
|
||||
assert web_text.call_count == 2
|
||||
web_text.assert_awaited_once()
|
||||
probe.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_login_state_returns_unknown_when_probe_unknown_and_dom_inconclusive(self, test_bot:KleinanzeigenBot) -> None:
|
||||
with (
|
||||
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN) as probe,
|
||||
patch.object(test_bot, "web_text", side_effect = TimeoutError) as web_text,
|
||||
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError) as web_text,
|
||||
):
|
||||
assert await test_bot.get_login_state() == LoginState.UNKNOWN
|
||||
probe.assert_awaited_once()
|
||||
assert web_text.call_count == 2
|
||||
web_text.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_login_state_unknown_captures_diagnostics_when_enabled(self, test_bot:KleinanzeigenBot, tmp_path:Path) -> None:
|
||||
@@ -525,7 +536,7 @@ class TestKleinanzeigenBotAuthentication:
|
||||
|
||||
with (
|
||||
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN),
|
||||
patch.object(test_bot, "web_text", side_effect = TimeoutError),
|
||||
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError),
|
||||
):
|
||||
assert await test_bot.get_login_state() == LoginState.UNKNOWN
|
||||
|
||||
@@ -543,7 +554,7 @@ class TestKleinanzeigenBotAuthentication:
|
||||
|
||||
with (
|
||||
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN),
|
||||
patch.object(test_bot, "web_text", side_effect = TimeoutError),
|
||||
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError),
|
||||
):
|
||||
assert await test_bot.get_login_state() == LoginState.UNKNOWN
|
||||
|
||||
@@ -566,7 +577,7 @@ class TestKleinanzeigenBotAuthentication:
|
||||
|
||||
with (
|
||||
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN),
|
||||
patch.object(test_bot, "web_text", side_effect = TimeoutError),
|
||||
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError),
|
||||
patch("kleinanzeigen_bot.sys.stdin", stdin_mock),
|
||||
patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput,
|
||||
):
|
||||
@@ -594,7 +605,7 @@ class TestKleinanzeigenBotAuthentication:
|
||||
|
||||
with (
|
||||
patch.object(test_bot, "_auth_probe_login_state", new_callable = AsyncMock, return_value = LoginState.UNKNOWN),
|
||||
patch.object(test_bot, "web_text", side_effect = TimeoutError),
|
||||
patch.object(test_bot, "web_text_first_available", side_effect = TimeoutError),
|
||||
patch("kleinanzeigen_bot.sys.stdin", stdin_mock),
|
||||
patch("kleinanzeigen_bot.ainput", new_callable = AsyncMock) as mock_ainput,
|
||||
):
|
||||
|
||||
@@ -2,9 +2,6 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
"""Unit tests for web_scraping_mixin.py focusing on error handling scenarios.
|
||||
|
||||
Copyright (c) 2024, kleinanzeigen-bot contributors.
|
||||
All rights reserved.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -536,6 +533,112 @@ class TestTimeoutAndRetryHelpers:
|
||||
):
|
||||
await web_scraper._run_with_timeout_retries(never_called, description = "guarded-op")
|
||||
|
||||
def test_allocate_selector_group_budgets_distributes_total(self, web_scraper:WebScrapingMixin) -> None:
    """The per-selector slices must add up to the whole timeout budget.

    Two selectors share a 2.0 second budget: exactly two slices are expected
    and their sum has to match the original budget.
    """
    total_timeout = 2.0
    slices = web_scraper._allocate_selector_group_budgets(total_timeout, 2)
    assert len(slices) == 2
    primary_slice, backup_slice = slices
    assert primary_slice + backup_slice == pytest.approx(total_timeout)
|
||||
|
||||
def test_allocate_selector_group_budgets_rejects_zero_selector_count(self, web_scraper:WebScrapingMixin) -> None:
    """An empty selector group is invalid input for the budget helper.

    Asking for budgets with a selector count of zero is expected to raise
    ``ValueError`` with the documented message.
    """
    expected_message = "selector_count must be > 0"
    with pytest.raises(ValueError, match = expected_message):
        web_scraper._allocate_selector_group_budgets(1.0, 0)
|
||||
|
||||
def test_allocate_selector_group_budgets_single_selector_clamps_negative_timeout(self, web_scraper:WebScrapingMixin) -> None:
    """A lone selector must never receive a negative budget.

    A timeout of -1.0 for a single selector is expected to yield one slice of 0.0.
    """
    negative_timeout = -1.0
    clamped = web_scraper._allocate_selector_group_budgets(negative_timeout, 1)
    assert clamped == [0.0]
|
||||
|
||||
def test_allocate_selector_group_budgets_non_positive_timeout_returns_zeroes(self, web_scraper:WebScrapingMixin) -> None:
    """With no time to hand out, every selector in the group gets a zero budget.

    A 0.0 second timeout split across three selectors is expected to produce
    three 0.0 slices.
    """
    selector_count = 3
    slices = web_scraper._allocate_selector_group_budgets(0.0, selector_count)
    assert slices == [0.0] * selector_count
|
||||
|
||||
def test_allocate_selector_group_budgets_tiny_timeout_splits_equally(self, web_scraper:WebScrapingMixin) -> None:
    """A budget too small to honour the per-selector floors is divided evenly."""
    # Two selectors would need 2 * 0.25s to satisfy the floors; 0.2s is below that,
    # so the expected outcome is an equal split of 0.1s each.
    tiny_timeout = 0.2
    slices = web_scraper._allocate_selector_group_budgets(tiny_timeout, 2)
    assert slices == pytest.approx([0.1, 0.1])
|
||||
|
||||
def test_allocate_selector_group_budgets_redistributes_surplus_to_primary(self, web_scraper:WebScrapingMixin) -> None:
    """Whatever the capped last backup cannot use must flow back to the primary selector."""
    slices = web_scraper._allocate_selector_group_budgets(5.0, 2)

    # Expected values, derived with the allocator's current constants:
    #   primary     = min(5.0 * 0.70, 5.0 - 0.25) = 3.5
    #   last backup = capped at 0.75, although 1.5 would be left over
    #   the unused part returns to primary: 5.0 - 0.75 = 4.25
    expected_primary = 4.25
    expected_last_backup = 0.75
    assert slices == pytest.approx([expected_primary, expected_last_backup])
|
||||
|
||||
def test_allocate_selector_group_budgets_multiple_backups_apply_reserve_logic(self, web_scraper:WebScrapingMixin) -> None:
    """With several backups, reserve and floor rules apply before the final backup is capped."""
    total_timeout = 3.0
    slices = web_scraper._allocate_selector_group_budgets(total_timeout, 4)

    # Expected values, derived with the allocator's current constants:
    #   reserve_for_backups = 0.25 * 3 = 0.75
    #   primary             = min(3.0 * 0.70, 2.25) = 2.1
    #   remaining 0.9  -> backup1 = max(0.25, min(0.75, 0.9 - 0.5))  = 0.4
    #   remaining 0.5  -> backup2 = max(0.25, min(0.75, 0.5 - 0.25)) = 0.25
    #   final backup        = min(0.25, 0.75) = 0.25
    expected_slices = [2.1, 0.4, 0.25, 0.25]
    assert slices == pytest.approx(expected_slices)
    # The slices together must still account for the full budget.
    assert sum(slices) == pytest.approx(total_timeout)
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_find_first_available_uses_shared_budget(self, web_scraper:WebScrapingMixin) -> None:
    """Alternatives are tried in order, each with its own slice of one shared budget.

    The first selector times out and the second one succeeds. The timeouts
    handed to the two single-shot lookups must add up to the 2.0 seconds that
    were granted to the whole group.
    """
    # Last timeout seen per slot; anything that is not "first" counts as "second".
    seen_timeouts:dict[str, float] = {}
    located_element = AsyncMock(spec = Element)

    async def fake_find_once(
        selector_type:By, selector_value:str, timeout:float, *, parent:Element | None = None
    ) -> Element:
        slot = "first" if selector_value == "first" else "second"
        seen_timeouts[slot] = timeout
        if slot == "first":
            raise TimeoutError("first timeout")
        return located_element

    candidates = [(By.ID, "first"), (By.ID, "second")]
    with patch.object(web_scraper, "_web_find_once", side_effect = fake_find_once):
        result, index = await web_scraper.web_find_first_available(
            candidates,
            timeout = 2.0,
            key = "login_detection",
        )

    assert result is located_element
    assert index == 1
    first_timeout = seen_timeouts.get("first")
    second_timeout = seen_timeouts.get("second")
    assert first_timeout is not None
    assert second_timeout is not None
    assert first_timeout + second_timeout == pytest.approx(2.0)
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_find_first_available_exhausts_candidates_once_when_retry_disabled(self, web_scraper:WebScrapingMixin) -> None:
    """With retries switched off, every candidate selector is attempted exactly once.

    Both single-shot lookups time out, so the group lookup must fail with its
    own timeout message after two awaited attempts.
    """
    web_scraper.config.timeouts.retry_enabled = False
    candidates = [(By.ID, "first"), (By.ID, "second")]

    with patch.object(web_scraper, "_web_find_once", side_effect = TimeoutError("not found")) as find_once:
        with pytest.raises(TimeoutError, match = "No HTML element found using selector group"):
            await web_scraper.web_find_first_available(candidates, timeout = 1.0)

    # One attempt per candidate, no multiplication by a retry loop.
    assert find_once.await_count == 2
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_find_first_available_rejects_empty_selectors(self, web_scraper:WebScrapingMixin) -> None:
    """An empty selector group must be rejected with ``ValueError``."""
    no_selectors:list = []
    expected_message = "selectors must contain at least one selector"
    with pytest.raises(ValueError, match = expected_message):
        await web_scraper.web_find_first_available(no_selectors)
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_text_first_available_returns_text_and_index(self, web_scraper:WebScrapingMixin) -> None:
    """The text helper returns the element's text together with the matched selector index.

    The group lookup is mocked to report a hit on the second selector (index 1)
    and the element's ``apply`` call is mocked to yield ``"dummy-user"``.
    """
    element_stub = AsyncMock(spec = Element)
    element_stub.apply = AsyncMock(return_value = "dummy-user")
    selector_group = [(By.ID, "a"), (By.ID, "b")]

    with patch.object(
        web_scraper,
        "web_find_first_available",
        new_callable = AsyncMock,
        return_value = (element_stub, 1),
    ):
        extracted_text, matched_index = await web_scraper.web_text_first_available(selector_group, key = "login_detection")

    assert extracted_text == "dummy-user"
    assert matched_index == 1
|
||||
|
||||
|
||||
class TestSelectorTimeoutMessages:
|
||||
"""Ensure selector helpers provide informative timeout messages."""
|
||||
@@ -815,6 +918,21 @@ class TestWebScrolling:
|
||||
with pytest.raises(TimeoutError):
|
||||
await web_scraper.web_await(condition, timeout = 0.05)
|
||||
|
||||
@pytest.mark.asyncio
async def test_web_await_caps_sleep_to_remaining_timeout(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
    """A single poll sleep in web_await may not exceed the timeout budget.

    The awaited condition never becomes true, so web_await has to time out.
    The first sleep recorded on the mocked page is then compared against the
    0.2 second budget.
    """
    timeout_budget = 0.2

    async def never_true() -> bool:
        return False

    with pytest.raises(TimeoutError):
        await web_scraper.web_await(never_true, timeout = timeout_budget, apply_multiplier = False)

    page_sleep = cast(AsyncMock, mock_page.sleep)
    page_sleep.assert_awaited()
    # Only the first recorded sleep is inspected; its duration is the first positional argument.
    first_sleep_call = page_sleep.await_args_list[0]
    assert first_sleep_call.args[0] <= timeout_budget
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_web_find_retry_mechanism(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
|
||||
"""Test web_find retries until element is found within timeout."""
|
||||
|
||||
Reference in New Issue
Block a user