feat: add configurable timeouts (#673)

## ℹ️ Description
- Related issues: #671, #658
- Introduces configurable timeout controls plus retry/backoff handling
for flaky DOM operations.

We often see timeouts which are not reproducible in certain
configurations. I suspect timeout issues based on a combination of
internet speed, browser, OS, age of the computer, and the weather.

This PR introduces a comprehensive config model to tweak timeouts.

## 📋 Changes Summary
- add TimeoutConfig to the main config/schema and expose timeouts in
README/docs
- wire WebScrapingMixin, extractor, update checker, and browser
diagnostics to honor the configurable timeouts and retries
- update translations/tests to cover the new behaviour and ensure
lint/mypy/pyright pipelines remain green

### ⚙️ Type of Change
- [ ] 🐞 Bug fix (non-breaking change which fixes an issue)
- [x] New feature (adds new functionality without breaking existing
usage)
- [ ] 💥 Breaking change (changes that might break existing user setups,
scripts, or configurations)

## Checklist
- [x] I have reviewed my changes to ensure they meet the project's
standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run
test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Centralized, configurable timeout system for web interactions,
detection flows, publishing, and pagination.
* Optional retry with exponential backoff for operations that time out.

* **Improvements**
* Replaced fixed wait times with dynamic timeouts throughout workflows.
  * More informative timeout-related messages and diagnostics.

* **Tests**
* New and expanded test coverage for timeout behavior, pagination,
diagnostics, and retry logic.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Jens
2025-11-13 15:08:52 +01:00
committed by GitHub
parent ac678ed888
commit a3ac27c441
16 changed files with 972 additions and 121 deletions

View File

@@ -1,7 +1,9 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from kleinanzeigen_bot.model.config_model import AdDefaults, Config
import pytest
from kleinanzeigen_bot.model.config_model import AdDefaults, Config, TimeoutConfig
def test_migrate_legacy_description_prefix() -> None:
@@ -74,3 +76,50 @@ def test_minimal_config_validation() -> None:
config = Config.model_validate(minimal_cfg)
assert config.login.username == "dummy"
assert config.login.password == "dummy" # noqa: S105
def test_timeout_config_defaults_and_effective_values() -> None:
    """Verify that resolve() plus effective() combine multiplier and backoff correctly."""
    config = Config.model_validate({
        "login": {"username": "dummy", "password": "dummy"},  # noqa: S105
        "timeouts": {
            "multiplier": 2.0,
            "pagination_initial": 12.0,
            "retry_max_attempts": 3,
            "retry_backoff_factor": 2.0
        }
    })
    timeout_cfg = config.timeouts
    resolved = timeout_cfg.resolve("pagination_initial")
    assert resolved == 12.0
    # attempt 0: only the global multiplier applies (backoff ** 0 == 1)
    assert timeout_cfg.effective("pagination_initial") == resolved * timeout_cfg.multiplier
    # attempt 1: the backoff factor is applied once on top of the multiplier
    expected_retry = resolved * timeout_cfg.multiplier * timeout_cfg.retry_backoff_factor
    assert timeout_cfg.effective("pagination_initial", attempt = 1) == expected_retry
def test_validate_glob_pattern_rejects_blank_strings() -> None:
    """Blank ad_files globs are rejected while real patterns validate cleanly."""
    base_cfg = {
        "ad_files": [" "],
        "ad_defaults": {"contact": {"name": "dummy", "zipcode": "12345"}},
        "login": {"username": "dummy", "password": "dummy"}
    }
    # A whitespace-only pattern must be refused with a descriptive error.
    with pytest.raises(ValueError, match = "must be a non-empty, non-blank glob pattern"):
        Config.model_validate(base_cfg)
    # The same config with a proper glob passes and keeps the pattern verbatim.
    cfg = Config.model_validate({**base_cfg, "ad_files": ["*.yaml"]})
    assert cfg.ad_files == ["*.yaml"]
def test_timeout_config_resolve_returns_specific_value() -> None:
    """resolve() must prefer an explicitly configured key over the default."""
    config = TimeoutConfig(default = 4.0, page_load = 12.5)
    assert config.resolve("page_load") == 12.5
def test_timeout_config_resolve_falls_back_to_default() -> None:
    """resolve() must fall back to the default for unknown timeout keys."""
    config = TimeoutConfig(default = 3.0)
    assert config.resolve("nonexistent_key") == 3.0

View File

@@ -412,6 +412,60 @@ class TestAdExtractorNavigation:
call(By.CLASS_NAME, "cardbox", parent = ad_list_container_mock),
], any_order = False)
@pytest.mark.asyncio
async def test_extract_own_ads_urls_paginates_with_enabled_next_button(self, test_extractor:AdExtractor) -> None:
    """Ensure the paginator clicks the first enabled next button and advances."""
    # Placeholder containers returned by the stubbed web_find calls below.
    ad_list_container_mock = MagicMock()
    pagination_section_mock = MagicMock()
    cardbox_page_one = MagicMock()
    cardbox_page_two = MagicMock()
    link_page_one = MagicMock(attrs = {"href": "/s-anzeige/page-one/111"})
    link_page_two = MagicMock(attrs = {"href": "/s-anzeige/page-two/222"})
    # The enabled button must be an AsyncMock because its click() is awaited.
    next_button_enabled = AsyncMock()
    next_button_enabled.attrs = {}
    disabled_button = MagicMock()
    disabled_button.attrs = {"disabled": True}
    link_queue = [link_page_one, link_page_two]  # one ad link per visited page, consumed in order
    # Mutable call counters shared with the stub closures below.
    next_button_call = {"count": 0}
    cardbox_call = {"count": 0}

    async def fake_web_find(selector_type:By, selector_value:str, *, parent:Element | None = None,
            timeout:int | float | None = None) -> Element:
        # Route each selector the extractor uses to its matching fixture object.
        if selector_type == By.ID and selector_value == "my-manageitems-adlist":
            return ad_list_container_mock
        if selector_type == By.CSS_SELECTOR and selector_value == ".Pagination":
            return pagination_section_mock
        if selector_type == By.CSS_SELECTOR and selector_value == "div h3 a.text-onSurface":
            return link_queue.pop(0)
        raise AssertionError(f"Unexpected selector {selector_type} {selector_value}")

    async def fake_web_find_all(selector_type:By, selector_value:str, *, parent:Element | None = None,
            timeout:int | float | None = None) -> list[Element]:
        # Simulate the next-button lookup across pages: call order matters here.
        if selector_type == By.CSS_SELECTOR and selector_value == 'button[aria-label="Nächste"]':
            next_button_call["count"] += 1
            if next_button_call["count"] == 1:
                return [next_button_enabled]  # initial detection -> multi page
            if next_button_call["count"] == 2:
                return [disabled_button, next_button_enabled]  # navigation on page 1
            return []  # after navigating, stop
        if selector_type == By.CLASS_NAME and selector_value == "cardbox":
            cardbox_call["count"] += 1
            return [cardbox_page_one] if cardbox_call["count"] == 1 else [cardbox_page_two]
        raise AssertionError(f"Unexpected find_all selector {selector_type} {selector_value}")

    # Stub out all browser interaction so only the pagination logic runs.
    with patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
            patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock), \
            patch.object(test_extractor, "web_sleep", new_callable = AsyncMock), \
            patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = fake_web_find), \
            patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = fake_web_find_all):
        refs = await test_extractor.extract_own_ads_urls()

    # Both pages were visited and their ad links collected in order.
    assert refs == ["/s-anzeige/page-one/111", "/s-anzeige/page-two/222"]
    next_button_enabled.click.assert_awaited()  # triggered once during navigation
class TestAdExtractorContent:
"""Tests for content extraction functionality."""
@@ -641,6 +695,24 @@ class TestAdExtractorCategory:
mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
mock_web_find_all.assert_awaited_once_with(By.CSS_SELECTOR, "a", parent = category_line)
@pytest.mark.asyncio
async def test_extract_category_legacy_selectors_timeout(self, extractor:AdExtractor, caplog:pytest.LogCaptureFixture) -> None:
    """Ensure fallback timeout logs the error and re-raises with translated message."""
    breadcrumb = MagicMock()

    async def stubbed_web_find(selector_type:By, selector_value:str, *, parent:Element | None = None,
            timeout:int | float | None = None) -> Element:
        # Only the breadcrumb container resolves; every legacy selector times out.
        if selector_type == By.ID and selector_value == "vap-brdcrmb":
            return breadcrumb
        raise TimeoutError("legacy selectors missing")

    with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = stubbed_web_find), \
            patch.object(extractor, "web_find_all", new_callable = AsyncMock, side_effect = TimeoutError), \
            caplog.at_level("ERROR"), \
            pytest.raises(TimeoutError, match = "Unable to locate breadcrumb fallback selectors"):
        await extractor._extract_category_from_ad_page()

    # The failure must also be logged before the exception propagates.
    assert any("Legacy breadcrumb selectors not found" in record.message for record in caplog.records)
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_special_attributes_empty(self, extractor:AdExtractor) -> None:

View File

@@ -95,6 +95,18 @@ class TestUpdateChecker:
with patch("requests.get", return_value = MagicMock(json = lambda: {"target_commitish": "e7a3d46"})):
assert checker._get_release_commit("latest") == "e7a3d46"
def test_request_timeout_uses_config(self, config:Config, mocker:"MockerFixture") -> None:
    """Ensure HTTP calls honor the timeout configuration."""
    config.timeouts.multiplier = 1.5
    checker = UpdateChecker(config)
    fake_response = MagicMock(json = lambda: {"target_commitish": "abc"})
    patched_get = mocker.patch("requests.get", return_value = fake_response)

    checker._get_release_commit("latest")

    # The effective timeout already reflects the multiplier set above.
    assert patched_get.call_args.kwargs["timeout"] == config.timeouts.effective("update_check")
def test_get_commit_date(self, config:Config) -> None:
"""Test that the commit date is correctly retrieved from the GitHub API."""
checker = UpdateChecker(config)

View File

@@ -8,12 +8,14 @@ All rights reserved.
"""
import json
import logging
import os
import platform
import shutil
import zipfile
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import NoReturn, Protocol, cast
from typing import Any, NoReturn, Protocol, cast
from unittest.mock import AsyncMock, MagicMock, Mock, mock_open, patch
import nodriver
@@ -22,6 +24,7 @@ import pytest
from nodriver.core.element import Element
from nodriver.core.tab import Tab as Page
from kleinanzeigen_bot.model.config_model import Config
from kleinanzeigen_bot.utils import loggers
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin, _is_admin # noqa: PLC2701
@@ -32,7 +35,13 @@ class ConfigProtocol(Protocol):
browser_args:list[str]
user_data_dir:str | None
def add_extension(self, ext:str) -> None: ...
def add_extension(self, ext:str) -> None:
...
def _nodriver_start_mock() -> Mock:
    """Return the patched ``nodriver.start`` as a properly typed Mock."""
    started = cast(Any, nodriver).start
    return cast(Mock, started)
class TrulyAwaitableMockPage:
@@ -82,6 +91,7 @@ def web_scraper(mock_browser:AsyncMock, mock_page:TrulyAwaitableMockPage) -> Web
scraper = WebScrapingMixin()
scraper.browser = mock_browser
scraper.page = mock_page # type: ignore[unused-ignore,reportAttributeAccessIssue]
scraper.config = Config.model_validate({"login": {"username": "user@example.com", "password": "secret"}}) # noqa: S105
return scraper
@@ -156,6 +166,21 @@ class TestWebScrapingErrorHandling:
with pytest.raises(Exception, match = "Cannot clear input"):
await web_scraper.web_input(By.ID, "test-id", "test text")
@pytest.mark.asyncio
async def test_web_input_success_returns_element(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
    """Successful web_input should send keys, wait, and return the element."""
    target = AsyncMock(spec = Element)
    mock_page.query_selector.return_value = target
    sleep_stub = AsyncMock()
    cast(Any, web_scraper).web_sleep = sleep_stub

    returned = await web_scraper.web_input(By.ID, "username", "hello world", timeout = 1)

    assert returned is target
    target.clear_input.assert_awaited_once()
    target.send_keys.assert_awaited_once_with("hello world")
    sleep_stub.assert_awaited_once()
@pytest.mark.asyncio
async def test_web_open_timeout(self, web_scraper:WebScrapingMixin, mock_browser:AsyncMock) -> None:
"""Test page load timeout in web_open."""
@@ -173,6 +198,19 @@ class TestWebScrapingErrorHandling:
with pytest.raises(TimeoutError, match = "Page did not finish loading within"):
await web_scraper.web_open("https://example.com", timeout = 0.1)
@pytest.mark.asyncio
async def test_web_open_skip_when_url_already_loaded(self, web_scraper:WebScrapingMixin, mock_browser:AsyncMock, mock_page:TrulyAwaitableMockPage) -> None:
    """web_open should short-circuit when the requested URL is already active."""
    mock_browser.get.reset_mock()
    mock_page.url = "https://example.com"
    execute_stub = AsyncMock()
    cast(Any, web_scraper).web_execute = execute_stub

    await web_scraper.web_open("https://example.com", reload_if_already_open = False)

    # Neither navigation nor script execution may happen for an already-open URL.
    mock_browser.get.assert_not_awaited()
    execute_stub.assert_not_called()
@pytest.mark.asyncio
async def test_web_request_invalid_response(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
"""Test invalid response handling in web_request."""
@@ -216,6 +254,179 @@ class TestWebScrapingErrorHandling:
with pytest.raises(Exception, match = "Attribute error"):
await web_scraper.web_check(By.ID, "test-id", Is.DISPLAYED)
@pytest.mark.asyncio
async def test_web_find_applies_timeout_multiplier_and_backoff(self, web_scraper:WebScrapingMixin) -> None:
    """Ensure multiplier/backoff logic is honored when timeouts occur."""
    assert web_scraper.config is not None
    web_scraper.config.timeouts.multiplier = 2.0
    web_scraper.config.timeouts.retry_enabled = True
    web_scraper.config.timeouts.retry_max_attempts = 2
    web_scraper.config.timeouts.retry_backoff_factor = 2.0
    # Records (timeout, apply_multiplier) for every web_await invocation.
    recorded:list[tuple[float, bool]] = []

    async def fake_web_await(condition:Callable[[], object], *, timeout:float, timeout_error_message:str = "",
            apply_multiplier:bool = True) -> Element:
        recorded.append((timeout, apply_multiplier))
        raise TimeoutError(timeout_error_message or "timeout")

    cast(Any, web_scraper).web_await = fake_web_await
    with pytest.raises(TimeoutError):
        await web_scraper.web_find(By.ID, "test-id", timeout = 0.5)
    # base 0.5 * multiplier 2.0 = 1.0, then doubled by the backoff factor on each
    # of the two retries; apply_multiplier is False because the passed timeout
    # already has the multiplier baked in.
    assert recorded == [(1.0, False), (2.0, False), (4.0, False)]
class TestTimeoutAndRetryHelpers:
    """Test timeout helper utilities in WebScrapingMixin."""

    def test_get_timeout_config_prefers_config_timeouts(self, web_scraper:WebScrapingMixin) -> None:
        """_get_timeout_config should return the config-provided timeout model when available."""
        custom_config = Config.model_validate({
            "login": {"username": "user@example.com", "password": "secret"},  # noqa: S105
            "timeouts": {"default": 7.5}
        })
        web_scraper.config = custom_config
        # Identity check: the very same model instance must be returned, not a copy.
        assert web_scraper._get_timeout_config() is custom_config.timeouts

    def test_timeout_attempts_respects_retry_switch(self, web_scraper:WebScrapingMixin) -> None:
        """_timeout_attempts should collapse to a single attempt when retries are disabled."""
        web_scraper.config.timeouts.retry_enabled = False
        assert web_scraper._timeout_attempts() == 1
        web_scraper.config.timeouts.retry_enabled = True
        web_scraper.config.timeouts.retry_max_attempts = 3
        # 1 initial attempt + 3 retries.
        assert web_scraper._timeout_attempts() == 4

    @pytest.mark.asyncio
    async def test_run_with_timeout_retries_retries_operation(self, web_scraper:WebScrapingMixin) -> None:
        """_run_with_timeout_retries should retry when TimeoutError is raised before succeeding."""
        # Each entry is the timeout the helper passed into the operation.
        attempts:list[float] = []

        async def flaky_operation(timeout:float) -> str:
            attempts.append(timeout)
            if len(attempts) == 1:
                raise TimeoutError("first attempt")
            return "done"

        web_scraper.config.timeouts.retry_max_attempts = 1
        result = await web_scraper._run_with_timeout_retries(flaky_operation, description = "retry-op")
        assert result == "done"
        assert len(attempts) == 2  # initial call plus exactly one retry

    @pytest.mark.asyncio
    async def test_run_with_timeout_retries_guard_clause(self, web_scraper:WebScrapingMixin) -> None:
        """_run_with_timeout_retries should guard against zero-attempt edge cases."""
        async def never_called(timeout:float) -> None:
            pytest.fail("operation should not run when attempts are zero")

        # Forcing zero attempts must yield the guard error without invoking the operation.
        with patch.object(web_scraper, "_timeout_attempts", return_value = 0), \
                pytest.raises(TimeoutError, match = "guarded-op failed without executing operation"):
            await web_scraper._run_with_timeout_retries(never_called, description = "guarded-op")
class TestSelectorTimeoutMessages:
    """Ensure selector helpers provide informative timeout messages."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        ("selector_type", "selector_value", "expected_message"),
        [
            (By.TAG_NAME, "section", "No HTML element found of tag <section> within 2.0 seconds."),
            (By.CSS_SELECTOR, ".hero", "No HTML element found using CSS selector '.hero' within 2.0 seconds."),
            (By.TEXT, "Submit", "No HTML element found containing text 'Submit' within 2.0 seconds."),
            (By.XPATH, "//div[@class='hero']", "No HTML element found using XPath '//div[@class='hero']' within 2.0 seconds."),
        ]
    )
    async def test_web_find_timeout_suffixes(
        self,
        web_scraper:WebScrapingMixin,
        selector_type:By,
        selector_value:str,
        expected_message:str
    ) -> None:
        """web_find should pass descriptive timeout messages for every selector strategy."""
        mock_element = AsyncMock(spec = Element)
        mock_wait = AsyncMock(return_value = mock_element)
        cast(Any, web_scraper).web_await = mock_wait
        result = await web_scraper.web_find(selector_type, selector_value, timeout = 2)
        assert result is mock_element
        # Inspect the first web_await call: the message must be selector-specific
        # and apply_multiplier must be False (timeout arrives pre-multiplied).
        call = mock_wait.await_args_list[0]
        assert expected_message == call.kwargs["timeout_error_message"]
        assert call.kwargs["apply_multiplier"] is False

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        ("selector_type", "selector_value", "expected_message"),
        [
            (By.CLASS_NAME, "hero", "No HTML elements found with CSS class 'hero' within 1 seconds."),
            (By.CSS_SELECTOR, ".card", "No HTML elements found using CSS selector '.card' within 1 seconds."),
            (By.TAG_NAME, "article", "No HTML elements found of tag <article> within 1 seconds."),
            (By.TEXT, "Listings", "No HTML elements found containing text 'Listings' within 1 seconds."),
            (By.XPATH, "//footer", "No HTML elements found using XPath '//footer' within 1 seconds."),
        ]
    )
    async def test_web_find_all_once_timeout_suffixes(
        self,
        web_scraper:WebScrapingMixin,
        selector_type:By,
        selector_value:str,
        expected_message:str
    ) -> None:
        """_web_find_all_once should surface informative timeout errors for each selector."""
        elements = [AsyncMock(spec = Element)]
        mock_wait = AsyncMock(return_value = elements)
        cast(Any, web_scraper).web_await = mock_wait
        result = await web_scraper._web_find_all_once(selector_type, selector_value, 1)
        assert result is elements
        # Same contract as web_find: descriptive message, no double multiplication.
        call = mock_wait.await_args_list[0]
        assert expected_message == call.kwargs["timeout_error_message"]
        assert call.kwargs["apply_multiplier"] is False

    @pytest.mark.asyncio
    async def test_web_find_all_delegates_to_retry_helper(self, web_scraper:WebScrapingMixin) -> None:
        """web_find_all should execute via the timeout retry helper."""
        elements = [AsyncMock(spec = Element)]

        async def fake_retry(operation:Callable[[float], Awaitable[list[Element]]], **kwargs:Any) -> list[Element]:
            # Verify the helper receives a description and the caller's override,
            # then invoke the wrapped operation with an arbitrary resolved timeout.
            assert kwargs["description"] == "web_find_all(CLASS_NAME, hero)"
            assert kwargs["override"] == 1.5
            result = await operation(0.42)
            return result

        retry_mock = AsyncMock(side_effect = fake_retry)
        once_mock = AsyncMock(return_value = elements)
        cast(Any, web_scraper)._run_with_timeout_retries = retry_mock
        cast(Any, web_scraper)._web_find_all_once = once_mock
        result = await web_scraper.web_find_all(By.CLASS_NAME, "hero", timeout = 1.5)
        assert result is elements
        retry_call = retry_mock.await_args_list[0]
        assert retry_call.kwargs["key"] == "default"
        assert retry_call.kwargs["override"] == 1.5
        # The single-shot lookup got the selector plus the timeout chosen by the helper.
        once_call = once_mock.await_args_list[0]
        assert once_call.args[:2] == (By.CLASS_NAME, "hero")
        assert once_call.args[2] == 0.42

    @pytest.mark.asyncio
    async def test_web_check_unsupported_attribute(self, web_scraper:WebScrapingMixin, mock_page:TrulyAwaitableMockPage) -> None:
        """web_check should raise for unsupported attribute queries."""
        mock_element = AsyncMock(spec = Element)
        mock_element.attrs = {}
        mock_page.query_selector.return_value = mock_element
        # An arbitrary object cast to Is is not a supported attribute query.
        with pytest.raises(AssertionError, match = "Unsupported attribute"):
            await web_scraper.web_check(By.ID, "test-id", cast(Is, object()), timeout = 0.1)
class TestWebScrapingSessionManagement:
"""Test session management edge cases in WebScrapingMixin."""
@@ -295,9 +506,42 @@ class TestWebScrapingSessionManagement:
with patch("psutil.Process") as mock_proc:
mock_proc.return_value.children.return_value = []
scraper.close_browser_session()
stop_mock.assert_called_once()
assert scraper.browser is None
assert scraper.page is None
stop_mock.assert_called_once()
assert scraper.browser is None
assert scraper.page is None
class TestWebScrolling:
    """Test scrolling helpers."""

    @pytest.mark.asyncio
    async def test_web_scroll_page_down_scrolls_and_returns(self, web_scraper:WebScrapingMixin) -> None:
        """web_scroll_page_down should scroll both directions when requested."""
        # Captures every JS snippet executed via web_execute, in order.
        scripts:list[str] = []

        async def exec_side_effect(script:str) -> int | None:
            scripts.append(script)
            if script == "document.body.scrollHeight":
                return 20  # page height of 20 -> two scroll steps of length 10
            return None

        cast(Any, web_scraper).web_execute = AsyncMock(side_effect = exec_side_effect)
        with patch("kleinanzeigen_bot.utils.web_scraping_mixin.asyncio.sleep", new_callable = AsyncMock) as mock_sleep:
            await web_scraper.web_scroll_page_down(scroll_length = 10, scroll_speed = 10, scroll_back_top = True)
        # Height is queried first and exactly once.
        assert scripts[0] == "document.body.scrollHeight"
        # Expect four scrollTo operations: two down, two up
        assert scripts.count("document.body.scrollHeight") == 1
        scroll_calls = [script for script in scripts if script.startswith("window.scrollTo")]
        assert scroll_calls == [
            "window.scrollTo(0, 10)",
            "window.scrollTo(0, 20)",
            "window.scrollTo(0, 10)",
            "window.scrollTo(0, 0)"
        ]
        # Downward steps sleep 1.0s each; the scroll-back-to-top steps sleep 0.5s.
        sleep_durations = [call.args[0] for call in mock_sleep.await_args_list]
        assert sleep_durations == [1.0, 1.0, 0.5, 0.5]
@pytest.mark.asyncio
async def test_session_expiration_handling(self, web_scraper:WebScrapingMixin, mock_browser:AsyncMock) -> None:
@@ -468,7 +712,7 @@ class TestWebScrapingBrowserConfiguration:
def add_extension(self, ext:str) -> None:
self._extensions.append(ext) # Use private extensions list
# Mock nodriver.start to return a mock browser # type: ignore[attr-defined]
# Mock nodriver.start to return a mock browser
mock_browser = AsyncMock()
mock_browser.websocket_url = "ws://localhost:9222"
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
@@ -557,7 +801,7 @@ class TestWebScrapingBrowserConfiguration:
def add_extension(self, ext:str) -> None:
self.extensions.append(ext)
# Mock nodriver.start to return a mock browser # type: ignore[attr-defined]
# Mock nodriver.start to return a mock browser
mock_browser = AsyncMock()
mock_browser.websocket_url = "ws://localhost:9222"
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
@@ -576,7 +820,7 @@ class TestWebScrapingBrowserConfiguration:
await scraper.create_browser_session()
# Verify browser arguments
config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined]
config = _nodriver_start_mock().call_args[0][0]
assert "--custom-arg=value" in config.browser_args
assert "--another-arg" in config.browser_args
assert "--incognito" in config.browser_args
@@ -589,7 +833,7 @@ class TestWebScrapingBrowserConfiguration:
await scraper.create_browser_session()
# Verify Edge-specific arguments
config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined]
config = _nodriver_start_mock().call_args[0][0]
assert "-inprivate" in config.browser_args
assert os.environ.get("MSEDGEDRIVER_TELEMETRY_OPTOUT") == "1"
@@ -620,7 +864,7 @@ class TestWebScrapingBrowserConfiguration:
with zipfile.ZipFile(ext2, "w") as z:
z.writestr("manifest.json", '{"name": "Test Extension 2"}')
# Mock nodriver.start to return a mock browser # type: ignore[attr-defined]
# Mock nodriver.start to return a mock browser
mock_browser = AsyncMock()
mock_browser.websocket_url = "ws://localhost:9222"
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
@@ -644,7 +888,7 @@ class TestWebScrapingBrowserConfiguration:
await scraper.create_browser_session()
# Verify extensions were loaded
config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined]
config = _nodriver_start_mock().call_args[0][0]
assert len(config._extensions) == 2
for ext_path in config._extensions:
assert os.path.exists(ext_path)
@@ -713,7 +957,7 @@ class TestWebScrapingBrowserConfiguration:
def add_extension(self, ext:str) -> None:
self._extensions.append(ext)
# Mock nodriver.start to return a mock browser # type: ignore[attr-defined]
# Mock nodriver.start to return a mock browser
mock_browser = AsyncMock()
mock_browser.websocket_url = "ws://localhost:9222"
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
@@ -772,7 +1016,7 @@ class TestWebScrapingBrowserConfiguration:
temp_file = tmp_path / "temp_resource"
temp_file.write_text("test")
# Mock nodriver.start to raise an exception # type: ignore[attr-defined]
# Mock nodriver.start to raise an exception
async def mock_start_fail(*args:object, **kwargs:object) -> NoReturn:
if temp_file.exists():
temp_file.unlink()
@@ -801,7 +1045,7 @@ class TestWebScrapingBrowserConfiguration:
assert scraper.browser is None
assert scraper.page is None
# Now patch nodriver.start to return a new mock browser each time # type: ignore[attr-defined]
# Now patch nodriver.start to return a new mock browser each time
mock_browser = make_mock_browser()
mock_page = TrulyAwaitableMockPage()
mock_browser.get = AsyncMock(return_value = mock_page)
@@ -1445,6 +1689,46 @@ class TestWebScrapingDiagnostics:
# Should not raise any exceptions
web_scraper.diagnose_browser_issues()
def test_diagnose_browser_issues_handles_per_process_errors(
        self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
) -> None:
    """diagnose_browser_issues should ignore psutil errors raised per process."""
    caplog.set_level(logging.INFO)

    class FailingProcess:
        # Mimics a psutil process whose metadata lookup is denied.
        @property
        def info(self) -> dict[str, object]:
            raise psutil.AccessDenied(pid = 999)

    with patch("os.path.exists", return_value = True), \
            patch("os.access", return_value = True), \
            patch("psutil.process_iter", return_value = [FailingProcess()]), \
            patch("platform.system", return_value = "Linux"), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
            patch.object(scraper_with_config, "_diagnose_chrome_version_issues"):
        scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
        scraper_with_config.diagnose_browser_issues()
    # An undiagnosable process is skipped, leaving "no processes" as the result.
    assert "(info) No browser processes currently running" in caplog.text
def test_diagnose_browser_issues_handles_global_psutil_failure(
        self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
) -> None:
    """diagnose_browser_issues should log a warning if psutil.process_iter fails entirely."""
    caplog.set_level(logging.WARNING)
    # process_iter itself blows up, so process inspection is skipped wholesale.
    with patch("os.path.exists", return_value = True), \
            patch("os.access", return_value = True), \
            patch("psutil.process_iter", side_effect = psutil.Error("boom")), \
            patch("platform.system", return_value = "Linux"), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
            patch.object(scraper_with_config, "_diagnose_chrome_version_issues"):
        scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
        scraper_with_config.diagnose_browser_issues()
    # The failure must surface as a warning, not an exception.
    assert "(warn) Unable to inspect browser processes:" in caplog.text
@pytest.mark.asyncio
async def test_validate_chrome_version_configuration_port_open_but_api_inaccessible(
self, web_scraper:WebScrapingMixin

View File

@@ -41,8 +41,11 @@ class TestWebScrapingMixinChromeVersionValidation:
# Test validation
await scraper._validate_chrome_version_configuration()
# Verify detection was called correctly
mock_detect.assert_called_once_with("/path/to/chrome")
# Verify detection was called correctly with timeout
assert mock_detect.call_count == 1
args, kwargs = mock_detect.call_args
assert args[0] == "/path/to/chrome"
assert kwargs["timeout"] == pytest.approx(10.0)
# Verify validation passed (no exception raised)
# The validation is now done internally in _validate_chrome_136_configuration
@@ -73,7 +76,10 @@ class TestWebScrapingMixinChromeVersionValidation:
# Test validation should log error but not raise exception due to error handling
await scraper._validate_chrome_version_configuration()
# Verify error was logged
# Verify detection call and logged error
assert mock_detect.call_count == 1
_, kwargs = mock_detect.call_args
assert kwargs["timeout"] == pytest.approx(10.0)
assert "Chrome 136+ configuration validation failed" in caplog.text
assert "Chrome 136+ requires --user-data-dir" in caplog.text
finally:
@@ -104,12 +110,37 @@ class TestWebScrapingMixinChromeVersionValidation:
await scraper._validate_chrome_version_configuration()
# Verify detection was called but no validation
mock_detect.assert_called_once_with("/path/to/chrome")
assert mock_detect.call_count == 1
_, kwargs = mock_detect.call_args
assert kwargs["timeout"] == pytest.approx(10.0)
finally:
# Restore environment
if original_env:
os.environ["PYTEST_CURRENT_TEST"] = original_env
@patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging")
async def test_validate_chrome_version_logs_remote_detection(
    self,
    mock_remote:Mock,
    mock_binary:Mock,
    scraper:WebScrapingMixin,
    caplog:pytest.LogCaptureFixture
) -> None:
    """When a remote browser responds, the detected version should be logged."""
    mock_remote.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
    mock_binary.return_value = None  # binary probing yields nothing; remote detection wins
    scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
    scraper.browser_config.binary_location = "/path/to/chrome"
    caplog.set_level("DEBUG")
    # clear=True empties the environment — presumably to drop PYTEST_CURRENT_TEST,
    # which sibling tests manipulate to toggle validation; confirm against impl.
    with patch.dict(os.environ, {}, clear = True), \
            patch.object(scraper, "_check_port_with_retry", return_value = True):
        await scraper._validate_chrome_version_configuration()
    assert "Detected version from existing browser" in caplog.text
    mock_remote.assert_called_once()
@patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
async def test_validate_chrome_version_configuration_no_binary_location(
self, mock_detect:Mock, scraper:WebScrapingMixin
@@ -145,7 +176,9 @@ class TestWebScrapingMixinChromeVersionValidation:
await scraper._validate_chrome_version_configuration()
# Verify detection was called
mock_detect.assert_called_once_with("/path/to/chrome")
assert mock_detect.call_count == 1
_, kwargs = mock_detect.call_args
assert kwargs["timeout"] == pytest.approx(10.0)
# Verify debug log message (line 824)
assert "Could not detect browser version, skipping validation" in caplog.text
@@ -201,10 +234,13 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
assert "Chrome 136+ detected - security validation required" in caplog.text
# Verify mocks were called
mock_get_diagnostic.assert_called_once_with(
binary_path = "/path/to/chrome",
remote_port = 9222
)
assert mock_get_diagnostic.call_count == 1
kwargs = mock_get_diagnostic.call_args.kwargs
assert kwargs["binary_path"] == "/path/to/chrome"
assert kwargs["remote_port"] == 9222
assert kwargs["remote_host"] == "127.0.0.1"
assert kwargs["remote_timeout"] > 0
assert kwargs["binary_timeout"] > 0
finally:
# Restore environment
if original_env:
@@ -364,10 +400,12 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
assert "Chrome pre-136 detected - no special security requirements" in caplog.text
# Verify that the diagnostic function was called with correct parameters
mock_get_diagnostic.assert_called_once_with(
binary_path = "/path/to/chrome",
remote_port = None
)
assert mock_get_diagnostic.call_count == 1
kwargs = mock_get_diagnostic.call_args.kwargs
assert kwargs["binary_path"] == "/path/to/chrome"
assert kwargs["remote_port"] is None
assert kwargs["remote_timeout"] > 0
assert kwargs["binary_timeout"] > 0
finally:
# Restore environment
if original_env: