feat: add configurable timeouts (#673)

## ℹ️ Description
- Related issues: #671, #658
- Introduces configurable timeout controls plus retry/backoff handling
for flaky DOM operations.

We often see timeouts which are not reproducible in certain
configurations. I suspect the timeouts depend on a combination of
internet speed, browser, OS, age of the computer and the weather.

This PR introduces a comprehensive config model to tweak timeouts.

## 📋 Changes Summary
- add TimeoutConfig to the main config/schema and expose timeouts in
README/docs
- wire WebScrapingMixin, extractor, update checker, and browser
diagnostics to honor the configurable timeouts and retries
- update translations/tests to cover the new behaviour and ensure
lint/mypy/pyright pipelines remain green

### ⚙️ Type of Change
- [ ] 🐞 Bug fix (non-breaking change which fixes an issue)
- [x] ✨ New feature (adds new functionality without breaking existing
usage)
- [ ] 💥 Breaking change (changes that might break existing user setups,
scripts, or configurations)

## ✅ Checklist
- [x] I have reviewed my changes to ensure they meet the project's
standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run
test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
  * Centralized, configurable timeout system for web interactions,
    detection flows, publishing, and pagination.
  * Optional retry with exponential backoff for operations that time out.

* **Improvements**
  * Replaced fixed wait times with dynamic timeouts throughout workflows.
  * More informative timeout-related messages and diagnostics.

* **Tests**
  * New and expanded test coverage for timeout behavior, pagination,
    diagnostics, and retry logic.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Jens
2025-11-13 15:08:52 +01:00
committed by GitHub
parent ac678ed888
commit a3ac27c441
16 changed files with 972 additions and 121 deletions

View File

@@ -41,8 +41,11 @@ class TestWebScrapingMixinChromeVersionValidation:
# Test validation
await scraper._validate_chrome_version_configuration()
# Verify detection was called correctly
mock_detect.assert_called_once_with("/path/to/chrome")
# Verify detection was called correctly with timeout
assert mock_detect.call_count == 1
args, kwargs = mock_detect.call_args
assert args[0] == "/path/to/chrome"
assert kwargs["timeout"] == pytest.approx(10.0)
# Verify validation passed (no exception raised)
# The validation is now done internally in _validate_chrome_136_configuration
@@ -73,7 +76,10 @@ class TestWebScrapingMixinChromeVersionValidation:
# Test validation should log error but not raise exception due to error handling
await scraper._validate_chrome_version_configuration()
# Verify error was logged
# Verify detection call and logged error
assert mock_detect.call_count == 1
_, kwargs = mock_detect.call_args
assert kwargs["timeout"] == pytest.approx(10.0)
assert "Chrome 136+ configuration validation failed" in caplog.text
assert "Chrome 136+ requires --user-data-dir" in caplog.text
finally:
@@ -104,12 +110,37 @@ class TestWebScrapingMixinChromeVersionValidation:
await scraper._validate_chrome_version_configuration()
# Verify detection was called but no validation
mock_detect.assert_called_once_with("/path/to/chrome")
assert mock_detect.call_count == 1
_, kwargs = mock_detect.call_args
assert kwargs["timeout"] == pytest.approx(10.0)
finally:
# Restore environment
if original_env:
os.environ["PYTEST_CURRENT_TEST"] = original_env
# Stacked patch decorators are applied bottom-up: the remote-debugging patch
# (second decorator) is injected as mock_remote, the binary-detection patch
# (first decorator) as mock_binary.
@patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
@patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging")
async def test_validate_chrome_version_logs_remote_detection(
self,
mock_remote:Mock,
mock_binary:Mock,
scraper:WebScrapingMixin,
caplog:pytest.LogCaptureFixture
) -> None:
"""When a remote browser responds, the detected version should be logged."""
# Remote detection reports version 136.0.6778.0; binary detection returns nothing.
mock_remote.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
mock_binary.return_value = None
# Configure the scraper with a remote debugging port (9222) and a binary path.
scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
scraper.browser_config.binary_location = "/path/to/chrome"
# Lower the capture threshold to DEBUG before running the validation.
caplog.set_level("DEBUG")
# Run with an emptied os.environ and the port check stubbed to report success.
# NOTE(review): clearing the environment presumably removes PYTEST_CURRENT_TEST
# so the validation is not skipped under pytest - confirm against the mixin.
with patch.dict(os.environ, {}, clear = True), \
patch.object(scraper, "_check_port_with_retry", return_value = True):
await scraper._validate_chrome_version_configuration()
# The remote version must appear in the log, and remote detection must have run exactly once.
assert "Detected version from existing browser" in caplog.text
mock_remote.assert_called_once()
@patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
async def test_validate_chrome_version_configuration_no_binary_location(
self, mock_detect:Mock, scraper:WebScrapingMixin
@@ -145,7 +176,9 @@ class TestWebScrapingMixinChromeVersionValidation:
await scraper._validate_chrome_version_configuration()
# Verify detection was called
mock_detect.assert_called_once_with("/path/to/chrome")
assert mock_detect.call_count == 1
_, kwargs = mock_detect.call_args
assert kwargs["timeout"] == pytest.approx(10.0)
# Verify debug log message (line 824)
assert "Could not detect browser version, skipping validation" in caplog.text
@@ -201,10 +234,13 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
assert "Chrome 136+ detected - security validation required" in caplog.text
# Verify mocks were called
mock_get_diagnostic.assert_called_once_with(
binary_path = "/path/to/chrome",
remote_port = 9222
)
assert mock_get_diagnostic.call_count == 1
kwargs = mock_get_diagnostic.call_args.kwargs
assert kwargs["binary_path"] == "/path/to/chrome"
assert kwargs["remote_port"] == 9222
assert kwargs["remote_host"] == "127.0.0.1"
assert kwargs["remote_timeout"] > 0
assert kwargs["binary_timeout"] > 0
finally:
# Restore environment
if original_env:
@@ -364,10 +400,12 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
assert "Chrome pre-136 detected - no special security requirements" in caplog.text
# Verify that the diagnostic function was called with correct parameters
mock_get_diagnostic.assert_called_once_with(
binary_path = "/path/to/chrome",
remote_port = None
)
assert mock_get_diagnostic.call_count == 1
kwargs = mock_get_diagnostic.call_args.kwargs
assert kwargs["binary_path"] == "/path/to/chrome"
assert kwargs["remote_port"] is None
assert kwargs["remote_timeout"] > 0
assert kwargs["binary_timeout"] > 0
finally:
# Restore environment
if original_env: