feat: add configurable timeouts (#673)

## ℹ️ Description
- Related issues: #671, #658
- Introduces configurable timeout controls plus retry/backoff handling
for flaky DOM operations.

We often see timeouts which are not reproducible in certain
configurations. I suspect the timeout issues depend on a combination of
internet speed, browser, OS, age of the computer, and the weather.

This PR introduces a comprehensive config model to tweak timeouts.

## 📋 Changes Summary
- add TimeoutConfig to the main config/schema and expose timeouts in
README/docs
- wire WebScrapingMixin, extractor, update checker, and browser
diagnostics to honor the configurable timeouts and retries
- update translations/tests to cover the new behaviour and ensure
lint/mypy/pyright pipelines remain green

### ⚙️ Type of Change
- [ ] 🐞 Bug fix (non-breaking change which fixes an issue)
- [x] ✨ New feature (adds new functionality without breaking existing
usage)
- [ ] 💥 Breaking change (changes that might break existing user setups,
scripts, or configurations)

## ✅ Checklist
- [x] I have reviewed my changes to ensure they meet the project's
standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run
test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Centralized, configurable timeout system for web interactions,
detection flows, publishing, and pagination.
* Optional retry with exponential backoff for operations that time out.

* **Improvements**
* Replaced fixed wait times with dynamic timeouts throughout workflows.
  * More informative timeout-related messages and diagnostics.

* **Tests**
* New and expanded test coverage for timeout behavior, pagination,
diagnostics, and retry logic.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Jens
2025-11-13 15:08:52 +01:00
committed by GitHub
parent ac678ed888
commit a3ac27c441
16 changed files with 972 additions and 121 deletions

View File

@@ -78,23 +78,25 @@ def _normalize_browser_name(browser_name:str) -> str:
return "Chrome"
def detect_chrome_version_from_binary(binary_path:str) -> ChromeVersionInfo | None:
def detect_chrome_version_from_binary(binary_path:str, *, timeout:float | None = None) -> ChromeVersionInfo | None:
"""
Detect Chrome version by running the browser binary.
Args:
binary_path: Path to the Chrome binary
timeout: Optional timeout (seconds) for the subprocess call
Returns:
ChromeVersionInfo if successful, None if detection fails
"""
effective_timeout = timeout if timeout is not None else 10.0
try:
# Run browser with --version flag
result = subprocess.run( # noqa: S603
[binary_path, "--version"],
check = False, capture_output = True,
text = True,
timeout = 10
timeout = effective_timeout
)
if result.returncode != 0:
@@ -114,28 +116,30 @@ def detect_chrome_version_from_binary(binary_path:str) -> ChromeVersionInfo | No
return ChromeVersionInfo(version_string, major_version, browser_name)
except subprocess.TimeoutExpired:
LOG.debug("Browser version command timed out")
LOG.debug("Browser version command timed out after %.1fs", effective_timeout)
return None
except (subprocess.SubprocessError, ValueError) as e:
LOG.debug("Failed to detect browser version: %s", str(e))
return None
def detect_chrome_version_from_remote_debugging(host:str = "127.0.0.1", port:int = 9222) -> ChromeVersionInfo | None:
def detect_chrome_version_from_remote_debugging(host:str = "127.0.0.1", port:int = 9222, *, timeout:float | None = None) -> ChromeVersionInfo | None:
"""
Detect Chrome version from remote debugging API.
Args:
host: Remote debugging host
port: Remote debugging port
timeout: Optional timeout (seconds) for the HTTP request
Returns:
ChromeVersionInfo if successful, None if detection fails
"""
effective_timeout = timeout if timeout is not None else 5.0
try:
# Query the remote debugging API
url = f"http://{host}:{port}/json/version"
response = urllib.request.urlopen(url, timeout = 5) # noqa: S310
response = urllib.request.urlopen(url, timeout = effective_timeout) # noqa: S310
version_data = json.loads(response.read().decode())
# Extract version information
@@ -200,7 +204,10 @@ def validate_chrome_136_configuration(browser_arguments:list[str], user_data_dir
def get_chrome_version_diagnostic_info(
binary_path:str | None = None,
remote_host:str = "127.0.0.1",
remote_port:int | None = None
remote_port:int | None = None,
*,
remote_timeout:float | None = None,
binary_timeout:float | None = None
) -> dict[str, Any]:
"""
Get comprehensive Chrome version diagnostic information.
@@ -209,6 +216,8 @@ def get_chrome_version_diagnostic_info(
binary_path: Path to Chrome binary (optional)
remote_host: Remote debugging host
remote_port: Remote debugging port (optional)
remote_timeout: Timeout for remote debugging detection
binary_timeout: Timeout for binary detection
Returns:
Dictionary with diagnostic information
@@ -223,7 +232,7 @@ def get_chrome_version_diagnostic_info(
# Try binary detection
if binary_path:
version_info = detect_chrome_version_from_binary(binary_path)
version_info = detect_chrome_version_from_binary(binary_path, timeout = binary_timeout)
if version_info:
diagnostic_info["binary_detection"] = {
"version_string": version_info.version_string,
@@ -235,7 +244,7 @@ def get_chrome_version_diagnostic_info(
# Try remote debugging detection
if remote_port:
version_info = detect_chrome_version_from_remote_debugging(remote_host, remote_port)
version_info = detect_chrome_version_from_remote_debugging(remote_host, remote_port, timeout = remote_timeout)
if version_info:
diagnostic_info["remote_detection"] = {
"version_string": version_info.version_string,