feat: add configurable timeouts (#673)

## ℹ️ Description - Related issues: #671, #658 - Introduces configurable timeout controls plus retry/backoff handling for flaky DOM operations. We often see timeouts which are not reproducible in certain configurations. I suspect the timeout issues stem from a combination of internet speed, browser, OS, age of the computer and the weather. This PR introduces a comprehensive config model to tweak timeouts. ## 📋 Changes Summary - add TimeoutConfig to the main config/schema and expose timeouts in README/docs - wire WebScrapingMixin, extractor, update checker, and browser diagnostics to honor the configurable timeouts and retries - update translations/tests to cover the new behaviour and ensure lint/mypy/pyright pipelines remain green ### ⚙️ Type of Change - [ ] 🐞 Bug fix (non-breaking change which fixes an issue) - [x] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist - [x] I have reviewed my changes to ensure they meet the project's standards. - [x] I have tested my changes and ensured that all tests pass (`pdm run test`). - [x] I have formatted the code (`pdm run format`). - [x] I have verified that linting passes (`pdm run lint`). - [x] I have updated documentation where necessary.  ## Summary by CodeRabbit * **New Features** * Centralized, configurable timeout system for web interactions, detection flows, publishing, and pagination. * Optional retry with exponential backoff for operations that time out. * **Improvements** * Replaced fixed wait times with dynamic timeouts throughout workflows. * More informative timeout-related messages and diagnostics. * **Tests** * New and expanded test coverage for timeout behavior, pagination, diagnostics, and retry logic.
2026-03-12 10:31:50 +01:00 · 2025-11-13 15:08:52 +01:00
parent ac678ed888
commit a3ac27c441
16 changed files with 972 additions and 121 deletions
--- a/tests/unit/test_web_scraping_mixin_chrome_version.py
+++ b/tests/unit/test_web_scraping_mixin_chrome_version.py
@@ -41,8 +41,11 @@ class TestWebScrapingMixinChromeVersionValidation:
            # Test validation
            await scraper._validate_chrome_version_configuration()

-            # Verify detection was called correctly
-            mock_detect.assert_called_once_with("/path/to/chrome")
+            # Verify detection was called correctly with timeout
+            assert mock_detect.call_count == 1
+            args, kwargs = mock_detect.call_args
+            assert args[0] == "/path/to/chrome"
+            assert kwargs["timeout"] == pytest.approx(10.0)

            # Verify validation passed (no exception raised)
            # The validation is now done internally in _validate_chrome_136_configuration
@@ -73,7 +76,10 @@ class TestWebScrapingMixinChromeVersionValidation:
            # Test validation should log error but not raise exception due to error handling
            await scraper._validate_chrome_version_configuration()

-            # Verify error was logged
+            # Verify detection call and logged error
+            assert mock_detect.call_count == 1
+            _, kwargs = mock_detect.call_args
+            assert kwargs["timeout"] == pytest.approx(10.0)
            assert "Chrome 136+ configuration validation failed" in caplog.text
            assert "Chrome 136+ requires --user-data-dir" in caplog.text
        finally:
@@ -104,12 +110,37 @@ class TestWebScrapingMixinChromeVersionValidation:
            await scraper._validate_chrome_version_configuration()

            # Verify detection was called but no validation
-            mock_detect.assert_called_once_with("/path/to/chrome")
+            assert mock_detect.call_count == 1
+            _, kwargs = mock_detect.call_args
+            assert kwargs["timeout"] == pytest.approx(10.0)
        finally:
            # Restore environment
            if original_env:
                os.environ["PYTEST_CURRENT_TEST"] = original_env

+    @patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
+    @patch("kleinanzeigen_bot.utils.web_scraping_mixin.detect_chrome_version_from_remote_debugging")
+    async def test_validate_chrome_version_logs_remote_detection(
+        self,
+        mock_remote:Mock,
+        mock_binary:Mock,
+        scraper:WebScrapingMixin,
+        caplog:pytest.LogCaptureFixture
+    ) -> None:
+        """When a remote browser responds, the detected version should be logged."""
+        mock_remote.return_value = ChromeVersionInfo("136.0.6778.0", 136, "Chrome")
+        mock_binary.return_value = None
+        scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
+        scraper.browser_config.binary_location = "/path/to/chrome"
+        caplog.set_level("DEBUG")
+
+        with patch.dict(os.environ, {}, clear = True), \
+                patch.object(scraper, "_check_port_with_retry", return_value = True):
+            await scraper._validate_chrome_version_configuration()
+
+        assert "Detected version from existing browser" in caplog.text
+        mock_remote.assert_called_once()
+
    @patch("kleinanzeigen_bot.utils.chrome_version_detector.detect_chrome_version_from_binary")
    async def test_validate_chrome_version_configuration_no_binary_location(
        self, mock_detect:Mock, scraper:WebScrapingMixin
@@ -145,7 +176,9 @@ class TestWebScrapingMixinChromeVersionValidation:
            await scraper._validate_chrome_version_configuration()

            # Verify detection was called
-            mock_detect.assert_called_once_with("/path/to/chrome")
+            assert mock_detect.call_count == 1
+            _, kwargs = mock_detect.call_args
+            assert kwargs["timeout"] == pytest.approx(10.0)

            # Verify debug log message (line 824)
            assert "Could not detect browser version, skipping validation" in caplog.text
@@ -201,10 +234,13 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
            assert "Chrome 136+ detected - security validation required" in caplog.text

            # Verify mocks were called
-            mock_get_diagnostic.assert_called_once_with(
-                binary_path = "/path/to/chrome",
-                remote_port = 9222
-            )
+            assert mock_get_diagnostic.call_count == 1
+            kwargs = mock_get_diagnostic.call_args.kwargs
+            assert kwargs["binary_path"] == "/path/to/chrome"
+            assert kwargs["remote_port"] == 9222
+            assert kwargs["remote_host"] == "127.0.0.1"
+            assert kwargs["remote_timeout"] > 0
+            assert kwargs["binary_timeout"] > 0
        finally:
            # Restore environment
            if original_env:
@@ -364,10 +400,12 @@ class TestWebScrapingMixinChromeVersionDiagnostics:
            assert "Chrome pre-136 detected - no special security requirements" in caplog.text

            # Verify that the diagnostic function was called with correct parameters
-            mock_get_diagnostic.assert_called_once_with(
-                binary_path = "/path/to/chrome",
-                remote_port = None
-            )
+            assert mock_get_diagnostic.call_count == 1
+            kwargs = mock_get_diagnostic.call_args.kwargs
+            assert kwargs["binary_path"] == "/path/to/chrome"
+            assert kwargs["remote_port"] is None
+            assert kwargs["remote_timeout"] > 0
+            assert kwargs["binary_timeout"] > 0
        finally:
            # Restore environment
            if original_env: