mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
feat: add configurable timeouts (#673)
## ℹ️ Description - Related issues: #671, #658 - Introduces configurable timeout controls plus retry/backoff handling for flaky DOM operations. We often see timeouts which are not reproducible in certain configurations. I suspect timeout issues based on a combination of internet speed, browser, OS, age of the computer and the weather. This PR introduces a comprehensive config model to tweak timeouts. ## 📋 Changes Summary - add TimeoutConfig to the main config/schema and expose timeouts in README/docs - wire WebScrapingMixin, extractor, update checker, and browser diagnostics to honor the configurable timeouts and retries - update translations/tests to cover the new behaviour and ensure lint/mypy/pyright pipelines remain green ### ⚙️ Type of Change - [ ] 🐞 Bug fix (non-breaking change which fixes an issue) - [x] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist - [x] I have reviewed my changes to ensure they meet the project's standards. - [x] I have tested my changes and ensured that all tests pass (`pdm run test`). - [x] I have formatted the code (`pdm run format`). - [x] I have verified that linting passes (`pdm run lint`). - [x] I have updated documentation where necessary. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Centralized, configurable timeout system for web interactions, detection flows, publishing, and pagination. * Optional retry with exponential backoff for operations that time out. * **Improvements** * Replaced fixed wait times with dynamic timeouts throughout workflows. * More informative timeout-related messages and diagnostics. * **Tests** * New and expanded test coverage for timeout behavior, pagination, diagnostics, and retry logic. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -78,23 +78,25 @@ def _normalize_browser_name(browser_name:str) -> str:
|
||||
return "Chrome"
|
||||
|
||||
|
||||
def detect_chrome_version_from_binary(binary_path:str) -> ChromeVersionInfo | None:
|
||||
def detect_chrome_version_from_binary(binary_path:str, *, timeout:float | None = None) -> ChromeVersionInfo | None:
|
||||
"""
|
||||
Detect Chrome version by running the browser binary.
|
||||
|
||||
Args:
|
||||
binary_path: Path to the Chrome binary
|
||||
timeout: Optional timeout (seconds) for the subprocess call
|
||||
|
||||
Returns:
|
||||
ChromeVersionInfo if successful, None if detection fails
|
||||
"""
|
||||
effective_timeout = timeout if timeout is not None else 10.0
|
||||
try:
|
||||
# Run browser with --version flag
|
||||
result = subprocess.run( # noqa: S603
|
||||
[binary_path, "--version"],
|
||||
check = False, capture_output = True,
|
||||
text = True,
|
||||
timeout = 10
|
||||
timeout = effective_timeout
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
@@ -114,28 +116,30 @@ def detect_chrome_version_from_binary(binary_path:str) -> ChromeVersionInfo | No
|
||||
return ChromeVersionInfo(version_string, major_version, browser_name)
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
LOG.debug("Browser version command timed out")
|
||||
LOG.debug("Browser version command timed out after %.1fs", effective_timeout)
|
||||
return None
|
||||
except (subprocess.SubprocessError, ValueError) as e:
|
||||
LOG.debug("Failed to detect browser version: %s", str(e))
|
||||
return None
|
||||
|
||||
|
||||
def detect_chrome_version_from_remote_debugging(host:str = "127.0.0.1", port:int = 9222) -> ChromeVersionInfo | None:
|
||||
def detect_chrome_version_from_remote_debugging(host:str = "127.0.0.1", port:int = 9222, *, timeout:float | None = None) -> ChromeVersionInfo | None:
|
||||
"""
|
||||
Detect Chrome version from remote debugging API.
|
||||
|
||||
Args:
|
||||
host: Remote debugging host
|
||||
port: Remote debugging port
|
||||
timeout: Optional timeout (seconds) for the HTTP request
|
||||
|
||||
Returns:
|
||||
ChromeVersionInfo if successful, None if detection fails
|
||||
"""
|
||||
effective_timeout = timeout if timeout is not None else 5.0
|
||||
try:
|
||||
# Query the remote debugging API
|
||||
url = f"http://{host}:{port}/json/version"
|
||||
response = urllib.request.urlopen(url, timeout = 5) # noqa: S310
|
||||
response = urllib.request.urlopen(url, timeout = effective_timeout) # noqa: S310
|
||||
version_data = json.loads(response.read().decode())
|
||||
|
||||
# Extract version information
|
||||
@@ -200,7 +204,10 @@ def validate_chrome_136_configuration(browser_arguments:list[str], user_data_dir
|
||||
def get_chrome_version_diagnostic_info(
|
||||
binary_path:str | None = None,
|
||||
remote_host:str = "127.0.0.1",
|
||||
remote_port:int | None = None
|
||||
remote_port:int | None = None,
|
||||
*,
|
||||
remote_timeout:float | None = None,
|
||||
binary_timeout:float | None = None
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Get comprehensive Chrome version diagnostic information.
|
||||
@@ -209,6 +216,8 @@ def get_chrome_version_diagnostic_info(
|
||||
binary_path: Path to Chrome binary (optional)
|
||||
remote_host: Remote debugging host
|
||||
remote_port: Remote debugging port (optional)
|
||||
remote_timeout: Timeout for remote debugging detection
|
||||
binary_timeout: Timeout for binary detection
|
||||
|
||||
Returns:
|
||||
Dictionary with diagnostic information
|
||||
@@ -223,7 +232,7 @@ def get_chrome_version_diagnostic_info(
|
||||
|
||||
# Try binary detection
|
||||
if binary_path:
|
||||
version_info = detect_chrome_version_from_binary(binary_path)
|
||||
version_info = detect_chrome_version_from_binary(binary_path, timeout = binary_timeout)
|
||||
if version_info:
|
||||
diagnostic_info["binary_detection"] = {
|
||||
"version_string": version_info.version_string,
|
||||
@@ -235,7 +244,7 @@ def get_chrome_version_diagnostic_info(
|
||||
|
||||
# Try remote debugging detection
|
||||
if remote_port:
|
||||
version_info = detect_chrome_version_from_remote_debugging(remote_host, remote_port)
|
||||
version_info = detect_chrome_version_from_remote_debugging(remote_host, remote_port, timeout = remote_timeout)
|
||||
if version_info:
|
||||
diagnostic_info["remote_detection"] = {
|
||||
"version_string": version_info.version_string,
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
import asyncio, enum, inspect, json, os, platform, secrets, shutil, subprocess, urllib.request # isort: skip # noqa: S404
|
||||
from collections.abc import Callable, Coroutine, Iterable
|
||||
from collections.abc import Awaitable, Callable, Coroutine, Iterable
|
||||
from gettext import gettext as _
|
||||
from typing import Any, Final, cast
|
||||
from typing import Any, Final, Optional, cast
|
||||
|
||||
try:
|
||||
from typing import Never # type: ignore[attr-defined,unused-ignore] # mypy
|
||||
@@ -15,10 +15,13 @@ import nodriver, psutil # isort: skip
|
||||
from typing import TYPE_CHECKING, TypeGuard
|
||||
|
||||
from nodriver.core.browser import Browser
|
||||
from nodriver.core.config import Config
|
||||
from nodriver.core.config import Config as NodriverConfig
|
||||
from nodriver.core.element import Element
|
||||
from nodriver.core.tab import Tab as Page
|
||||
|
||||
from kleinanzeigen_bot.model.config_model import Config as BotConfig
|
||||
from kleinanzeigen_bot.model.config_model import TimeoutConfig
|
||||
|
||||
from . import loggers, net
|
||||
from .chrome_version_detector import (
|
||||
ChromeVersionInfo,
|
||||
@@ -32,6 +35,7 @@ from .misc import T, ensure
|
||||
if TYPE_CHECKING:
|
||||
from nodriver.cdp.runtime import RemoteObject
|
||||
|
||||
|
||||
# Constants for RemoteObject conversion
|
||||
_KEY_VALUE_PAIR_SIZE = 2
|
||||
|
||||
@@ -102,6 +106,69 @@ class WebScrapingMixin:
|
||||
self.browser_config:Final[BrowserConfig] = BrowserConfig()
|
||||
self.browser:Browser = None # pyright: ignore[reportAttributeAccessIssue]
|
||||
self.page:Page = None # pyright: ignore[reportAttributeAccessIssue]
|
||||
self._default_timeout_config:TimeoutConfig | None = None
|
||||
self.config:BotConfig = cast(BotConfig, None)
|
||||
|
||||
def _get_timeout_config(self) -> TimeoutConfig:
|
||||
config = getattr(self, "config", None)
|
||||
timeouts:TimeoutConfig | None = None
|
||||
if config is not None:
|
||||
timeouts = cast(Optional[TimeoutConfig], getattr(config, "timeouts", None))
|
||||
if timeouts is not None:
|
||||
return timeouts
|
||||
|
||||
if self._default_timeout_config is None:
|
||||
self._default_timeout_config = TimeoutConfig()
|
||||
return self._default_timeout_config
|
||||
|
||||
def _timeout(self, key:str = "default", override:float | None = None) -> float:
|
||||
"""
|
||||
Return the base timeout (seconds) for a given key without applying multipliers.
|
||||
"""
|
||||
return self._get_timeout_config().resolve(key, override)
|
||||
|
||||
def _effective_timeout(self, key:str = "default", override:float | None = None, *, attempt:int = 0) -> float:
|
||||
"""
|
||||
Return the effective timeout (seconds) with multiplier/backoff applied.
|
||||
"""
|
||||
return self._get_timeout_config().effective(key, override, attempt = attempt)
|
||||
|
||||
def _timeout_attempts(self) -> int:
|
||||
cfg = self._get_timeout_config()
|
||||
if not cfg.retry_enabled:
|
||||
return 1
|
||||
# Always perform the initial attempt plus the configured number of retries.
|
||||
return 1 + cfg.retry_max_attempts
|
||||
|
||||
async def _run_with_timeout_retries(
|
||||
self,
|
||||
operation:Callable[[float], Awaitable[T]],
|
||||
*,
|
||||
description:str,
|
||||
key:str = "default",
|
||||
override:float | None = None
|
||||
) -> T:
|
||||
"""
|
||||
Execute an async callable with retry/backoff handling for TimeoutError.
|
||||
"""
|
||||
attempts = self._timeout_attempts()
|
||||
|
||||
for attempt in range(attempts):
|
||||
effective_timeout = self._effective_timeout(key, override, attempt = attempt)
|
||||
try:
|
||||
return await operation(effective_timeout)
|
||||
except TimeoutError:
|
||||
if attempt >= attempts - 1:
|
||||
raise
|
||||
LOG.debug(
|
||||
"Retrying %s after TimeoutError (attempt %d/%d, timeout %.1fs)",
|
||||
description,
|
||||
attempt + 1,
|
||||
attempts,
|
||||
effective_timeout
|
||||
)
|
||||
|
||||
raise TimeoutError(f"{description} failed without executing operation")
|
||||
|
||||
async def create_browser_session(self) -> None:
|
||||
LOG.info("Creating Browser session...")
|
||||
@@ -137,7 +204,7 @@ class WebScrapingMixin:
|
||||
f"Make sure the browser is running and the port is not blocked by firewall.")
|
||||
|
||||
try:
|
||||
cfg = Config(
|
||||
cfg = NodriverConfig(
|
||||
browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without
|
||||
)
|
||||
cfg.host = remote_host
|
||||
@@ -207,7 +274,7 @@ class WebScrapingMixin:
|
||||
if self.browser_config.user_data_dir:
|
||||
LOG.info(" -> Browser user data dir: %s", self.browser_config.user_data_dir)
|
||||
|
||||
cfg = Config(
|
||||
cfg = NodriverConfig(
|
||||
headless = False,
|
||||
browser_executable_path = self.browser_config.binary_location,
|
||||
browser_args = browser_args,
|
||||
@@ -355,7 +422,8 @@ class WebScrapingMixin:
|
||||
LOG.info("(ok) Remote debugging port is open")
|
||||
# Try to get more information about the debugging endpoint
|
||||
try:
|
||||
response = urllib.request.urlopen(f"http://127.0.0.1:{remote_port}/json/version", timeout = 2)
|
||||
probe_timeout = self._effective_timeout("chrome_remote_probe")
|
||||
response = urllib.request.urlopen(f"http://127.0.0.1:{remote_port}/json/version", timeout = probe_timeout)
|
||||
version_info = json.loads(response.read().decode())
|
||||
LOG.info("(ok) Remote debugging API accessible - Browser: %s", version_info.get("Browser", "Unknown"))
|
||||
except Exception as e:
|
||||
@@ -378,30 +446,34 @@ class WebScrapingMixin:
|
||||
except (AssertionError, TypeError):
|
||||
target_browser_name = ""
|
||||
|
||||
for proc in psutil.process_iter(["pid", "name", "cmdline"]):
|
||||
try:
|
||||
proc_name = proc.info["name"] or ""
|
||||
cmdline = proc.info["cmdline"] or []
|
||||
try:
|
||||
for proc in psutil.process_iter(["pid", "name", "cmdline"]):
|
||||
try:
|
||||
proc_name = proc.info["name"] or ""
|
||||
cmdline = proc.info["cmdline"] or []
|
||||
|
||||
# Check if this is a browser process relevant to our diagnostics
|
||||
is_relevant_browser = False
|
||||
# Check if this is a browser process relevant to our diagnostics
|
||||
is_relevant_browser = False
|
||||
|
||||
# Is this the target browser?
|
||||
is_target_browser = target_browser_name and target_browser_name in proc_name.lower()
|
||||
# Is this the target browser?
|
||||
is_target_browser = target_browser_name and target_browser_name in proc_name.lower()
|
||||
|
||||
# Does it have remote debugging?
|
||||
has_remote_debugging = cmdline and any(arg.startswith("--remote-debugging-port=") for arg in cmdline)
|
||||
# Does it have remote debugging?
|
||||
has_remote_debugging = cmdline and any(arg.startswith("--remote-debugging-port=") for arg in cmdline)
|
||||
|
||||
# Detect target browser processes for diagnostics
|
||||
if is_target_browser:
|
||||
is_relevant_browser = True
|
||||
# Add debugging status to the process info for better diagnostics
|
||||
proc.info["has_remote_debugging"] = has_remote_debugging
|
||||
# Detect target browser processes for diagnostics
|
||||
if is_target_browser:
|
||||
is_relevant_browser = True
|
||||
# Add debugging status to the process info for better diagnostics
|
||||
proc.info["has_remote_debugging"] = has_remote_debugging
|
||||
|
||||
if is_relevant_browser:
|
||||
browser_processes.append(proc.info)
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
pass
|
||||
if is_relevant_browser:
|
||||
browser_processes.append(proc.info)
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
pass
|
||||
except (psutil.Error, PermissionError) as exc:
|
||||
LOG.warning("(warn) Unable to inspect browser processes: %s", exc)
|
||||
browser_processes = []
|
||||
|
||||
if browser_processes:
|
||||
LOG.info("(info) Found %d browser processes running", len(browser_processes))
|
||||
@@ -486,15 +558,17 @@ class WebScrapingMixin:
|
||||
raise AssertionError(_("Installed browser could not be detected"))
|
||||
|
||||
async def web_await(self, condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
|
||||
timeout:int | float = 5, timeout_error_message:str = "") -> T:
|
||||
timeout:int | float | None = None, timeout_error_message:str = "", apply_multiplier:bool = True) -> T:
|
||||
"""
|
||||
Blocks/waits until the given condition is met.
|
||||
|
||||
:param timeout: timeout in seconds
|
||||
:param timeout: timeout in seconds (base value, multiplier applied unless disabled)
|
||||
:raises TimeoutError: if element could not be found within time
|
||||
"""
|
||||
loop = asyncio.get_running_loop()
|
||||
start_at = loop.time()
|
||||
base_timeout = timeout if timeout is not None else self._timeout()
|
||||
effective_timeout = self._effective_timeout(override = base_timeout) if apply_multiplier else base_timeout
|
||||
|
||||
while True:
|
||||
await self.page
|
||||
@@ -506,13 +580,13 @@ class WebScrapingMixin:
|
||||
return result
|
||||
except Exception as ex1:
|
||||
ex = ex1
|
||||
if loop.time() - start_at > timeout:
|
||||
if loop.time() - start_at > effective_timeout:
|
||||
if ex:
|
||||
raise ex
|
||||
raise TimeoutError(timeout_error_message or f"Condition not met within {timeout} seconds")
|
||||
raise TimeoutError(timeout_error_message or f"Condition not met within {effective_timeout} seconds")
|
||||
await self.page.sleep(0.5)
|
||||
|
||||
async def web_check(self, selector_type:By, selector_value:str, attr:Is, *, timeout:int | float = 5) -> bool:
|
||||
async def web_check(self, selector_type:By, selector_value:str, attr:Is, *, timeout:int | float | None = None) -> bool:
|
||||
"""
|
||||
Locates an HTML element and returns a state.
|
||||
|
||||
@@ -559,7 +633,7 @@ class WebScrapingMixin:
|
||||
"""))
|
||||
raise AssertionError(_("Unsupported attribute: %s") % attr)
|
||||
|
||||
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float = 5) -> Element:
|
||||
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float | None = None) -> Element:
|
||||
"""
|
||||
Locates an HTML element by ID.
|
||||
|
||||
@@ -652,91 +726,130 @@ class WebScrapingMixin:
|
||||
# Return primitive values as-is
|
||||
return data
|
||||
|
||||
async def web_find(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float = 5) -> Element:
|
||||
async def web_find(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> Element:
|
||||
"""
|
||||
Locates an HTML element by the given selector type and value.
|
||||
|
||||
:param timeout: timeout in seconds
|
||||
:param timeout: timeout in seconds (base value before multiplier/backoff)
|
||||
:raises TimeoutError: if element could not be found within time
|
||||
"""
|
||||
|
||||
async def attempt(effective_timeout:float) -> Element:
|
||||
return await self._web_find_once(selector_type, selector_value, effective_timeout, parent = parent)
|
||||
|
||||
return await self._run_with_timeout_retries(
|
||||
attempt,
|
||||
description = f"web_find({selector_type.name}, {selector_value})",
|
||||
key = "default",
|
||||
override = timeout
|
||||
)
|
||||
|
||||
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> list[Element]:
|
||||
"""
|
||||
Locates multiple HTML elements by the given selector type and value.
|
||||
|
||||
:param timeout: timeout in seconds (base value before multiplier/backoff)
|
||||
:raises TimeoutError: if element could not be found within time
|
||||
"""
|
||||
|
||||
async def attempt(effective_timeout:float) -> list[Element]:
|
||||
return await self._web_find_all_once(selector_type, selector_value, effective_timeout, parent = parent)
|
||||
|
||||
return await self._run_with_timeout_retries(
|
||||
attempt,
|
||||
description = f"web_find_all({selector_type.name}, {selector_value})",
|
||||
key = "default",
|
||||
override = timeout
|
||||
)
|
||||
|
||||
async def _web_find_once(self, selector_type:By, selector_value:str, timeout:float, *, parent:Element | None = None) -> Element:
|
||||
timeout_suffix = f" within {timeout} seconds."
|
||||
|
||||
match selector_type:
|
||||
case By.ID:
|
||||
escaped_id = selector_value.translate(METACHAR_ESCAPER)
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector(f"#{escaped_id}", parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found with ID '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML element found with ID '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.CLASS_NAME:
|
||||
escaped_classname = selector_value.translate(METACHAR_ESCAPER)
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector(f".{escaped_classname}", parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found with CSS class '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML element found with CSS class '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.TAG_NAME:
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector(selector_value, parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found of tag <{selector_value}> within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML element found of tag <{selector_value}>{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.CSS_SELECTOR:
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector(selector_value, parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.TEXT:
|
||||
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||
return await self.web_await(
|
||||
lambda: self.page.find_element_by_text(selector_value, best_match = True),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found containing text '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML element found containing text '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.XPATH:
|
||||
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||
return await self.web_await(
|
||||
lambda: self.page.find_element_by_text(selector_value, best_match = True),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML element found using XPath '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML element found using XPath '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
|
||||
raise AssertionError(_("Unsupported selector type: %s") % selector_type)
|
||||
|
||||
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float = 5) -> list[Element]:
|
||||
"""
|
||||
Locates an HTML element by ID.
|
||||
async def _web_find_all_once(self, selector_type:By, selector_value:str, timeout:float, *, parent:Element | None = None) -> list[Element]:
|
||||
timeout_suffix = f" within {timeout} seconds."
|
||||
|
||||
:param timeout: timeout in seconds
|
||||
:raises TimeoutError: if element could not be found within time
|
||||
"""
|
||||
match selector_type:
|
||||
case By.CLASS_NAME:
|
||||
escaped_classname = selector_value.translate(METACHAR_ESCAPER)
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector_all(f".{escaped_classname}", parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found with CSS class '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML elements found with CSS class '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.CSS_SELECTOR:
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector_all(selector_value, parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found using CSS selector '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML elements found using CSS selector '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.TAG_NAME:
|
||||
return await self.web_await(
|
||||
lambda: self.page.query_selector_all(selector_value, parent),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found of tag <{selector_value}> within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML elements found of tag <{selector_value}>{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.TEXT:
|
||||
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||
return await self.web_await(
|
||||
lambda: self.page.find_elements_by_text(selector_value),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found containing text '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML elements found containing text '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
case By.XPATH:
|
||||
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||
return await self.web_await(
|
||||
lambda: self.page.find_elements_by_text(selector_value),
|
||||
timeout = timeout,
|
||||
timeout_error_message = f"No HTML elements found using XPath '{selector_value}' within {timeout} seconds.")
|
||||
timeout_error_message = f"No HTML elements found using XPath '{selector_value}'{timeout_suffix}",
|
||||
apply_multiplier = False)
|
||||
|
||||
raise AssertionError(_("Unsupported selector type: %s") % selector_type)
|
||||
|
||||
async def web_input(self, selector_type:By, selector_value:str, text:str | int, *, timeout:int | float = 5) -> Element:
|
||||
async def web_input(self, selector_type:By, selector_value:str, text:str | int, *, timeout:int | float | None = None) -> Element:
|
||||
"""
|
||||
Enters text into an HTML input field.
|
||||
|
||||
@@ -749,10 +862,10 @@ class WebScrapingMixin:
|
||||
await self.web_sleep()
|
||||
return input_field
|
||||
|
||||
async def web_open(self, url:str, *, timeout:int | float = 15_000, reload_if_already_open:bool = False) -> None:
|
||||
async def web_open(self, url:str, *, timeout:int | float | None = None, reload_if_already_open:bool = False) -> None:
|
||||
"""
|
||||
:param url: url to open in browser
|
||||
:param timeout: timespan in seconds within the page needs to be loaded
|
||||
:param timeout: timespan in seconds within the page needs to be loaded (base value)
|
||||
:param reload_if_already_open: if False does nothing if the URL is already open in the browser
|
||||
:raises TimeoutException: if page did not open within given timespan
|
||||
"""
|
||||
@@ -761,10 +874,15 @@ class WebScrapingMixin:
|
||||
LOG.debug(" => skipping, [%s] is already open", url)
|
||||
return
|
||||
self.page = await self.browser.get(url = url, new_tab = False, new_window = False)
|
||||
await self.web_await(lambda: self.web_execute("document.readyState == 'complete'"), timeout = timeout,
|
||||
timeout_error_message = f"Page did not finish loading within {timeout} seconds.")
|
||||
page_timeout = self._effective_timeout("page_load", timeout)
|
||||
await self.web_await(
|
||||
lambda: self.web_execute("document.readyState == 'complete'"),
|
||||
timeout = page_timeout,
|
||||
timeout_error_message = f"Page did not finish loading within {page_timeout} seconds.",
|
||||
apply_multiplier = False
|
||||
)
|
||||
|
||||
async def web_text(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float = 5) -> str:
|
||||
async def web_text(self, selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> str:
|
||||
return str(await (await self.web_find(selector_type, selector_value, parent = parent, timeout = timeout)).apply("""
|
||||
function (elem) {
|
||||
let sel = window.getSelection()
|
||||
@@ -835,7 +953,7 @@ class WebScrapingMixin:
|
||||
await self.web_execute(f"window.scrollTo(0, {current_y_pos})")
|
||||
await asyncio.sleep(scroll_length / scroll_speed / 2) # double speed
|
||||
|
||||
async def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:int | float = 5) -> Element:
|
||||
async def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:int | float | None = None) -> Element:
|
||||
"""
|
||||
Selects an <option/> of a <select/> HTML element.
|
||||
|
||||
@@ -895,7 +1013,11 @@ class WebScrapingMixin:
|
||||
port_available = await self._check_port_with_retry(remote_host, remote_port)
|
||||
if port_available:
|
||||
try:
|
||||
version_info = detect_chrome_version_from_remote_debugging(remote_host, remote_port)
|
||||
version_info = detect_chrome_version_from_remote_debugging(
|
||||
remote_host,
|
||||
remote_port,
|
||||
timeout = self._effective_timeout("chrome_remote_debugging")
|
||||
)
|
||||
if version_info:
|
||||
LOG.debug(" -> Detected version from existing browser: %s", version_info)
|
||||
else:
|
||||
@@ -910,7 +1032,10 @@ class WebScrapingMixin:
|
||||
binary_path = self.browser_config.binary_location
|
||||
if binary_path:
|
||||
LOG.debug(" -> No remote browser detected, trying binary detection")
|
||||
version_info = detect_chrome_version_from_binary(binary_path)
|
||||
version_info = detect_chrome_version_from_binary(
|
||||
binary_path,
|
||||
timeout = self._effective_timeout("chrome_binary_detection")
|
||||
)
|
||||
|
||||
# Validate if Chrome 136+ detected
|
||||
if version_info and version_info.is_chrome_136_plus:
|
||||
@@ -977,7 +1102,10 @@ class WebScrapingMixin:
|
||||
binary_path = self.browser_config.binary_location
|
||||
diagnostic_info = get_chrome_version_diagnostic_info(
|
||||
binary_path = binary_path,
|
||||
remote_port = remote_port if remote_port > 0 else None
|
||||
remote_host = "127.0.0.1",
|
||||
remote_port = remote_port if remote_port > 0 else None,
|
||||
remote_timeout = self._effective_timeout("chrome_remote_debugging"),
|
||||
binary_timeout = self._effective_timeout("chrome_binary_detection")
|
||||
)
|
||||
|
||||
# Report binary detection results
|
||||
|
||||
Reference in New Issue
Block a user