mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
feat: Replace custom RemoteObject wrapper with direct NoDriver 0.47+ usage (#652)
## ℹ️ Description

*Replace custom RemoteObject serialization wrapper with direct NoDriver 0.47+ RemoteObject API usage for better performance and maintainability.*

- **Motivation**: The custom wrapper added unnecessary complexity, since NoDriver 0.47+ provides a direct RemoteObject API
- **Context**: Upgrading from NoDriver 0.39 to 0.47 introduced RemoteObject, and we want to use it as intended
- **Goal**: Future-proof implementation using the standard NoDriver patterns

## 📋 Changes Summary

- Replace custom serialization wrapper with direct RemoteObject API usage
- Implement proper RemoteObject detection and conversion in web_execute()
- Add comprehensive _convert_remote_object_value() method for recursive conversion
- Handle key/value list format from deep_serialized_value.value
- Add type guards and proper type checking for RemoteObject instances
- Maintain internal API stability while using RemoteObject as intended
- Add 19 comprehensive test cases covering all conversion scenarios
- Application tested and working with real ad download, update and publish

### ⚙️ Type of Change

- [x] ✨ New feature (adds new functionality without breaking existing usage)
- [x] 🐞 Bug fix (non-breaking change which fixes an issue)

## ✅ Checklist

- [x] I have reviewed my changes to ensure they meet the project's standards.
- [x] I have tested my changes and ensured that all tests pass (pdm run test).
- [x] I have formatted the code (pdm run format).
- [x] I have verified that linting passes (pdm run lint).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
This commit is contained in:
@@ -1376,7 +1376,7 @@ def main(args:list[str]) -> None:
|
||||
try:
|
||||
bot = KleinanzeigenBot()
|
||||
atexit.register(bot.close_browser_session)
|
||||
nodriver.loop().run_until_complete(bot.run(args))
|
||||
nodriver.loop().run_until_complete(bot.run(args)) # type: ignore[attr-defined]
|
||||
except CaptchaEncountered as ex:
|
||||
raise ex
|
||||
except Exception:
|
||||
|
||||
@@ -391,9 +391,6 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
|
||||
"4. Check browser binary permissions: %s": "4. Überprüfen Sie die Browser-Binärdatei-Berechtigungen: %s"
|
||||
"4. Check if any antivirus or security software is blocking the connection": "4. Überprüfen Sie, ob Antiviren- oder Sicherheitssoftware die Verbindung blockiert"
|
||||
|
||||
_convert_remote_object_result:
|
||||
"Failed to convert RemoteObject to dict: %s": "Fehler beim Konvertieren von RemoteObject zu dict: %s"
|
||||
|
||||
web_check:
|
||||
"Unsupported attribute: %s": "Nicht unterstütztes Attribut: %s"
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ except ImportError:
|
||||
from typing import NoReturn as Never # Python <3.11
|
||||
|
||||
import nodriver, psutil # isort: skip
|
||||
from typing import TYPE_CHECKING, TypeGuard
|
||||
|
||||
from nodriver.core.browser import Browser
|
||||
from nodriver.core.config import Config
|
||||
from nodriver.core.element import Element
|
||||
@@ -27,6 +29,18 @@ from .chrome_version_detector import (
|
||||
)
|
||||
from .misc import T, ensure
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nodriver.cdp.runtime import RemoteObject
|
||||
|
||||
# Constants for RemoteObject conversion
|
||||
_KEY_VALUE_PAIR_SIZE = 2
|
||||
|
||||
|
||||
def _is_remote_object(obj:Any) -> TypeGuard["RemoteObject"]:
|
||||
"""Type guard to check if an object is a RemoteObject."""
|
||||
return hasattr(obj, "__class__") and "RemoteObject" in str(type(obj))
|
||||
|
||||
|
||||
__all__ = [
|
||||
"Browser",
|
||||
"BrowserConfig",
|
||||
@@ -42,10 +56,6 @@ LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
|
||||
# see https://api.jquery.com/category/selectors/
|
||||
METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f"\\{ch}" for ch in '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~'})
|
||||
|
||||
# Constants for RemoteObject handling
|
||||
_REMOTE_OBJECT_TYPE_VALUE_PAIR_SIZE:Final[int] = 2
|
||||
_KEY_VALUE_PAIR_SIZE:Final[int] = 2
|
||||
|
||||
|
||||
def _is_admin() -> bool:
|
||||
"""Check if the current process is running with admin/root privileges."""
|
||||
@@ -132,7 +142,7 @@ class WebScrapingMixin:
|
||||
)
|
||||
cfg.host = remote_host
|
||||
cfg.port = remote_port
|
||||
self.browser = await nodriver.start(cfg)
|
||||
self.browser = await nodriver.start(cfg) # type: ignore[attr-defined]
|
||||
LOG.info("New Browser session is %s", self.browser.websocket_url)
|
||||
return
|
||||
except Exception as e:
|
||||
@@ -250,7 +260,7 @@ class WebScrapingMixin:
|
||||
cfg.add_extension(crx_extension)
|
||||
|
||||
try:
|
||||
self.browser = await nodriver.start(cfg)
|
||||
self.browser = await nodriver.start(cfg) # type: ignore[attr-defined]
|
||||
LOG.info("New Browser session is %s", self.browser.websocket_url)
|
||||
except Exception as e:
|
||||
# Clean up any resources that were created during setup
|
||||
@@ -565,98 +575,80 @@ class WebScrapingMixin:
|
||||
"""
|
||||
Executes the given JavaScript code in the context of the current page.
|
||||
|
||||
:return: The javascript's return value
|
||||
Handles nodriver 0.47+ RemoteObject results by converting them to regular Python objects.
|
||||
Uses the RemoteObject API (value, deep_serialized_value) for proper conversion.
|
||||
|
||||
:param jscode: JavaScript code to execute
|
||||
:return: The javascript's return value as a regular Python object
|
||||
"""
|
||||
# Try to get the result with return_by_value=True first
|
||||
result = await self.page.evaluate(jscode, await_promise = True, return_by_value = True)
|
||||
|
||||
# If we got a RemoteObject, use the proper API to get properties
|
||||
if _is_remote_object(result):
|
||||
try:
|
||||
# Type cast to RemoteObject for type checker
|
||||
remote_obj:"RemoteObject" = result
|
||||
|
||||
# Use the proper RemoteObject API - try to get the value directly first
|
||||
if hasattr(remote_obj, "value") and remote_obj.value is not None:
|
||||
return remote_obj.value
|
||||
|
||||
# For complex objects, use deep_serialized_value which contains the actual data
|
||||
if hasattr(remote_obj, "deep_serialized_value") and remote_obj.deep_serialized_value:
|
||||
value = remote_obj.deep_serialized_value.value
|
||||
# Convert the complex nested structure to a proper dictionary
|
||||
return self._convert_remote_object_value(value)
|
||||
|
||||
# Fallback to the original result
|
||||
return remote_obj
|
||||
except Exception as e:
|
||||
LOG.debug("Failed to extract value from RemoteObject: %s", e)
|
||||
return result
|
||||
|
||||
# debug log the jscode but avoid excessive debug logging of window.scrollTo calls
|
||||
_prev_jscode:str = getattr(self.__class__.web_execute, "_prev_jscode", "")
|
||||
if not (jscode == _prev_jscode or (jscode.startswith("window.scrollTo") and _prev_jscode.startswith("window.scrollTo"))):
|
||||
LOG.debug("web_execute(`%s`) = `%s`", jscode, result)
|
||||
self.__class__.web_execute._prev_jscode = jscode # type: ignore[attr-defined] # noqa: SLF001 Private member accessed
|
||||
|
||||
# Handle nodriver 0.47+ RemoteObject behavior
|
||||
# If result is a RemoteObject with deep_serialized_value, convert it to a dict
|
||||
if hasattr(result, "deep_serialized_value"):
|
||||
return self._convert_remote_object_result(result)
|
||||
|
||||
# Fix for nodriver 0.47+ bug: convert list-of-pairs back to dict
|
||||
if isinstance(result, list) and all(isinstance(item, list) and len(item) == _KEY_VALUE_PAIR_SIZE for item in result):
|
||||
# This looks like a list of [key, value] pairs that should be a dict
|
||||
converted_dict = {}
|
||||
for key, value in result:
|
||||
# Recursively convert nested structures
|
||||
converted_dict[key] = self._convert_remote_object_dict(value)
|
||||
return converted_dict
|
||||
|
||||
return result
|
||||
|
||||
def _convert_remote_object_result(self, result:Any) -> Any:
|
||||
def _convert_remote_object_value(self, data:Any) -> Any:
|
||||
"""
|
||||
Converts a RemoteObject result to a regular Python object.
|
||||
Recursively converts RemoteObject values to regular Python objects.
|
||||
|
||||
Handles the deep_serialized_value conversion for nodriver 0.47+ compatibility.
|
||||
Handles the complex nested structure from deep_serialized_value.
|
||||
Converts key/value lists to dictionaries and processes type/value structures.
|
||||
|
||||
:param data: The data to convert (list, dict, or primitive)
|
||||
:return: Converted Python object
|
||||
"""
|
||||
deep_serialized = getattr(result, "deep_serialized_value", None)
|
||||
if deep_serialized is None:
|
||||
return result
|
||||
|
||||
try:
|
||||
# Convert the deep_serialized_value to a regular dict
|
||||
serialized_data = getattr(deep_serialized, "value", None)
|
||||
if serialized_data is None:
|
||||
return result
|
||||
|
||||
if isinstance(serialized_data, list):
|
||||
# Convert list of [key, value] pairs to dict, handling nested RemoteObjects
|
||||
converted_dict = {}
|
||||
for key, value in serialized_data:
|
||||
# Handle the case where value is a RemoteObject with type/value structure
|
||||
if isinstance(value, dict) and "type" in value and "value" in value:
|
||||
converted_dict[key] = self._convert_remote_object_dict(value)
|
||||
else:
|
||||
converted_dict[key] = self._convert_remote_object_dict(value)
|
||||
return converted_dict
|
||||
|
||||
if isinstance(serialized_data, dict):
|
||||
# Handle nested RemoteObject structures like {'type': 'number', 'value': 200}
|
||||
return self._convert_remote_object_dict(serialized_data)
|
||||
|
||||
return serialized_data
|
||||
except (AttributeError, TypeError, ValueError) as e:
|
||||
LOG.warning("Failed to convert RemoteObject to dict: %s", e)
|
||||
# Return the original result if conversion fails
|
||||
return result
|
||||
|
||||
def _convert_remote_object_dict(self, data:Any) -> Any:
|
||||
"""
|
||||
Recursively converts RemoteObject dict structures to regular Python objects.
|
||||
|
||||
Handles structures like {'type': 'number', 'value': 200} or {'type': 'string', 'value': 'text'}.
|
||||
"""
|
||||
if isinstance(data, dict):
|
||||
# Check if this is a RemoteObject value structure
|
||||
if "type" in data and "value" in data and len(data) == _REMOTE_OBJECT_TYPE_VALUE_PAIR_SIZE:
|
||||
# Extract the actual value and recursively convert it
|
||||
value = data["value"]
|
||||
if isinstance(value, list) and all(isinstance(item, list) and len(item) == _KEY_VALUE_PAIR_SIZE for item in value):
|
||||
# This is a list of [key, value] pairs that should be a dict
|
||||
converted_dict = {}
|
||||
for key, val in value:
|
||||
converted_dict[key] = self._convert_remote_object_dict(val)
|
||||
return converted_dict
|
||||
return self._convert_remote_object_dict(value)
|
||||
# Recursively convert nested dicts
|
||||
return {key: self._convert_remote_object_dict(value) for key, value in data.items()}
|
||||
if isinstance(data, list):
|
||||
# Check if this is a list of [key, value] pairs that should be a dict
|
||||
if all(isinstance(item, list) and len(item) == _KEY_VALUE_PAIR_SIZE for item in data):
|
||||
# Check if this is a key/value list format: [["key", "value"], ...]
|
||||
if data and isinstance(data[0], list) and len(data[0]) == _KEY_VALUE_PAIR_SIZE:
|
||||
# Convert list of [key, value] pairs to dict
|
||||
converted_dict = {}
|
||||
for key, value in data:
|
||||
converted_dict[key] = self._convert_remote_object_dict(value)
|
||||
for item in data:
|
||||
if len(item) == _KEY_VALUE_PAIR_SIZE:
|
||||
key, value = item
|
||||
# Handle nested structures in values
|
||||
if isinstance(value, dict) and "type" in value and "value" in value:
|
||||
# Extract the actual value from the type/value structure
|
||||
converted_dict[key] = self._convert_remote_object_value(value["value"])
|
||||
else:
|
||||
converted_dict[key] = self._convert_remote_object_value(value)
|
||||
return converted_dict
|
||||
# Recursively convert lists
|
||||
return [self._convert_remote_object_dict(item) for item in data]
|
||||
# Regular list - convert each item
|
||||
return [self._convert_remote_object_value(item) for item in data]
|
||||
|
||||
if isinstance(data, dict):
|
||||
# Handle type/value structures: {'type': 'string', 'value': 'actual_value'}
|
||||
if "type" in data and "value" in data:
|
||||
return self._convert_remote_object_value(data["value"])
|
||||
# Regular dict - convert each value
|
||||
return {key: self._convert_remote_object_value(value) for key, value in data.items()}
|
||||
|
||||
# Return primitive values as-is
|
||||
return data
|
||||
|
||||
|
||||
Reference in New Issue
Block a user