From 339d66ed47de990c62242be8ba1bf9259bdc30c5 Mon Sep 17 00:00:00 2001 From: Jens <1742418+1cu@users.noreply.github.com> Date: Mon, 20 Oct 2025 08:52:06 +0200 Subject: [PATCH] feat: Replace custom RemoteObject wrapper with direct NoDriver 0.47+ usage (#652) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## ℹ️ Description *Replace custom RemoteObject serialization wrapper with direct NoDriver 0.47+ RemoteObject API usage for better performance and maintainability.* - **Motivation**: The custom wrapper was unnecessary complexity when NoDriver 0.47+ provides direct RemoteObject API - **Context**: Upgrading from NoDriver 0.39 to 0.47 introduced RemoteObject, and we want to use it as intended - **Goal**: Future-proof implementation using the standard NoDriver patterns ## 📋 Changes Summary - Replace custom serialization wrapper with direct RemoteObject API usage - Implement proper RemoteObject detection and conversion in web_execute() - Add comprehensive _convert_remote_object_value() method for recursive conversion - Handle key/value list format from deep_serialized_value.value - Add type guards and proper type checking for RemoteObject instances - Maintain internal API stability while using RemoteObject as intended - Add 19 comprehensive test cases covering all conversion scenarios - Application tested and working with real ad download, update and publish ### ⚙️ Type of Change - [x] ✨ New feature (adds new functionality without breaking existing usage) - [x] 🐞 Bug fix (non-breaking change which fixes an issue) ## ✅ Checklist - [x] I have reviewed my changes to ensure they meet the project's standards. - [x] I have tested my changes and ensured that all tests pass (pdm run test). - [x] I have formatted the code (pdm run format). - [x] I have verified that linting passes (pdm run lint). - [x] I have updated documentation where necessary. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --- src/kleinanzeigen_bot/__init__.py | 2 +- .../resources/translations.de.yaml | 3 - .../utils/web_scraping_mixin.py | 154 +++--- .../test_web_scraping_mixin_integration.py | 2 +- tests/unit/test_web_scraping_mixin.py | 30 +- .../test_web_scraping_mixin_remoteobject.py | 490 ++++++------------ 6 files changed, 262 insertions(+), 419 deletions(-) diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 2a06de6..8c66696 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -1376,7 +1376,7 @@ def main(args:list[str]) -> None: try: bot = KleinanzeigenBot() atexit.register(bot.close_browser_session) - nodriver.loop().run_until_complete(bot.run(args)) + nodriver.loop().run_until_complete(bot.run(args)) # type: ignore[attr-defined] except CaptchaEncountered as ex: raise ex except Exception: diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index a29032a..ca76bde 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -391,9 +391,6 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py: "4. Check browser binary permissions: %s": "4. Überprüfen Sie die Browser-Binärdatei-Berechtigungen: %s" "4. Check if any antivirus or security software is blocking the connection": "4. Überprüfen Sie, ob Antiviren- oder Sicherheitssoftware die Verbindung blockiert" - _convert_remote_object_result: - "Failed to convert RemoteObject to dict: %s": "Fehler beim Konvertieren von RemoteObject zu dict: %s" - web_check: "Unsupported attribute: %s": "Nicht unterstütztes Attribut: %s" diff --git a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py index 0f7ac56..a4e4669 100644 --- a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py +++ b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py @@ -12,6 +12,8 @@ except ImportError: from typing import NoReturn as Never # Python <3.11 import nodriver, psutil # isort: skip +from typing import TYPE_CHECKING, TypeGuard + from nodriver.core.browser import Browser from nodriver.core.config import Config from nodriver.core.element import Element @@ -27,6 +29,18 @@ from .chrome_version_detector import ( ) from .misc import T, ensure +if TYPE_CHECKING: + from nodriver.cdp.runtime import RemoteObject + +# Constants for RemoteObject conversion +_KEY_VALUE_PAIR_SIZE = 2 + + +def _is_remote_object(obj:Any) -> TypeGuard["RemoteObject"]: + """Type guard to check if an object is a RemoteObject.""" + return hasattr(obj, "__class__") and "RemoteObject" in str(type(obj)) + + __all__ = [ "Browser", "BrowserConfig", @@ -42,10 +56,6 @@ LOG:Final[loggers.Logger] = loggers.get_logger(__name__) # see https://api.jquery.com/category/selectors/ METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f"\\{ch}" for ch in '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~'}) -# Constants for RemoteObject handling -_REMOTE_OBJECT_TYPE_VALUE_PAIR_SIZE:Final[int] = 2 -_KEY_VALUE_PAIR_SIZE:Final[int] = 2 - def _is_admin() -> bool: """Check if the current process is running with admin/root privileges.""" @@ -132,7 +142,7 @@ class WebScrapingMixin: ) cfg.host = remote_host cfg.port = remote_port - self.browser = await nodriver.start(cfg) + self.browser = await nodriver.start(cfg) # type: ignore[attr-defined] LOG.info("New Browser session is %s", self.browser.websocket_url) return except Exception as e: @@ -250,7 +260,7 @@ class WebScrapingMixin: cfg.add_extension(crx_extension) try: - self.browser = await nodriver.start(cfg) + self.browser = await nodriver.start(cfg) # type: ignore[attr-defined] LOG.info("New Browser session is %s", self.browser.websocket_url) except Exception as e: # Clean up any resources that were created during setup @@ -565,98 +575,80 @@ class WebScrapingMixin: """ Executes the given JavaScript code in the context of the current page. - :return: The javascript's return value + Handles nodriver 0.47+ RemoteObject results by converting them to regular Python objects. + Uses the RemoteObject API (value, deep_serialized_value) for proper conversion. + + :param jscode: JavaScript code to execute + :return: The javascript's return value as a regular Python object """ + # Try to get the result with return_by_value=True first result = await self.page.evaluate(jscode, await_promise = True, return_by_value = True) + # If we got a RemoteObject, use the proper API to get properties + if _is_remote_object(result): + try: + # Type cast to RemoteObject for type checker + remote_obj:"RemoteObject" = result + + # Use the proper RemoteObject API - try to get the value directly first + if hasattr(remote_obj, "value") and remote_obj.value is not None: + return remote_obj.value + + # For complex objects, use deep_serialized_value which contains the actual data + if hasattr(remote_obj, "deep_serialized_value") and remote_obj.deep_serialized_value: + value = remote_obj.deep_serialized_value.value + # Convert the complex nested structure to a proper dictionary + return self._convert_remote_object_value(value) + + # Fallback to the original result + return remote_obj + except Exception as e: + LOG.debug("Failed to extract value from RemoteObject: %s", e) + return result + # debug log the jscode but avoid excessive debug logging of window.scrollTo calls _prev_jscode:str = getattr(self.__class__.web_execute, "_prev_jscode", "") if not (jscode == _prev_jscode or (jscode.startswith("window.scrollTo") and _prev_jscode.startswith("window.scrollTo"))): LOG.debug("web_execute(`%s`) = `%s`", jscode, result) self.__class__.web_execute._prev_jscode = jscode # type: ignore[attr-defined] # noqa: SLF001 Private member accessed - # Handle nodriver 0.47+ RemoteObject behavior - # If result is a RemoteObject with deep_serialized_value, convert it to a dict - if hasattr(result, "deep_serialized_value"): - return self._convert_remote_object_result(result) - - # Fix for nodriver 0.47+ bug: convert list-of-pairs back to dict - if isinstance(result, list) and all(isinstance(item, list) and len(item) == _KEY_VALUE_PAIR_SIZE for item in result): - # This looks like a list of [key, value] pairs that should be a dict - converted_dict = {} - for key, value in result: - # Recursively convert nested structures - converted_dict[key] = self._convert_remote_object_dict(value) - return converted_dict - return result - def _convert_remote_object_result(self, result:Any) -> Any: + def _convert_remote_object_value(self, data:Any) -> Any: """ - Converts a RemoteObject result to a regular Python object. + Recursively converts RemoteObject values to regular Python objects. - Handles the deep_serialized_value conversion for nodriver 0.47+ compatibility. + Handles the complex nested structure from deep_serialized_value. + Converts key/value lists to dictionaries and processes type/value structures. + + :param data: The data to convert (list, dict, or primitive) + :return: Converted Python object """ - deep_serialized = getattr(result, "deep_serialized_value", None) - if deep_serialized is None: - return result - - try: - # Convert the deep_serialized_value to a regular dict - serialized_data = getattr(deep_serialized, "value", None) - if serialized_data is None: - return result - - if isinstance(serialized_data, list): - # Convert list of [key, value] pairs to dict, handling nested RemoteObjects - converted_dict = {} - for key, value in serialized_data: - # Handle the case where value is a RemoteObject with type/value structure - if isinstance(value, dict) and "type" in value and "value" in value: - converted_dict[key] = self._convert_remote_object_dict(value) - else: - converted_dict[key] = self._convert_remote_object_dict(value) - return converted_dict - - if isinstance(serialized_data, dict): - # Handle nested RemoteObject structures like {'type': 'number', 'value': 200} - return self._convert_remote_object_dict(serialized_data) - - return serialized_data - except (AttributeError, TypeError, ValueError) as e: - LOG.warning("Failed to convert RemoteObject to dict: %s", e) - # Return the original result if conversion fails - return result - - def _convert_remote_object_dict(self, data:Any) -> Any: - """ - Recursively converts RemoteObject dict structures to regular Python objects. - - Handles structures like {'type': 'number', 'value': 200} or {'type': 'string', 'value': 'text'}. - """ - if isinstance(data, dict): - # Check if this is a RemoteObject value structure - if "type" in data and "value" in data and len(data) == _REMOTE_OBJECT_TYPE_VALUE_PAIR_SIZE: - # Extract the actual value and recursively convert it - value = data["value"] - if isinstance(value, list) and all(isinstance(item, list) and len(item) == _KEY_VALUE_PAIR_SIZE for item in value): - # This is a list of [key, value] pairs that should be a dict - converted_dict = {} - for key, val in value: - converted_dict[key] = self._convert_remote_object_dict(val) - return converted_dict - return self._convert_remote_object_dict(value) - # Recursively convert nested dicts - return {key: self._convert_remote_object_dict(value) for key, value in data.items()} if isinstance(data, list): - # Check if this is a list of [key, value] pairs that should be a dict - if all(isinstance(item, list) and len(item) == _KEY_VALUE_PAIR_SIZE for item in data): + # Check if this is a key/value list format: [["key", "value"], ...] + if data and isinstance(data[0], list) and len(data[0]) == _KEY_VALUE_PAIR_SIZE: + # Convert list of [key, value] pairs to dict converted_dict = {} - for key, value in data: - converted_dict[key] = self._convert_remote_object_dict(value) + for item in data: + if len(item) == _KEY_VALUE_PAIR_SIZE: + key, value = item + # Handle nested structures in values + if isinstance(value, dict) and "type" in value and "value" in value: + # Extract the actual value from the type/value structure + converted_dict[key] = self._convert_remote_object_value(value["value"]) + else: + converted_dict[key] = self._convert_remote_object_value(value) return converted_dict - # Recursively convert lists - return [self._convert_remote_object_dict(item) for item in data] + # Regular list - convert each item + return [self._convert_remote_object_value(item) for item in data] + + if isinstance(data, dict): + # Handle type/value structures: {'type': 'string', 'value': 'actual_value'} + if "type" in data and "value" in data: + return self._convert_remote_object_value(data["value"]) + # Regular dict - convert each value + return {key: self._convert_remote_object_value(value) for key, value in data.items()} + # Return primitive values as-is return data diff --git a/tests/integration/test_web_scraping_mixin_integration.py b/tests/integration/test_web_scraping_mixin_integration.py index e16c948..90fe214 100644 --- a/tests/integration/test_web_scraping_mixin_integration.py +++ b/tests/integration/test_web_scraping_mixin_integration.py @@ -34,4 +34,4 @@ async def atest_init() -> None: @pytest.mark.flaky(reruns = 4, reruns_delay = 5) @pytest.mark.itest def test_init() -> None: - nodriver.loop().run_until_complete(atest_init()) + nodriver.loop().run_until_complete(atest_init()) # type: ignore[attr-defined] diff --git a/tests/unit/test_web_scraping_mixin.py b/tests/unit/test_web_scraping_mixin.py index fbdc61d..d205a3d 100644 --- a/tests/unit/test_web_scraping_mixin.py +++ b/tests/unit/test_web_scraping_mixin.py @@ -468,13 +468,13 @@ class TestWebScrapingBrowserConfiguration: def add_extension(self, ext:str) -> None: self._extensions.append(ext) # Use private extensions list - # Mock nodriver.start to return a mock browser + # Mock nodriver.start to return a mock browser # type: ignore[attr-defined] mock_browser = AsyncMock() mock_browser.websocket_url = "ws://localhost:9222" monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser)) # Mock Config class - monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue] + monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined] # Mock os.path.exists to return True for the browser binary and use real exists for Preferences file (and Edge) edge_path = "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge" @@ -557,13 +557,13 @@ class TestWebScrapingBrowserConfiguration: def add_extension(self, ext:str) -> None: self.extensions.append(ext) - # Mock nodriver.start to return a mock browser + # Mock nodriver.start to return a mock browser # type: ignore[attr-defined] mock_browser = AsyncMock() mock_browser.websocket_url = "ws://localhost:9222" monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser)) # Mock Config class - monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue] + monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined] # Mock os.path.exists to return True for both Chrome and Edge paths monkeypatch.setattr(os.path, "exists", lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge"}) @@ -576,7 +576,7 @@ class TestWebScrapingBrowserConfiguration: await scraper.create_browser_session() # Verify browser arguments - config = cast(Mock, nodriver.start).call_args[0][0] + config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined] assert "--custom-arg=value" in config.browser_args assert "--another-arg" in config.browser_args assert "--incognito" in config.browser_args @@ -589,7 +589,7 @@ class TestWebScrapingBrowserConfiguration: await scraper.create_browser_session() # Verify Edge-specific arguments - config = cast(Mock, nodriver.start).call_args[0][0] + config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined] assert "-inprivate" in config.browser_args assert os.environ.get("MSEDGEDRIVER_TELEMETRY_OPTOUT") == "1" @@ -620,13 +620,13 @@ class TestWebScrapingBrowserConfiguration: with zipfile.ZipFile(ext2, "w") as z: z.writestr("manifest.json", '{"name": "Test Extension 2"}') - # Mock nodriver.start to return a mock browser + # Mock nodriver.start to return a mock browser # type: ignore[attr-defined] mock_browser = AsyncMock() mock_browser.websocket_url = "ws://localhost:9222" monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser)) # Mock Config class - monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue] + monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined] # Mock os.path.exists to return True for browser binaries and extension files, real_exists for others real_exists = os.path.exists @@ -644,7 +644,7 @@ class TestWebScrapingBrowserConfiguration: await scraper.create_browser_session() # Verify extensions were loaded - config = cast(Mock, nodriver.start).call_args[0][0] + config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined] assert len(config._extensions) == 2 for ext_path in config._extensions: assert os.path.exists(ext_path) @@ -713,11 +713,11 @@ class TestWebScrapingBrowserConfiguration: def add_extension(self, ext:str) -> None: self._extensions.append(ext) - # Mock nodriver.start to return a mock browser + # Mock nodriver.start to return a mock browser # type: ignore[attr-defined] mock_browser = AsyncMock() mock_browser.websocket_url = "ws://localhost:9222" monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser)) - monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue] + monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined] monkeypatch.setattr(os.path, "exists", lambda p: True) # Simulate state file in user_data_dir @@ -772,7 +772,7 @@ class TestWebScrapingBrowserConfiguration: temp_file = tmp_path / "temp_resource" temp_file.write_text("test") - # Mock nodriver.start to raise an exception + # Mock nodriver.start to raise an exception # type: ignore[attr-defined] async def mock_start_fail(*args:object, **kwargs:object) -> NoReturn: if temp_file.exists(): temp_file.unlink() @@ -786,7 +786,7 @@ class TestWebScrapingBrowserConfiguration: return mock_browser monkeypatch.setattr(nodriver, "start", mock_start_fail) - monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue] + monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined] # Don't mock os.path.exists globally - let the file operations work normally # Attempt to create a session @@ -801,7 +801,7 @@ class TestWebScrapingBrowserConfiguration: assert scraper.browser is None assert scraper.page is None - # Now patch nodriver.start to return a new mock browser each time + # Now patch nodriver.start to return a new mock browser each time # type: ignore[attr-defined] mock_browser = make_mock_browser() mock_page = TrulyAwaitableMockPage() mock_browser.get = AsyncMock(return_value = mock_page) @@ -846,7 +846,7 @@ class TestWebScrapingBrowserConfiguration: mock_page = TrulyAwaitableMockPage() mock_browser.get = AsyncMock(return_value = mock_page) monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser)) - monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue] + monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined] monkeypatch.setattr(os.path, "exists", lambda p: True) # Mock create_browser_session to ensure proper setup diff --git a/tests/unit/test_web_scraping_mixin_remoteobject.py b/tests/unit/test_web_scraping_mixin_remoteobject.py index dc70e92..e65c45f 100644 --- a/tests/unit/test_web_scraping_mixin_remoteobject.py +++ b/tests/unit/test_web_scraping_mixin_remoteobject.py @@ -1,25 +1,25 @@ # SPDX-FileCopyrightText: © Jens Bergmann and contributors # SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ -"""Unit tests for web_scraping_mixin.py RemoteObject handling. +"""Unit tests for web_scraping_mixin.py JavaScript serialization handling. -Tests the conversion of nodriver RemoteObject results to regular Python objects. +Tests the JSON serialization approach to ensure regular Python objects are returned. """ from typing import Any -from unittest.mock import AsyncMock, Mock, patch +from unittest.mock import AsyncMock, patch import pytest from kleinanzeigen_bot.utils.web_scraping_mixin import WebScrapingMixin -class TestWebExecuteRemoteObjectHandling: - """Test web_execute method with nodriver 0.47+ RemoteObject behavior.""" +class TestWebExecuteJavaScriptSerialization: + """Test web_execute method with JSON serialization approach.""" @pytest.mark.asyncio async def test_web_execute_with_regular_result(self) -> None: - """Test web_execute with regular (non-RemoteObject) result.""" + """Test web_execute with regular result.""" mixin = WebScrapingMixin() with patch.object(mixin, "page") as mock_page: @@ -30,386 +30,240 @@ class TestWebExecuteRemoteObjectHandling: assert result == "regular_result" @pytest.mark.asyncio - async def test_web_execute_with_remoteobject_result(self) -> None: - """Test web_execute with RemoteObject result.""" + async def test_web_execute_with_dict_result(self) -> None: + """Test web_execute with dict result.""" mixin = WebScrapingMixin() - # Mock RemoteObject - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = {"key": "value"} - with patch.object(mixin, "page") as mock_page: - mock_page.evaluate = AsyncMock(return_value = mock_remote_object) + mock_page.evaluate = AsyncMock(return_value = {"key": "value"}) result = await mixin.web_execute("window.test") assert result == {"key": "value"} @pytest.mark.asyncio - async def test_web_execute_remoteobject_with_nested_type_value_structures(self) -> None: - """Test web_execute with RemoteObject containing nested type/value structures.""" + async def test_web_execute_with_complex_dict_result(self) -> None: + """Test web_execute with complex dict result.""" mixin = WebScrapingMixin() - # Mock RemoteObject with nested type/value structures - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = [ - ["statusCode", {"type": "number", "value": 200}], - ["content", {"type": "string", "value": "success"}] - ] - with patch.object(mixin, "page") as mock_page: - mock_page.evaluate = AsyncMock(return_value = mock_remote_object) + mock_page.evaluate = AsyncMock(return_value = { + "statusCode": 200, + "content": "success", + "nested": {"key": "value"} + }) result = await mixin.web_execute("window.test") - # Should convert nested type/value structures to their values - assert result == {"statusCode": 200, "content": "success"} - - @pytest.mark.asyncio - async def test_web_execute_remoteobject_with_mixed_nested_structures(self) -> None: - """Test web_execute with RemoteObject containing mixed nested structures.""" - mixin = WebScrapingMixin() - - # Mock RemoteObject with mixed nested structures - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = { - "simple": "value", - "nested": {"type": "number", "value": 42}, - "list": [{"type": "string", "value": "item1"}, {"type": "string", "value": "item2"}] - } - - with patch.object(mixin, "page") as mock_page: - mock_page.evaluate = AsyncMock(return_value = mock_remote_object) - - result = await mixin.web_execute("window.test") - - # Should convert nested structures while preserving simple values expected = { - "simple": "value", - "nested": 42, - "list": ["item1", "item2"] + "statusCode": 200, + "content": "success", + "nested": {"key": "value"} } assert result == expected - -class TestConvertRemoteObjectResult: - """Test _convert_remote_object_result method for RemoteObject conversion.""" - - def test_convert_remote_object_result_with_none_deep_serialized_value(self) -> None: - """Test _convert_remote_object_result when deep_serialized_value is None.""" + @pytest.mark.asyncio + async def test_web_execute_with_remoteobject_conversion(self) -> None: + """Test web_execute with RemoteObject conversion.""" mixin = WebScrapingMixin() - # Mock RemoteObject with None deep_serialized_value - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = None + # Test the _convert_remote_object_value method directly + test_data = [["key1", "value1"], ["key2", "value2"]] + result = mixin._convert_remote_object_value(test_data) - result = mixin._convert_remote_object_result(mock_remote_object) - assert result == mock_remote_object - - def test_convert_remote_object_result_with_none_serialized_data(self) -> None: - """Test _convert_remote_object_result when serialized_data is None.""" - mixin = WebScrapingMixin() - - # Mock RemoteObject with None serialized_data - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = None - - result = mixin._convert_remote_object_result(mock_remote_object) - assert result == mock_remote_object - - def test_convert_remote_object_result_with_list_data(self) -> None: - """Test _convert_remote_object_result with list data.""" - mixin = WebScrapingMixin() - - # Mock RemoteObject with list data - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = [ - ["key1", "value1"], - ["key2", "value2"] - ] - - result = mixin._convert_remote_object_result(mock_remote_object) + # Should convert key/value list to dict assert result == {"key1": "value1", "key2": "value2"} - def test_convert_remote_object_result_with_dict_data(self) -> None: - """Test _convert_remote_object_result with dict data.""" + def test_convert_remote_object_value_key_value_list(self) -> None: + """Test _convert_remote_object_value with key/value list format.""" mixin = WebScrapingMixin() - # Mock RemoteObject with dict data - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = {"key": "value"} - - result = mixin._convert_remote_object_result(mock_remote_object) - assert result == {"key": "value"} - - def test_convert_remote_object_result_with_conversion_error(self) -> None: - """Test _convert_remote_object_result when conversion raises an exception.""" - mixin = WebScrapingMixin() - - # Mock RemoteObject that will raise an exception during conversion - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = "invalid_data" - - # Mock the _convert_remote_object_dict to raise an exception - with patch.object(mixin, "_convert_remote_object_dict", side_effect = ValueError("Test error")): - result = mixin._convert_remote_object_result(mock_remote_object) - # When conversion fails, it should return the original value - assert result == "invalid_data" - - -class TestConvertRemoteObjectDict: - """Test _convert_remote_object_dict method for nested RemoteObject conversion.""" - - def test_convert_remote_object_dict_with_type_value_pair(self) -> None: - """Test conversion of type/value pair structures.""" - mixin = WebScrapingMixin() - - # Test type/value pair - data = {"type": "number", "value": 200} - result = mixin._convert_remote_object_dict(data) - assert result == 200 - - # Test string type/value pair - data = {"type": "string", "value": "hello"} - result = mixin._convert_remote_object_dict(data) - assert result == "hello" - - def test_convert_remote_object_dict_with_regular_dict(self) -> None: - """Test conversion of regular dict structures.""" - mixin = WebScrapingMixin() - - # Test regular dict (not type/value pair) - data = {"key1": "value1", "key2": "value2"} - result = mixin._convert_remote_object_dict(data) + # Test key/value list format + test_data = [["key1", "value1"], ["key2", "value2"]] + result = mixin._convert_remote_object_value(test_data) assert result == {"key1": "value1", "key2": "value2"} - def test_convert_remote_object_dict_with_nested_structures(self) -> None: - """Test conversion of nested dict structures.""" + def test_convert_remote_object_value_with_nested_type_value(self) -> None: + """Test _convert_remote_object_value with nested type/value structures.""" mixin = WebScrapingMixin() - # Test nested structures - data = { - "simple": "value", - "nested": {"type": "number", "value": 42}, - "list": [{"type": "string", "value": "item1"}, {"type": "string", "value": "item2"}] - } - result = mixin._convert_remote_object_dict(data) + # Test with nested type/value structures + test_data = [["key1", {"type": "string", "value": "nested_value"}]] + result = mixin._convert_remote_object_value(test_data) + assert result == {"key1": "nested_value"} - expected = { - "simple": "value", - "nested": 42, - "list": ["item1", "item2"] - } - assert result == expected - - def test_convert_remote_object_dict_with_list(self) -> None: - """Test conversion of list structures.""" + def test_convert_remote_object_value_regular_list(self) -> None: + """Test _convert_remote_object_value with regular list.""" mixin = WebScrapingMixin() - # Test list with type/value pairs - data = [{"type": "number", "value": 1}, {"type": "string", "value": "test"}] - result = mixin._convert_remote_object_dict(data) - assert result == [1, "test"] + # Test regular list (not key/value format) + test_data = ["item1", "item2", "item3"] + result = mixin._convert_remote_object_value(test_data) + assert result == ["item1", "item2", "item3"] - def test_convert_remote_object_dict_with_primitive_values(self) -> None: - """Test conversion with primitive values.""" + def test_convert_remote_object_value_nested_list(self) -> None: + """Test _convert_remote_object_value with nested list.""" + mixin = WebScrapingMixin() + + # Test nested list that looks like key/value pairs (gets converted to dict) + test_data = [["nested", "list"], ["another", "item"]] + result = mixin._convert_remote_object_value(test_data) + assert result == {"nested": "list", "another": "item"} + + def test_convert_remote_object_value_type_value_dict(self) -> None: + """Test _convert_remote_object_value with type/value dict.""" + mixin = WebScrapingMixin() + + # Test type/value dict + test_data = {"type": "string", "value": "actual_value"} + result = mixin._convert_remote_object_value(test_data) + assert result == "actual_value" + + def test_convert_remote_object_value_regular_dict(self) -> None: + """Test _convert_remote_object_value with regular dict.""" + mixin = WebScrapingMixin() + + # Test regular dict + test_data = {"key1": "value1", "key2": "value2"} + result = mixin._convert_remote_object_value(test_data) + assert result == {"key1": "value1", "key2": "value2"} + + def test_convert_remote_object_value_nested_dict(self) -> None: + """Test _convert_remote_object_value with nested dict.""" + mixin = WebScrapingMixin() + + # Test nested dict + test_data = {"key1": {"nested": "value"}, "key2": "value2"} + result = mixin._convert_remote_object_value(test_data) + assert result == {"key1": {"nested": "value"}, "key2": "value2"} + + def test_convert_remote_object_value_primitive(self) -> None: + """Test _convert_remote_object_value with primitive values.""" mixin = WebScrapingMixin() # Test primitive values - assert mixin._convert_remote_object_dict("string") == "string" - assert mixin._convert_remote_object_dict(42) == 42 - assert mixin._convert_remote_object_dict(True) is True - assert mixin._convert_remote_object_dict(None) is None + assert mixin._convert_remote_object_value("string") == "string" + assert mixin._convert_remote_object_value(123) == 123 + assert mixin._convert_remote_object_value(True) is True + assert mixin._convert_remote_object_value(None) is None - def test_convert_remote_object_dict_with_complex_nested_structures(self) -> None: - """Test conversion with complex nested structures.""" + def test_convert_remote_object_value_malformed_key_value_pair(self) -> None: + """Test _convert_remote_object_value with malformed key/value pairs.""" mixin = WebScrapingMixin() - # Test complex nested structures - data = { - "response": { - "status": {"type": "number", "value": 200}, - "data": [ - {"type": "string", "value": "item1"}, - {"type": "string", "value": "item2"} - ], - "metadata": { - "count": {"type": "number", "value": 2}, - "type": {"type": "string", "value": "list"} - } - } - } - result = mixin._convert_remote_object_dict(data) + # Test with malformed key/value pairs (wrong length) + test_data = [["key1", "value1"], ["key2"]] # Second item has wrong length + result = mixin._convert_remote_object_value(test_data) + # Should still convert the valid pairs and skip malformed ones + assert result == {"key1": "value1"} + def test_convert_remote_object_value_empty_list(self) -> None: + """Test _convert_remote_object_value with empty list.""" + mixin = WebScrapingMixin() + + # Test empty list + test_data:list[Any] = [] + result = mixin._convert_remote_object_value(test_data) + assert result == [] + + def test_convert_remote_object_value_complex_nested_structure(self) -> None: + """Test _convert_remote_object_value with complex nested structure.""" + mixin = WebScrapingMixin() + + # Test complex nested structure + test_data = [ + ["key1", "value1"], + ["key2", {"type": "object", "value": {"nested": "value"}}], + ["key3", [["inner_key", "inner_value"]]] + ] + result = mixin._convert_remote_object_value(test_data) expected = { - "response": { - "status": 200, - "data": ["item1", "item2"], - "metadata": { - "count": 2, - "type": "list" - } - } + "key1": "value1", + "key2": {"nested": "value"}, + "key3": {"inner_key": "inner_value"} # The inner list gets converted to dict too } assert result == expected - -class TestWebRequestRemoteObjectHandling: - """Test web_request method with nodriver 0.47+ RemoteObject behavior.""" - @pytest.mark.asyncio - async def test_web_request_with_remoteobject_result(self) -> None: - """Test web_request with RemoteObject result to catch subscriptability issues.""" + async def test_web_execute_remoteobject_exception_handling(self) -> None: + """Test web_execute with RemoteObject exception handling.""" mixin = WebScrapingMixin() - # Mock RemoteObject that simulates the exact structure returned by web_request - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = { - "statusCode": 200, - "statusMessage": "OK", - "headers": {"content-type": "application/json"}, - "content": '{"success": true}' - } + # Create a mock RemoteObject that will raise an exception + mock_remote_object = type("MockRemoteObject", (), { + "__class__": type("MockClass", (), {"__name__": "RemoteObject"}), + "value": None, + "deep_serialized_value": None + })() - with patch.object(mixin, "web_execute") as mock_web_execute: - # Mock web_execute to return the converted result (simulating our fix) - mock_web_execute.return_value = { - "statusCode": 200, - "statusMessage": "OK", - "headers": {"content-type": "application/json"}, - "content": '{"success": true}' - } + with patch.object(mixin, "page") as mock_page: + mock_page.evaluate = AsyncMock(return_value = mock_remote_object) - result = await mixin.web_request("https://example.com/api") + # Mock the _convert_remote_object_value to raise an exception + with patch.object(mixin, "_convert_remote_object_value", side_effect = Exception("Test exception")): + result = await mixin.web_execute("window.test") - # Verify the result is properly converted and subscriptable - assert result["statusCode"] == 200 - assert result["statusMessage"] == "OK" - assert result["headers"]["content-type"] == "application/json" - assert result["content"] == '{"success": true}' + # Should return the original RemoteObject when exception occurs + assert result == mock_remote_object @pytest.mark.asyncio - async def test_web_request_with_remoteobject_error_response(self) -> None: - """Test web_request with RemoteObject error response.""" + async def test_web_execute_remoteobject_with_value(self) -> None: + """Test web_execute with RemoteObject that has a value.""" mixin = WebScrapingMixin() - # Mock RemoteObject for error response - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = { - "statusCode": 404, - "statusMessage": "Not Found", - "headers": {"content-type": "text/html"}, - "content": "Not Found" - } + # Create a mock RemoteObject with a value + mock_remote_object = type("MockRemoteObject", (), { + "__class__": type("MockClass", (), {"__name__": "RemoteObject"}), + "value": "test_value", + "deep_serialized_value": None + })() - with patch.object(mixin, "web_execute") as mock_web_execute: - # Mock web_execute to return the converted result (simulating our fix) - mock_web_execute.return_value = { - "statusCode": 404, - "statusMessage": "Not Found", - "headers": {"content-type": "text/html"}, - "content": "Not Found" - } + with patch.object(mixin, "page") as mock_page: + mock_page.evaluate = AsyncMock(return_value = mock_remote_object) - # This should raise an exception due to invalid status code - with pytest.raises(Exception, match = "Invalid response"): - await mixin.web_request("https://example.com/api", valid_response_codes = [200]) + result = await mixin.web_execute("window.test") + + # Should return the value directly + assert result == "test_value" @pytest.mark.asyncio - async def test_web_request_with_nested_remoteobject_structures(self) -> None: - """Test web_request with complex nested RemoteObject structures.""" + async def test_web_execute_remoteobject_with_deep_serialized_value(self) -> None: + """Test web_execute with RemoteObject that has deep_serialized_value.""" mixin = WebScrapingMixin() - # Mock RemoteObject with nested type/value structures - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = { - "statusCode": {"type": "number", "value": 200}, - "statusMessage": {"type": "string", "value": "OK"}, - "headers": { - "content-type": {"type": "string", "value": "application/json"} - }, - "content": {"type": "string", "value": '{"data": "test"}'} - } + # Create a mock RemoteObject with deep_serialized_value + mock_remote_object = type("MockRemoteObject", (), { + "__class__": type("MockClass", (), {"__name__": "RemoteObject"}), + "value": None, + "deep_serialized_value": type("MockDeepSerialized", (), { + "value": [["key1", "value1"], ["key2", "value2"]] + })() + })() - with patch.object(mixin, "web_execute") as mock_web_execute: - # Mock web_execute to return the converted result (simulating our fix) - mock_web_execute.return_value = { - "statusCode": 200, - "statusMessage": "OK", - "headers": {"content-type": "application/json"}, - "content": '{"data": "test"}' - } + with patch.object(mixin, "page") as mock_page: + mock_page.evaluate = AsyncMock(return_value = mock_remote_object) - result = await mixin.web_request("https://example.com/api") + result = await mixin.web_execute("window.test") - # Verify nested structures are properly converted - assert result["statusCode"] == 200 - assert result["statusMessage"] == "OK" - assert result["headers"]["content-type"] == "application/json" - assert result["content"] == '{"data": "test"}' - - -class TestWebRequestRemoteObjectRegression: - """Test web_request method to catch future RemoteObject regression issues.""" + # Should convert the deep_serialized_value + assert result == {"key1": "value1", "key2": "value2"} @pytest.mark.asyncio - async def test_web_request_without_remoteobject_conversion_fails(self) -> None: - """Test that web_request fails without RemoteObject conversion (regression test).""" + async def test_web_execute_remoteobject_fallback(self) -> None: + """Test web_execute with RemoteObject fallback when no value or deep_serialized_value.""" mixin = WebScrapingMixin() - # Mock RemoteObject that would cause the original error - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = { - "statusCode": 200, - "statusMessage": "OK", - "headers": {"content-type": "application/json"}, - "content": '{"success": true}' - } + # Create a mock RemoteObject with no value or deep_serialized_value + mock_remote_object = type("MockRemoteObject", (), { + "__class__": type("MockClass", (), {"__name__": "RemoteObject"}), + "value": None, + "deep_serialized_value": None + })() - # Mock web_execute to return the raw RemoteObject (simulating the bug) - with patch.object(mixin, "web_execute") as mock_web_execute: - mock_web_execute.return_value = mock_remote_object + with patch.object(mixin, "page") as mock_page: + mock_page.evaluate = AsyncMock(return_value = mock_remote_object) - # This should fail with the original error if our fix is removed - with pytest.raises(TypeError, match = "object is not subscriptable"): - await mixin.web_request("https://example.com/api") + result = await mixin.web_execute("window.test") - @pytest.mark.asyncio - async def test_web_request_with_remoteobject_conversion_succeeds(self) -> None: - """Test that web_request succeeds with RemoteObject conversion (our fix).""" - mixin = WebScrapingMixin() - - # Mock RemoteObject - mock_remote_object = Mock() - mock_remote_object.deep_serialized_value = Mock() - mock_remote_object.deep_serialized_value.value = { - "statusCode": 200, - "statusMessage": "OK", - "headers": {"content-type": "application/json"}, - "content": '{"success": true}' - } - - # Mock web_execute to simulate our conversion logic - async def mock_web_execute_with_conversion(script:str) -> Any: - # Simulate the conversion that happens in our fix - return mock_remote_object.deep_serialized_value.value - - with patch.object(mixin, "web_execute", side_effect = mock_web_execute_with_conversion): - result = await mixin.web_request("https://example.com/api") - - # Verify the result works correctly - assert result["statusCode"] == 200 - assert result["statusMessage"] == "OK" - assert result["headers"]["content-type"] == "application/json" - assert result["content"] == '{"success": true}' + # Should return the original RemoteObject as fallback + assert result == mock_remote_object