feat: Replace custom RemoteObject wrapper with direct NoDriver 0.47+ usage (#652)

## ℹ️ Description
*Replace custom RemoteObject serialization wrapper with direct NoDriver
0.47+ RemoteObject API usage for better performance and
maintainability.*

- **Motivation**: The custom wrapper was unnecessary complexity when
NoDriver 0.47+ provides direct RemoteObject API
- **Context**: Upgrading from NoDriver 0.39 to 0.47 introduced
RemoteObject, and we want to use it as intended
- **Goal**: Future-proof implementation using the standard NoDriver
patterns

## 📋 Changes Summary

- Replace custom serialization wrapper with direct RemoteObject API
usage
- Implement proper RemoteObject detection and conversion in
web_execute()
- Add comprehensive _convert_remote_object_value() method for recursive
conversion
- Handle key/value list format from deep_serialized_value.value
- Add type guards and proper type checking for RemoteObject instances
- Maintain internal API stability while using RemoteObject as intended
- Add 19 comprehensive test cases covering all conversion scenarios
- Application tested and working with real ad download, update and
publish

### ⚙️ Type of Change
- [x]  New feature (adds new functionality without breaking existing
usage)
- [x] 🐞 Bug fix (non-breaking change which fixes an issue)

##  Checklist
- [x] I have reviewed my changes to ensure they meet the project's
standards.
- [x] I have tested my changes and ensured that all tests pass (pdm run
test).
- [x] I have formatted the code (pdm run format).
- [x] I have verified that linting passes (pdm run lint).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.
This commit is contained in:
Jens
2025-10-20 08:52:06 +02:00
committed by GitHub
parent a9643d916f
commit 339d66ed47
6 changed files with 262 additions and 419 deletions

View File

@@ -468,13 +468,13 @@ class TestWebScrapingBrowserConfiguration:
def add_extension(self, ext:str) -> None:
self._extensions.append(ext) # Use private extensions list
# Mock nodriver.start to return a mock browser
# Mock nodriver.start to return a mock browser # type: ignore[attr-defined]
mock_browser = AsyncMock()
mock_browser.websocket_url = "ws://localhost:9222"
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
# Mock Config class
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue]
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
# Mock os.path.exists to return True for the browser binary and use real exists for Preferences file (and Edge)
edge_path = "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"
@@ -557,13 +557,13 @@ class TestWebScrapingBrowserConfiguration:
def add_extension(self, ext:str) -> None:
self.extensions.append(ext)
# Mock nodriver.start to return a mock browser
# Mock nodriver.start to return a mock browser # type: ignore[attr-defined]
mock_browser = AsyncMock()
mock_browser.websocket_url = "ws://localhost:9222"
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
# Mock Config class
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue]
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
# Mock os.path.exists to return True for both Chrome and Edge paths
monkeypatch.setattr(os.path, "exists", lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge"})
@@ -576,7 +576,7 @@ class TestWebScrapingBrowserConfiguration:
await scraper.create_browser_session()
# Verify browser arguments
config = cast(Mock, nodriver.start).call_args[0][0]
config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined]
assert "--custom-arg=value" in config.browser_args
assert "--another-arg" in config.browser_args
assert "--incognito" in config.browser_args
@@ -589,7 +589,7 @@ class TestWebScrapingBrowserConfiguration:
await scraper.create_browser_session()
# Verify Edge-specific arguments
config = cast(Mock, nodriver.start).call_args[0][0]
config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined]
assert "-inprivate" in config.browser_args
assert os.environ.get("MSEDGEDRIVER_TELEMETRY_OPTOUT") == "1"
@@ -620,13 +620,13 @@ class TestWebScrapingBrowserConfiguration:
with zipfile.ZipFile(ext2, "w") as z:
z.writestr("manifest.json", '{"name": "Test Extension 2"}')
# Mock nodriver.start to return a mock browser
# Mock nodriver.start to return a mock browser # type: ignore[attr-defined]
mock_browser = AsyncMock()
mock_browser.websocket_url = "ws://localhost:9222"
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
# Mock Config class
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue]
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
# Mock os.path.exists to return True for browser binaries and extension files, real_exists for others
real_exists = os.path.exists
@@ -644,7 +644,7 @@ class TestWebScrapingBrowserConfiguration:
await scraper.create_browser_session()
# Verify extensions were loaded
config = cast(Mock, nodriver.start).call_args[0][0]
config = cast(Mock, nodriver.start).call_args[0][0] # type: ignore[attr-defined]
assert len(config._extensions) == 2
for ext_path in config._extensions:
assert os.path.exists(ext_path)
@@ -713,11 +713,11 @@ class TestWebScrapingBrowserConfiguration:
def add_extension(self, ext:str) -> None:
self._extensions.append(ext)
# Mock nodriver.start to return a mock browser
# Mock nodriver.start to return a mock browser # type: ignore[attr-defined]
mock_browser = AsyncMock()
mock_browser.websocket_url = "ws://localhost:9222"
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue]
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
monkeypatch.setattr(os.path, "exists", lambda p: True)
# Simulate state file in user_data_dir
@@ -772,7 +772,7 @@ class TestWebScrapingBrowserConfiguration:
temp_file = tmp_path / "temp_resource"
temp_file.write_text("test")
# Mock nodriver.start to raise an exception
# Mock nodriver.start to raise an exception # type: ignore[attr-defined]
async def mock_start_fail(*args:object, **kwargs:object) -> NoReturn:
if temp_file.exists():
temp_file.unlink()
@@ -786,7 +786,7 @@ class TestWebScrapingBrowserConfiguration:
return mock_browser
monkeypatch.setattr(nodriver, "start", mock_start_fail)
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue]
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
# Don't mock os.path.exists globally - let the file operations work normally
# Attempt to create a session
@@ -801,7 +801,7 @@ class TestWebScrapingBrowserConfiguration:
assert scraper.browser is None
assert scraper.page is None
# Now patch nodriver.start to return a new mock browser each time
# Now patch nodriver.start to return a new mock browser each time # type: ignore[attr-defined]
mock_browser = make_mock_browser()
mock_page = TrulyAwaitableMockPage()
mock_browser.get = AsyncMock(return_value = mock_page)
@@ -846,7 +846,7 @@ class TestWebScrapingBrowserConfiguration:
mock_page = TrulyAwaitableMockPage()
mock_browser.get = AsyncMock(return_value = mock_page)
monkeypatch.setattr(nodriver, "start", AsyncMock(return_value = mock_browser))
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue]
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
monkeypatch.setattr(os.path, "exists", lambda p: True)
# Mock create_browser_session to ensure proper setup

View File

@@ -1,25 +1,25 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""Unit tests for web_scraping_mixin.py RemoteObject handling.
"""Unit tests for web_scraping_mixin.py JavaScript serialization handling.
Tests the conversion of nodriver RemoteObject results to regular Python objects.
Tests the JSON serialization approach to ensure regular Python objects are returned.
"""
from typing import Any
from unittest.mock import AsyncMock, Mock, patch
from unittest.mock import AsyncMock, patch
import pytest
from kleinanzeigen_bot.utils.web_scraping_mixin import WebScrapingMixin
class TestWebExecuteRemoteObjectHandling:
"""Test web_execute method with nodriver 0.47+ RemoteObject behavior."""
class TestWebExecuteJavaScriptSerialization:
"""Test web_execute method with JSON serialization approach."""
@pytest.mark.asyncio
async def test_web_execute_with_regular_result(self) -> None:
"""Test web_execute with regular (non-RemoteObject) result."""
"""Test web_execute with regular result."""
mixin = WebScrapingMixin()
with patch.object(mixin, "page") as mock_page:
@@ -30,386 +30,240 @@ class TestWebExecuteRemoteObjectHandling:
assert result == "regular_result"
@pytest.mark.asyncio
async def test_web_execute_with_remoteobject_result(self) -> None:
"""Test web_execute with RemoteObject result."""
async def test_web_execute_with_dict_result(self) -> None:
"""Test web_execute with dict result."""
mixin = WebScrapingMixin()
# Mock RemoteObject
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = {"key": "value"}
with patch.object(mixin, "page") as mock_page:
mock_page.evaluate = AsyncMock(return_value = mock_remote_object)
mock_page.evaluate = AsyncMock(return_value = {"key": "value"})
result = await mixin.web_execute("window.test")
assert result == {"key": "value"}
@pytest.mark.asyncio
async def test_web_execute_remoteobject_with_nested_type_value_structures(self) -> None:
"""Test web_execute with RemoteObject containing nested type/value structures."""
async def test_web_execute_with_complex_dict_result(self) -> None:
"""Test web_execute with complex dict result."""
mixin = WebScrapingMixin()
# Mock RemoteObject with nested type/value structures
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = [
["statusCode", {"type": "number", "value": 200}],
["content", {"type": "string", "value": "success"}]
]
with patch.object(mixin, "page") as mock_page:
mock_page.evaluate = AsyncMock(return_value = mock_remote_object)
mock_page.evaluate = AsyncMock(return_value = {
"statusCode": 200,
"content": "success",
"nested": {"key": "value"}
})
result = await mixin.web_execute("window.test")
# Should convert nested type/value structures to their values
assert result == {"statusCode": 200, "content": "success"}
@pytest.mark.asyncio
async def test_web_execute_remoteobject_with_mixed_nested_structures(self) -> None:
"""Test web_execute with RemoteObject containing mixed nested structures."""
mixin = WebScrapingMixin()
# Mock RemoteObject with mixed nested structures
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = {
"simple": "value",
"nested": {"type": "number", "value": 42},
"list": [{"type": "string", "value": "item1"}, {"type": "string", "value": "item2"}]
}
with patch.object(mixin, "page") as mock_page:
mock_page.evaluate = AsyncMock(return_value = mock_remote_object)
result = await mixin.web_execute("window.test")
# Should convert nested structures while preserving simple values
expected = {
"simple": "value",
"nested": 42,
"list": ["item1", "item2"]
"statusCode": 200,
"content": "success",
"nested": {"key": "value"}
}
assert result == expected
class TestConvertRemoteObjectResult:
"""Test _convert_remote_object_result method for RemoteObject conversion."""
def test_convert_remote_object_result_with_none_deep_serialized_value(self) -> None:
"""Test _convert_remote_object_result when deep_serialized_value is None."""
@pytest.mark.asyncio
async def test_web_execute_with_remoteobject_conversion(self) -> None:
"""Test web_execute with RemoteObject conversion."""
mixin = WebScrapingMixin()
# Mock RemoteObject with None deep_serialized_value
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = None
# Test the _convert_remote_object_value method directly
test_data = [["key1", "value1"], ["key2", "value2"]]
result = mixin._convert_remote_object_value(test_data)
result = mixin._convert_remote_object_result(mock_remote_object)
assert result == mock_remote_object
def test_convert_remote_object_result_with_none_serialized_data(self) -> None:
"""Test _convert_remote_object_result when serialized_data is None."""
mixin = WebScrapingMixin()
# Mock RemoteObject with None serialized_data
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = None
result = mixin._convert_remote_object_result(mock_remote_object)
assert result == mock_remote_object
def test_convert_remote_object_result_with_list_data(self) -> None:
"""Test _convert_remote_object_result with list data."""
mixin = WebScrapingMixin()
# Mock RemoteObject with list data
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = [
["key1", "value1"],
["key2", "value2"]
]
result = mixin._convert_remote_object_result(mock_remote_object)
# Should convert key/value list to dict
assert result == {"key1": "value1", "key2": "value2"}
def test_convert_remote_object_result_with_dict_data(self) -> None:
"""Test _convert_remote_object_result with dict data."""
def test_convert_remote_object_value_key_value_list(self) -> None:
"""Test _convert_remote_object_value with key/value list format."""
mixin = WebScrapingMixin()
# Mock RemoteObject with dict data
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = {"key": "value"}
result = mixin._convert_remote_object_result(mock_remote_object)
assert result == {"key": "value"}
def test_convert_remote_object_result_with_conversion_error(self) -> None:
"""Test _convert_remote_object_result when conversion raises an exception."""
mixin = WebScrapingMixin()
# Mock RemoteObject that will raise an exception during conversion
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = "invalid_data"
# Mock the _convert_remote_object_dict to raise an exception
with patch.object(mixin, "_convert_remote_object_dict", side_effect = ValueError("Test error")):
result = mixin._convert_remote_object_result(mock_remote_object)
# When conversion fails, it should return the original value
assert result == "invalid_data"
class TestConvertRemoteObjectDict:
"""Test _convert_remote_object_dict method for nested RemoteObject conversion."""
def test_convert_remote_object_dict_with_type_value_pair(self) -> None:
"""Test conversion of type/value pair structures."""
mixin = WebScrapingMixin()
# Test type/value pair
data = {"type": "number", "value": 200}
result = mixin._convert_remote_object_dict(data)
assert result == 200
# Test string type/value pair
data = {"type": "string", "value": "hello"}
result = mixin._convert_remote_object_dict(data)
assert result == "hello"
def test_convert_remote_object_dict_with_regular_dict(self) -> None:
"""Test conversion of regular dict structures."""
mixin = WebScrapingMixin()
# Test regular dict (not type/value pair)
data = {"key1": "value1", "key2": "value2"}
result = mixin._convert_remote_object_dict(data)
# Test key/value list format
test_data = [["key1", "value1"], ["key2", "value2"]]
result = mixin._convert_remote_object_value(test_data)
assert result == {"key1": "value1", "key2": "value2"}
def test_convert_remote_object_dict_with_nested_structures(self) -> None:
"""Test conversion of nested dict structures."""
def test_convert_remote_object_value_with_nested_type_value(self) -> None:
"""Test _convert_remote_object_value with nested type/value structures."""
mixin = WebScrapingMixin()
# Test nested structures
data = {
"simple": "value",
"nested": {"type": "number", "value": 42},
"list": [{"type": "string", "value": "item1"}, {"type": "string", "value": "item2"}]
}
result = mixin._convert_remote_object_dict(data)
# Test with nested type/value structures
test_data = [["key1", {"type": "string", "value": "nested_value"}]]
result = mixin._convert_remote_object_value(test_data)
assert result == {"key1": "nested_value"}
expected = {
"simple": "value",
"nested": 42,
"list": ["item1", "item2"]
}
assert result == expected
def test_convert_remote_object_dict_with_list(self) -> None:
"""Test conversion of list structures."""
def test_convert_remote_object_value_regular_list(self) -> None:
"""Test _convert_remote_object_value with regular list."""
mixin = WebScrapingMixin()
# Test list with type/value pairs
data = [{"type": "number", "value": 1}, {"type": "string", "value": "test"}]
result = mixin._convert_remote_object_dict(data)
assert result == [1, "test"]
# Test regular list (not key/value format)
test_data = ["item1", "item2", "item3"]
result = mixin._convert_remote_object_value(test_data)
assert result == ["item1", "item2", "item3"]
def test_convert_remote_object_dict_with_primitive_values(self) -> None:
"""Test conversion with primitive values."""
def test_convert_remote_object_value_nested_list(self) -> None:
"""Test _convert_remote_object_value with nested list."""
mixin = WebScrapingMixin()
# Test nested list that looks like key/value pairs (gets converted to dict)
test_data = [["nested", "list"], ["another", "item"]]
result = mixin._convert_remote_object_value(test_data)
assert result == {"nested": "list", "another": "item"}
def test_convert_remote_object_value_type_value_dict(self) -> None:
"""Test _convert_remote_object_value with type/value dict."""
mixin = WebScrapingMixin()
# Test type/value dict
test_data = {"type": "string", "value": "actual_value"}
result = mixin._convert_remote_object_value(test_data)
assert result == "actual_value"
def test_convert_remote_object_value_regular_dict(self) -> None:
"""Test _convert_remote_object_value with regular dict."""
mixin = WebScrapingMixin()
# Test regular dict
test_data = {"key1": "value1", "key2": "value2"}
result = mixin._convert_remote_object_value(test_data)
assert result == {"key1": "value1", "key2": "value2"}
def test_convert_remote_object_value_nested_dict(self) -> None:
"""Test _convert_remote_object_value with nested dict."""
mixin = WebScrapingMixin()
# Test nested dict
test_data = {"key1": {"nested": "value"}, "key2": "value2"}
result = mixin._convert_remote_object_value(test_data)
assert result == {"key1": {"nested": "value"}, "key2": "value2"}
def test_convert_remote_object_value_primitive(self) -> None:
"""Test _convert_remote_object_value with primitive values."""
mixin = WebScrapingMixin()
# Test primitive values
assert mixin._convert_remote_object_dict("string") == "string"
assert mixin._convert_remote_object_dict(42) == 42
assert mixin._convert_remote_object_dict(True) is True
assert mixin._convert_remote_object_dict(None) is None
assert mixin._convert_remote_object_value("string") == "string"
assert mixin._convert_remote_object_value(123) == 123
assert mixin._convert_remote_object_value(True) is True
assert mixin._convert_remote_object_value(None) is None
def test_convert_remote_object_dict_with_complex_nested_structures(self) -> None:
"""Test conversion with complex nested structures."""
def test_convert_remote_object_value_malformed_key_value_pair(self) -> None:
"""Test _convert_remote_object_value with malformed key/value pairs."""
mixin = WebScrapingMixin()
# Test complex nested structures
data = {
"response": {
"status": {"type": "number", "value": 200},
"data": [
{"type": "string", "value": "item1"},
{"type": "string", "value": "item2"}
],
"metadata": {
"count": {"type": "number", "value": 2},
"type": {"type": "string", "value": "list"}
}
}
}
result = mixin._convert_remote_object_dict(data)
# Test with malformed key/value pairs (wrong length)
test_data = [["key1", "value1"], ["key2"]] # Second item has wrong length
result = mixin._convert_remote_object_value(test_data)
# Should still convert the valid pairs and skip malformed ones
assert result == {"key1": "value1"}
def test_convert_remote_object_value_empty_list(self) -> None:
"""Test _convert_remote_object_value with empty list."""
mixin = WebScrapingMixin()
# Test empty list
test_data:list[Any] = []
result = mixin._convert_remote_object_value(test_data)
assert result == []
def test_convert_remote_object_value_complex_nested_structure(self) -> None:
"""Test _convert_remote_object_value with complex nested structure."""
mixin = WebScrapingMixin()
# Test complex nested structure
test_data = [
["key1", "value1"],
["key2", {"type": "object", "value": {"nested": "value"}}],
["key3", [["inner_key", "inner_value"]]]
]
result = mixin._convert_remote_object_value(test_data)
expected = {
"response": {
"status": 200,
"data": ["item1", "item2"],
"metadata": {
"count": 2,
"type": "list"
}
}
"key1": "value1",
"key2": {"nested": "value"},
"key3": {"inner_key": "inner_value"} # The inner list gets converted to dict too
}
assert result == expected
class TestWebRequestRemoteObjectHandling:
"""Test web_request method with nodriver 0.47+ RemoteObject behavior."""
@pytest.mark.asyncio
async def test_web_request_with_remoteobject_result(self) -> None:
"""Test web_request with RemoteObject result to catch subscriptability issues."""
async def test_web_execute_remoteobject_exception_handling(self) -> None:
"""Test web_execute with RemoteObject exception handling."""
mixin = WebScrapingMixin()
# Mock RemoteObject that simulates the exact structure returned by web_request
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = {
"statusCode": 200,
"statusMessage": "OK",
"headers": {"content-type": "application/json"},
"content": '{"success": true}'
}
# Create a mock RemoteObject that will raise an exception
mock_remote_object = type("MockRemoteObject", (), {
"__class__": type("MockClass", (), {"__name__": "RemoteObject"}),
"value": None,
"deep_serialized_value": None
})()
with patch.object(mixin, "web_execute") as mock_web_execute:
# Mock web_execute to return the converted result (simulating our fix)
mock_web_execute.return_value = {
"statusCode": 200,
"statusMessage": "OK",
"headers": {"content-type": "application/json"},
"content": '{"success": true}'
}
with patch.object(mixin, "page") as mock_page:
mock_page.evaluate = AsyncMock(return_value = mock_remote_object)
result = await mixin.web_request("https://example.com/api")
# Mock the _convert_remote_object_value to raise an exception
with patch.object(mixin, "_convert_remote_object_value", side_effect = Exception("Test exception")):
result = await mixin.web_execute("window.test")
# Verify the result is properly converted and subscriptable
assert result["statusCode"] == 200
assert result["statusMessage"] == "OK"
assert result["headers"]["content-type"] == "application/json"
assert result["content"] == '{"success": true}'
# Should return the original RemoteObject when exception occurs
assert result == mock_remote_object
@pytest.mark.asyncio
async def test_web_request_with_remoteobject_error_response(self) -> None:
"""Test web_request with RemoteObject error response."""
async def test_web_execute_remoteobject_with_value(self) -> None:
"""Test web_execute with RemoteObject that has a value."""
mixin = WebScrapingMixin()
# Mock RemoteObject for error response
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = {
"statusCode": 404,
"statusMessage": "Not Found",
"headers": {"content-type": "text/html"},
"content": "<html>Not Found</html>"
}
# Create a mock RemoteObject with a value
mock_remote_object = type("MockRemoteObject", (), {
"__class__": type("MockClass", (), {"__name__": "RemoteObject"}),
"value": "test_value",
"deep_serialized_value": None
})()
with patch.object(mixin, "web_execute") as mock_web_execute:
# Mock web_execute to return the converted result (simulating our fix)
mock_web_execute.return_value = {
"statusCode": 404,
"statusMessage": "Not Found",
"headers": {"content-type": "text/html"},
"content": "<html>Not Found</html>"
}
with patch.object(mixin, "page") as mock_page:
mock_page.evaluate = AsyncMock(return_value = mock_remote_object)
# This should raise an exception due to invalid status code
with pytest.raises(Exception, match = "Invalid response"):
await mixin.web_request("https://example.com/api", valid_response_codes = [200])
result = await mixin.web_execute("window.test")
# Should return the value directly
assert result == "test_value"
@pytest.mark.asyncio
async def test_web_request_with_nested_remoteobject_structures(self) -> None:
"""Test web_request with complex nested RemoteObject structures."""
async def test_web_execute_remoteobject_with_deep_serialized_value(self) -> None:
"""Test web_execute with RemoteObject that has deep_serialized_value."""
mixin = WebScrapingMixin()
# Mock RemoteObject with nested type/value structures
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = {
"statusCode": {"type": "number", "value": 200},
"statusMessage": {"type": "string", "value": "OK"},
"headers": {
"content-type": {"type": "string", "value": "application/json"}
},
"content": {"type": "string", "value": '{"data": "test"}'}
}
# Create a mock RemoteObject with deep_serialized_value
mock_remote_object = type("MockRemoteObject", (), {
"__class__": type("MockClass", (), {"__name__": "RemoteObject"}),
"value": None,
"deep_serialized_value": type("MockDeepSerialized", (), {
"value": [["key1", "value1"], ["key2", "value2"]]
})()
})()
with patch.object(mixin, "web_execute") as mock_web_execute:
# Mock web_execute to return the converted result (simulating our fix)
mock_web_execute.return_value = {
"statusCode": 200,
"statusMessage": "OK",
"headers": {"content-type": "application/json"},
"content": '{"data": "test"}'
}
with patch.object(mixin, "page") as mock_page:
mock_page.evaluate = AsyncMock(return_value = mock_remote_object)
result = await mixin.web_request("https://example.com/api")
result = await mixin.web_execute("window.test")
# Verify nested structures are properly converted
assert result["statusCode"] == 200
assert result["statusMessage"] == "OK"
assert result["headers"]["content-type"] == "application/json"
assert result["content"] == '{"data": "test"}'
class TestWebRequestRemoteObjectRegression:
"""Test web_request method to catch future RemoteObject regression issues."""
# Should convert the deep_serialized_value
assert result == {"key1": "value1", "key2": "value2"}
@pytest.mark.asyncio
async def test_web_request_without_remoteobject_conversion_fails(self) -> None:
"""Test that web_request fails without RemoteObject conversion (regression test)."""
async def test_web_execute_remoteobject_fallback(self) -> None:
"""Test web_execute with RemoteObject fallback when no value or deep_serialized_value."""
mixin = WebScrapingMixin()
# Mock RemoteObject that would cause the original error
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = {
"statusCode": 200,
"statusMessage": "OK",
"headers": {"content-type": "application/json"},
"content": '{"success": true}'
}
# Create a mock RemoteObject with no value or deep_serialized_value
mock_remote_object = type("MockRemoteObject", (), {
"__class__": type("MockClass", (), {"__name__": "RemoteObject"}),
"value": None,
"deep_serialized_value": None
})()
# Mock web_execute to return the raw RemoteObject (simulating the bug)
with patch.object(mixin, "web_execute") as mock_web_execute:
mock_web_execute.return_value = mock_remote_object
with patch.object(mixin, "page") as mock_page:
mock_page.evaluate = AsyncMock(return_value = mock_remote_object)
# This should fail with the original error if our fix is removed
with pytest.raises(TypeError, match = "object is not subscriptable"):
await mixin.web_request("https://example.com/api")
result = await mixin.web_execute("window.test")
@pytest.mark.asyncio
async def test_web_request_with_remoteobject_conversion_succeeds(self) -> None:
"""Test that web_request succeeds with RemoteObject conversion (our fix)."""
mixin = WebScrapingMixin()
# Mock RemoteObject
mock_remote_object = Mock()
mock_remote_object.deep_serialized_value = Mock()
mock_remote_object.deep_serialized_value.value = {
"statusCode": 200,
"statusMessage": "OK",
"headers": {"content-type": "application/json"},
"content": '{"success": true}'
}
# Mock web_execute to simulate our conversion logic
async def mock_web_execute_with_conversion(script:str) -> Any:
# Simulate the conversion that happens in our fix
return mock_remote_object.deep_serialized_value.value
with patch.object(mixin, "web_execute", side_effect = mock_web_execute_with_conversion):
result = await mixin.web_request("https://example.com/api")
# Verify the result works correctly
assert result["statusCode"] == 200
assert result["statusMessage"] == "OK"
assert result["headers"]["content-type"] == "application/json"
assert result["content"] == '{"success": true}'
# Should return the original RemoteObject as fallback
assert result == mock_remote_object