mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
## ℹ️ Description

Upgrade the nodriver dependency from the pinned version 0.39.0 to the latest 0.47.0 to resolve browser startup issues and JavaScript evaluation problems that affected versions 0.40-0.44.

- Link to the related issue(s): Resolves nodriver compatibility issues
- This upgrade addresses browser startup problems and window.BelenConf evaluation failures that were blocking the use of newer nodriver versions.

## 📋 Changes Summary

- Updated nodriver dependency from pinned 0.39.0 to >=0.47.0 in pyproject.toml
- Fixed RemoteObject handling in web_execute method for nodriver 0.47 compatibility
- Added comprehensive BelenConf test fixture with real production data structure
- Added integration test to validate window.BelenConf evaluation works correctly
- Added German translation for new error message
- Replaced real user data with privacy-safe dummy data in test fixtures

### 🔧 Type Safety Improvements

**Added explicit `str()` conversions to resolve type inference issues:**

The comprehensive BelenConf test fixture contains deeply nested data structures that caused pyright's type checker to infer complex dictionary types throughout the codebase. To ensure type safety and prevent runtime errors, I added explicit `str()` conversions in key locations:

- **CSRF tokens**: `str(csrf_token)` - Ensures CSRF tokens are treated as strings
- **Special attributes**: `str(special_attribute_value)` - Converts special attribute values to strings
- **DOM attributes**: `str(special_attr_elem.attrs.id)` - Ensures element IDs are strings
- **URL handling**: `str(current_img_url)` and `str(href_attributes)` - Converts URLs and href attributes to strings
- **Price values**: `str(ad_cfg.price)` - Ensures price values are strings

These conversions are defensive programming measures that ensure backward compatibility and prevent type-related runtime errors, even if the underlying data structures change in the future.
### ⚙️ Type of Change

- [x] ✨ New feature (adds new functionality without breaking existing usage)
- [ ] 🐞 Bug fix (non-breaking change which fixes an issue)
- [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations)

## ✅ Checklist

Before requesting a review, confirm the following:

- [x] I have reviewed my changes to ensure they meet the project's standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
106 lines
4.3 KiB
Python
106 lines
4.3 KiB
Python
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
|
import os
|
|
import platform
|
|
from typing import cast
|
|
|
|
import nodriver
|
|
import pytest
|
|
|
|
from kleinanzeigen_bot.utils import loggers
|
|
from kleinanzeigen_bot.utils.misc import ensure
|
|
from kleinanzeigen_bot.utils.web_scraping_mixin import WebScrapingMixin
|
|
|
|
# When the CI environment variable is set (to any non-empty value), switch the
# bot's and nodriver's loggers to DEBUG level.
if os.environ.get("CI"):
    for _logger_name in ("kleinanzeigen_bot", "nodriver"):
        loggers.get_logger(_logger_name).setLevel(loggers.DEBUG)
|
|
|
|
|
|
async def atest_init() -> None:
    """Create a browser session via WebScrapingMixin and always close it again.

    Fails through ``ensure`` when no compatible browser is detected. Any
    exception raised while creating the session propagates to the caller
    after the session has been closed.
    """
    mixin = WebScrapingMixin()

    on_linux = platform.system() == "Linux"
    if on_linux:
        # required for Ubuntu 24.04 or newer
        browser_args = cast(list[str], mixin.browser_config.arguments)
        browser_args.append("--no-sandbox")

    detected_browser = mixin.get_compatible_browser()
    ensure(detected_browser is not None, "Browser not auto-detected")

    # Close first, then create; presumably this discards any leftover session
    # before a new one is started - confirm against WebScrapingMixin.
    mixin.close_browser_session()
    try:
        await mixin.create_browser_session()
    finally:
        mixin.close_browser_session()
|
|
|
|
|
|
@pytest.mark.flaky(reruns = 4, reruns_delay = 5)
@pytest.mark.itest
def test_init() -> None:
    """Run the async browser start-up check to completion on nodriver's loop."""
    event_loop = nodriver.loop()
    event_loop.run_until_complete(atest_init())
|
|
|
|
|
|
async def atest_belen_conf_evaluation() -> None:
    """Check that ``window.BelenConf`` can be evaluated through ``web_execute``.

    Opens a ``data:`` URL whose inline script defines ``window.BelenConf``,
    evaluates that expression and asserts that the result is either a plain
    ``dict`` or an object exposing a non-None ``deep_serialized_value``.
    The browser session is closed in all cases.
    """
    mixin = WebScrapingMixin()

    on_linux = platform.system() == "Linux"
    if on_linux:
        # required for Ubuntu 24.04 or newer
        browser_args = cast(list[str], mixin.browser_config.arguments)
        browser_args.append("--no-sandbox")

    detected_browser = mixin.get_compatible_browser()
    ensure(detected_browser is not None, "Browser not auto-detected")

    mixin.close_browser_session()
    try:
        await mixin.create_browser_session()

        # Self-contained page (data: URL) whose script sets window.BelenConf
        page_url = "".join((
            "data:text/html,<html><body><script>",
            "window.BelenConf = {test: 'data', universalAnalyticsOpts: ",
            "{dimensions: {dimension92: 'test', dimension108: 'art_s:test'}}};",
            "</script></body></html>",
        ))
        await mixin.web_open(page_url)
        await mixin.web_sleep(1000, 2000)  # give the page time to load

        # The critical step: this evaluation is what failed with nodriver 0.40-0.44
        belen_conf = await mixin.web_execute("window.BelenConf")

        assert belen_conf is not None, "window.BelenConf evaluation returned None"

        # nodriver 0.47+ hands JavaScript objects back as RemoteObject instances,
        # older versions returned a dict - accept either form.
        is_dict = isinstance(belen_conf, dict)
        is_remote_object = getattr(belen_conf, "deep_serialized_value", None) is not None

        assert is_dict or is_remote_object, f"window.BelenConf should be a dict or RemoteObject, got {type(belen_conf)}"

        if not is_dict:
            # New behavior - RemoteObject carrying deep_serialized_value
            assert hasattr(belen_conf, "deep_serialized_value"), "RemoteObject should have deep_serialized_value"
            assert belen_conf.deep_serialized_value is not None, "deep_serialized_value should not be None"
            print("[OK] BelenConf evaluation successful: RemoteObject with deep_serialized_value")
        else:
            # Old behavior - the result is a dict that can be inspected directly
            assert "test" in belen_conf, "window.BelenConf should contain test data"
            assert "universalAnalyticsOpts" in belen_conf, "window.BelenConf should contain universalAnalyticsOpts"
            print(f"[OK] BelenConf evaluation successful: {list(belen_conf.keys())}")
    finally:
        mixin.close_browser_session()
|
|
|
|
|
|
@pytest.mark.flaky(reruns = 4, reruns_delay = 5)
@pytest.mark.itest
def test_belen_conf_evaluation() -> None:
    """Verify that evaluating window.BelenConf in the browser succeeds.

    Guards against the regression seen with nodriver 0.40-0.44, where
    evaluating window.BelenConf would fail.
    """
    event_loop = nodriver.loop()
    event_loop.run_until_complete(atest_belen_conf_evaluation())
|