From 36ca178574949d93672e903a5c9c7e7beafdc5ad Mon Sep 17 00:00:00 2001
From: Jens <1742418+1cu@users.noreply.github.com>
Date: Sun, 12 Oct 2025 21:22:46 +0200
Subject: [PATCH] feat: upgrade nodriver from 0.39 to 0.47 (#635)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## ℹ️ Description

Upgrade nodriver dependency from pinned version 0.39.0 to latest 0.47.0 to resolve browser startup issues and JavaScript evaluation problems that affected versions 0.40-0.44.

- Link to the related issue(s): Resolves nodriver compatibility issues
- This upgrade addresses browser startup problems and window.BelenConf evaluation failures that were blocking the use of newer nodriver versions.

## 📋 Changes Summary

- Updated nodriver dependency from pinned 0.39.0 to >=0.47.0 in pyproject.toml
- Fixed RemoteObject handling in web_execute method for nodriver 0.47 compatibility
- Added comprehensive BelenConf test fixture with real production data structure
- Added integration test to validate window.BelenConf evaluation works correctly
- Added German translation for new error message
- Replaced real user data with privacy-safe dummy data in test fixtures

### 🔧 Type Safety Improvements

**Added explicit `str()` conversions to resolve type inference issues:**

The comprehensive BelenConf test fixture contains deeply nested data structures that caused pyright's type checker to infer complex dictionary types throughout the codebase.
To ensure type safety and prevent runtime errors, I added explicit `str()` conversions in key locations:

- **CSRF tokens**: `str(csrf_token)` - Ensures CSRF tokens are treated as strings
- **Special attributes**: `str(special_attribute_value)` - Converts special attribute values to strings
- **DOM attributes**: `str(special_attr_elem.attrs.id)` - Ensures element IDs are strings
- **URL handling**: `str(current_img_url)` and `str(href_attributes)` - Converts URLs and href attributes to strings
- **Price values**: `str(ad_cfg.price)` - Ensures price values are strings

These conversions are defensive programming measures that ensure backward compatibility and prevent type-related runtime errors, even if the underlying data structures change in the future.

### ⚙️ Type of Change

- [x] ✨ New feature (adds new functionality without breaking existing usage)
- [ ] 🐞 Bug fix (non-breaking change which fixes an issue)
- [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations)

## ✅ Checklist

Before requesting a review, confirm the following:

- [x] I have reviewed my changes to ensure they meet the project's standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
--- pdm.lock | 8 +- pyproject.toml | 6 +- src/kleinanzeigen_bot/__init__.py | 18 +-- src/kleinanzeigen_bot/extract.py | 14 +- .../resources/translations.de.yaml | 3 + .../utils/web_scraping_mixin.py | 18 +++ tests/conftest.py | 18 +++ tests/fixtures/belen_conf_sample.json | 128 +++++++++++++++++ .../test_web_scraping_mixin_integration.py | 66 +++++++++ tests/unit/test_extract.py | 25 ++++ tests/unit/test_init.py | 109 +++++++++++++- .../test_web_scraping_mixin_remoteobject.py | 134 ++++++++++++++++++ 12 files changed, 526 insertions(+), 21 deletions(-) create mode 100644 tests/fixtures/belen_conf_sample.json create mode 100644 tests/unit/test_web_scraping_mixin_remoteobject.py diff --git a/pdm.lock b/pdm.lock index 7ef5d8f..9ce2596 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:3bda32de316794f1c608898e17874857e2263ee1f3a5932440c630366cc40af2" +content_hash = "sha256:eecf66c65d3597b333bec94ec2b8efa5fba4f96d772c466ab6023550b8d7dd3a" [[metadata.targets]] requires_python = ">=3.10,<3.14" @@ -772,7 +772,7 @@ files = [ [[package]] name = "nodriver" -version = "0.39" +version = "0.47.0" requires_python = ">=3.9" summary = "[Docs here](https://ultrafunkamsterdam.github.io/nodriver)" groups = ["default"] @@ -782,8 +782,8 @@ dependencies = [ "websockets>=14", ] files = [ - {file = "nodriver-0.39-py3-none-any.whl", hash = "sha256:f245be52e6328393ece340a6dcbc8d5754fd7cf0838f0e1e40076944617178fc"}, - {file = "nodriver-0.39.tar.gz", hash = "sha256:af84f76215877c74166f95c8e7615268e31f6118f4c7291d201f29003f2248ef"}, + {file = "nodriver-0.47.0-py3-none-any.whl", hash = "sha256:2bdf69eac8fa33de09249fd513d6a70da95fc80809f9f99c4901cb3ae2e06219"}, + {file = "nodriver-0.47.0.tar.gz", hash = "sha256:5fc31182a4db725ea56fc042269a29a13e55a2baf83dfdd730aa851dd5269608"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index cde93f1..9207389 100644 --- a/pyproject.toml 
+++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ "certifi", "colorama", "jaraco.text", # required by pkg_resources during runtime - "nodriver==0.39.0", # 0.40-0.44 have issues starting browsers and evaluating self.web_execute("window.BelenConf") fails + "nodriver>=0.47.0", # Updated from 0.39.0 - 0.40-0.44 had issues starting browsers and evaluating self.web_execute("window.BelenConf") fails "pydantic>=2.0.0", "ruamel.yaml", "psutil", @@ -366,6 +366,10 @@ data_file = ".temp/coverage.sqlite" branch = true # track branch coverage relative_files = true +[tool.coverage.report] +precision = 2 +show_missing = true +skip_covered = false ##################### # yamlfix diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 15be46b..772e183 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -690,13 +690,13 @@ class KleinanzeigenBot(WebScrapingMixin): await self.web_request( url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}", method = "POST", - headers = {"x-csrf-token": csrf_token} + headers = {"x-csrf-token": str(csrf_token)} ) elif ad_cfg.id: await self.web_request( url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg.id}", method = "POST", - headers = {"x-csrf-token": csrf_token}, + headers = {"x-csrf-token": str(csrf_token)}, valid_response_codes = [200, 404] ) @@ -1048,12 +1048,14 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.debug("Found %i special attributes", len(ad_cfg.special_attributes)) for special_attribute_key, special_attribute_value in ad_cfg.special_attributes.items(): + # Ensure special_attribute_value is treated as a string + special_attribute_value_str = str(special_attribute_value) if special_attribute_key == "condition_s": - await self.__set_condition(special_attribute_value) + await self.__set_condition(special_attribute_value_str) continue - LOG.debug("Setting special attribute [%s] to [%s]...", special_attribute_key, 
special_attribute_value) + LOG.debug("Setting special attribute [%s] to [%s]...", special_attribute_key, special_attribute_value_str) try: # if the