From 85a5cf5224679471954873b563622626a5337b2c Mon Sep 17 00:00:00 2001 From: sebthom Date: Thu, 15 May 2025 11:48:57 +0200 Subject: [PATCH] feat: improve content_hash calculation --- pyproject.toml | 7 +- schemas/ad.schema.json | 91 +++++++++---- src/kleinanzeigen_bot/__init__.py | 81 ++++++++--- src/kleinanzeigen_bot/ads.py | 84 ------------ src/kleinanzeigen_bot/extract.py | 13 +- src/kleinanzeigen_bot/model/ad_model.py | 126 ++++++++++++++---- src/kleinanzeigen_bot/model/config_model.py | 22 +-- .../resources/translations.de.yaml | 6 + src/kleinanzeigen_bot/utils/loggers.py | 4 +- tests/unit/test_ad_model.py | 39 ++++++ tests/unit/test_ads.py | 115 ---------------- tests/unit/test_config_model.py | 62 +++++++++ tests/unit/test_init.py | 7 +- 13 files changed, 356 insertions(+), 301 deletions(-) delete mode 100644 src/kleinanzeigen_bot/ads.py create mode 100644 tests/unit/test_ad_model.py delete mode 100644 tests/unit/test_ads.py create mode 100644 tests/unit/test_config_model.py diff --git a/pyproject.toml b/pyproject.toml index c0fce64..85e0473 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -226,6 +226,7 @@ ignore = [ "E231", # Don't add whitespace after colon (:) on type declaration "E251", # Don't remove whitespace around parameter '=' sign. "E401", # Don't put imports on separate lines + "FIX002", # Line contains TODO, consider resolving the issue "PERF203", # `try`-`except` within a loop incurs performance overhead "RET504", # Unnecessary assignment to `...` before `return` statement "PLR6301", # Method `...` could be a function, class method, or static method @@ -234,6 +235,8 @@ ignore = [ "SIM105", # Use `contextlib.suppress(TimeoutError)` instead of `try`-`except`-`pass` "SIM114", # Combine `if` branches using logical `or` operator "TC006", # Add quotes to type expression in `typing.cast()` + "TD002", # Missing author in TODO + "TD003", # Missing issue link for this TODO ] [tool.ruff.lint.per-file-ignores] @@ -263,12 +266,12 @@ min-file-size = 256 # https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker # https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages max-args = 6 # max. number of args for function / method (R0913) -# max-attributes = 15 # max. number of instance attrs for a class (R0902) +# max-attributes = 15 # TODO max. number of instance attrs for a class (R0902) max-branches = 40 # max. number of branch for function / method body (R0912) max-locals = 30 # max. number of local vars for function / method body (R0914) max-returns = 15 # max. number of return / yield for function / method body (R0911) max-statements = 150 # max. number of statements in function / method body (R0915) -max-public-methods = 20 # max. number of public methods for a class (R0904) +max-public-methods = 25 # max. number of public methods for a class (R0904) # max-positional-arguments = 5 # max. number of positional args for function / method (R0917) diff --git a/schemas/ad.schema.json b/schemas/ad.schema.json index 9e5b3d7..a2369a4 100644 --- a/schemas/ad.schema.json +++ b/schemas/ad.schema.json @@ -72,18 +72,32 @@ }, "properties": { "active": { - "default": true, - "title": "Active", - "type": "boolean" + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Active" }, "type": { - "default": "OFFER", - "enum": [ - "OFFER", - "WANTED" + "anyOf": [ + { + "enum": [ + "OFFER", + "WANTED" + ], + "type": "string" + }, + { + "type": "null" + } ], - "title": "Type", - "type": "string" + "default": null, + "title": "Type" }, "title": { "minLength": 10, @@ -150,25 +164,39 @@ "title": "Price" }, "price_type": { - "default": "NEGOTIABLE", - "enum": [ - "FIXED", - "NEGOTIABLE", - "GIVE_AWAY", - "NOT_APPLICABLE" + "anyOf": [ + { + "enum": [ + "FIXED", + "NEGOTIABLE", + "GIVE_AWAY", + "NOT_APPLICABLE" + ], + "type": "string" + }, + { + "type": "null" + } ], - "title": "Price Type", - "type": "string" + "default": null, + "title": "Price Type" }, "shipping_type": { - "default": "SHIPPING", - "enum": [ - "PICKUP", - "SHIPPING", - "NOT_APPLICABLE" + "anyOf": [ + { + "enum": [ + "PICKUP", + "SHIPPING", + "NOT_APPLICABLE" + ], + "type": "string" + }, + { + "type": "null" + } ], - "title": "Shipping Type", - "type": "string" + "default": null, + "title": "Shipping Type" }, "shipping_costs": { "anyOf": [ @@ -206,7 +234,7 @@ "type": "null" } ], - "default": false, + "default": null, "title": "Sell Directly" }, "images": { @@ -236,9 +264,16 @@ "default": null }, "republication_interval": { - "default": 7, - "title": "Republication Interval", - "type": "integer" + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Republication Interval" }, "id": { "anyOf": [ diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 5133cba..85d5f3a 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: © Sebastian Thomschke and contributors # SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ -import atexit, copy, json, os, re, signal, sys, textwrap # isort: skip +import atexit, json, os, re, signal, sys, textwrap # isort: skip import getopt # pylint: disable=deprecated-module import urllib.parse as urllib_parse from gettext import gettext as _ @@ -13,8 +13,7 @@ from wcmatch import glob from . import extract, resources from ._version import __version__ -from .ads import calculate_content_hash, get_description_affixes -from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad +from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial from .model.config_model import Config from .utils import dicts, error_handlers, loggers, misc from .utils.exceptions import CaptchaEncountered @@ -80,6 +79,16 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.info("############################################") LOG.info("DONE: No configuration errors found.") LOG.info("############################################") + case "update-content-hash": + self.configure_file_logging() + self.load_config() + self.ads_selector = "all" + if ads := self.load_ads(exclude_ads_with_id = False): + self.update_content_hashes(ads) + else: + LOG.info("############################################") + LOG.info("DONE: No active ads found.") + LOG.info("############################################") case "publish": self.configure_file_logging() self.load_config() @@ -143,6 +152,9 @@ class KleinanzeigenBot(WebScrapingMixin): verify - Überprüft die Konfigurationsdateien delete - Löscht Anzeigen download - Lädt eine oder mehrere Anzeigen herunter + update-content-hash - Berechnet den content_hash aller Anzeigen anhand der aktuellen ad_defaults neu; + nach Änderungen an den config.yaml/ad_defaults verhindert es, dass alle Anzeigen als + "geändert" gelten und neu veröffentlicht werden. -- help - Zeigt diese Hilfe an (Standardbefehl) version - Zeigt die Version der Anwendung an @@ -178,6 +190,8 @@ class KleinanzeigenBot(WebScrapingMixin): verify - verifies the configuration files delete - deletes ads download - downloads one or multiple ads + update-content-hash – recalculates each ad’s content_hash based on the current ad_defaults; + use this after changing config.yaml/ad_defaults to avoid every ad being marked "changed" and republished -- help - displays this help (default command) version - displays the application version @@ -269,9 +283,10 @@ class KleinanzeigenBot(WebScrapingMixin): def __check_ad_republication(self, ad_cfg:Ad, ad_file_relative:str) -> bool: """ Check if an ad needs to be republished based on republication interval. - Returns True if the ad should be republished based on the interval. + Note: This method does not check for content changes. Use __check_ad_changed for that. - Note: This method no longer checks for content changes. Use __check_ad_changed for that. + Returns: + True if the ad should be republished based on the interval. """ if ad_cfg.updated_on: last_updated_on = ad_cfg.updated_on @@ -299,14 +314,16 @@ class KleinanzeigenBot(WebScrapingMixin): def __check_ad_changed(self, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool: """ Check if an ad has been changed since last publication. - Returns True if the ad has been changed. + + Returns: + True if the ad has been changed. """ if not ad_cfg.id: # New ads are not considered "changed" return False # Calculate hash on original config to match what was stored - current_hash = calculate_content_hash(ad_cfg_orig) + current_hash = AdPartial.model_validate(ad_cfg_orig).update_content_hash().content_hash stored_hash = ad_cfg_orig.get("content_hash") LOG.debug("Hash comparison for [%s]:", ad_file_relative) @@ -321,7 +338,20 @@ class KleinanzeigenBot(WebScrapingMixin): return False - def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, Ad, dict[str, Any]]]: + def load_ads(self, *, ignore_inactive:bool = True, exclude_ads_with_id:bool = True) -> list[tuple[str, Ad, dict[str, Any]]]: + """ + Load and validate all ad config files, optionally filtering out inactive or already‐published ads. + + Args: + ignore_inactive (bool): + Skip ads with `active=False`. + exclude_ads_with_id (bool): + Skip ads whose raw data already contains an `id`, i.e. was published before. + + Returns: + list[tuple[str, Ad, dict[str, Any]]]: + Tuples of (file_path, validated Ad model, original raw data). + """ LOG.info("Searching for ad config files...") ad_files:dict[str, str] = {} @@ -366,9 +396,9 @@ class KleinanzeigenBot(WebScrapingMixin): should_include = True # Check for 'new' selector - if "new" in selectors and (not ad_cfg.id or not check_id): + if "new" in selectors and (not ad_cfg.id or not exclude_ads_with_id): should_include = True - elif "new" in selectors and ad_cfg.id and check_id: + elif "new" in selectors and ad_cfg.id and exclude_ads_with_id: LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative) # Check for 'due' selector @@ -427,13 +457,7 @@ class KleinanzeigenBot(WebScrapingMixin): return ads def load_ad(self, ad_cfg_orig:dict[str, Any]) -> Ad: - ad_cfg_merged = dicts.apply_defaults( - target = copy.deepcopy(ad_cfg_orig), - defaults = self.config.ad_defaults.model_dump(), - ignore = lambda k, _: k == "description", - override = lambda _, v: v == "" # noqa: PLC1901 can be simplified to `not v` as an empty string is falsey - ) - return Ad.model_validate(ad_cfg_merged) + return AdPartial.model_validate(ad_cfg_orig).to_ad(self.config.ad_defaults) def load_config(self) -> None: # write default config.yaml if config file does not exist @@ -805,7 +829,7 @@ class KleinanzeigenBot(WebScrapingMixin): # Update content hash after successful publication # Calculate hash on original config to ensure consistent comparison on restart - ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg_orig) + ad_cfg_orig["content_hash"] = AdPartial.model_validate(ad_cfg_orig).update_content_hash().content_hash ad_cfg_orig["updated_on"] = misc.now().isoformat(timespec = "seconds") if not ad_cfg.created_on and not ad_cfg.id: ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"] @@ -1052,7 +1076,7 @@ class KleinanzeigenBot(WebScrapingMixin): elif self.ads_selector == "new": # download only unsaved ads # check which ads already saved saved_ad_ids = [] - ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs + ads = self.load_ads(ignore_inactive = False, exclude_ads_with_id = False) # do not skip because of existing IDs for ad in ads: ad_id = int(ad[2]["id"]) saved_ad_ids.append(ad_id) @@ -1111,7 +1135,7 @@ class KleinanzeigenBot(WebScrapingMixin): # 1. Direct ad-level prefix ad_cfg.description_prefix if ad_cfg.description_prefix is not None # 2. Global prefix from config - else get_description_affixes(self.config, prefix = True) + else self.config.ad_defaults.description_prefix or "" # Default to empty string if all sources are None ) @@ -1120,7 +1144,7 @@ class KleinanzeigenBot(WebScrapingMixin): # 1. Direct ad-level suffix ad_cfg.description_suffix if ad_cfg.description_suffix is not None # 2. Global suffix from config - else get_description_affixes(self.config, prefix = False) + else self.config.ad_defaults.description_suffix or "" # Default to empty string if all sources are None ) @@ -1137,6 +1161,21 @@ class KleinanzeigenBot(WebScrapingMixin): return final_description + def update_content_hashes(self, ads:list[tuple[str, Ad, dict[str, Any]]]) -> None: + count = 0 + + for (ad_file, ad_cfg, ad_cfg_orig) in ads: + LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ads), ad_cfg.title, ad_file) + ad_cfg.update_content_hash() + if ad_cfg.content_hash != ad_cfg_orig["content_hash"]: + count += 1 + ad_cfg_orig["content_hash"] = ad_cfg.content_hash + dicts.save_dict(ad_file, ad_cfg_orig) + + LOG.info("############################################") + LOG.info("DONE: Updated [content_hash] in %s", pluralize("ad", count)) + LOG.info("############################################") + ############################# # main entry point ############################# diff --git a/src/kleinanzeigen_bot/ads.py b/src/kleinanzeigen_bot/ads.py deleted file mode 100644 index a1ff891..0000000 --- a/src/kleinanzeigen_bot/ads.py +++ /dev/null @@ -1,84 +0,0 @@ -# SPDX-FileCopyrightText: © Jens Bergman and contributors -# SPDX-License-Identifier: AGPL-3.0-or-later -# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ -import hashlib, json, os # isort: skip -from typing import Any - -from .model.config_model import Config -from .utils.misc import get_attr - - -def calculate_content_hash(ad_cfg:dict[str, Any]) -> str: - """Calculate a hash for user-modifiable fields of the ad.""" - - # Relevant fields for the hash - content = { - "active": bool(get_attr(ad_cfg, "active", default = True)), # Explicitly convert to bool - "type": str(get_attr(ad_cfg, "type", "")), # Explicitly convert to string - "title": str(get_attr(ad_cfg, "title", "")), - "description": str(get_attr(ad_cfg, "description", "")), - "category": str(get_attr(ad_cfg, "category", "")), - "price": str(get_attr(ad_cfg, "price", "")), # Price always as string - "price_type": str(get_attr(ad_cfg, "price_type", "")), - "special_attributes": dict(get_attr(ad_cfg, "special_attributes", {})), # Handle None case - "shipping_type": str(get_attr(ad_cfg, "shipping_type", "")), - "shipping_costs": str(get_attr(ad_cfg, "shipping_costs", "")), - "shipping_options": sorted([str(x) for x in get_attr(ad_cfg, "shipping_options", [])]), # Handle None case - "sell_directly": bool(get_attr(ad_cfg, "sell_directly", default = False)), # Explicitly convert to bool - "images": sorted([os.path.basename(str(img)) if img is not None else "" for img in get_attr(ad_cfg, "images", [])]), # Handle None values in images - "contact": { - "name": str(get_attr(ad_cfg, "contact.name", "")), - "street": str(get_attr(ad_cfg, "contact.street", "")), # Changed from "None" to empty string for consistency - "zipcode": str(get_attr(ad_cfg, "contact.zipcode", "")), - "phone": str(get_attr(ad_cfg, "contact.phone", "")) - } - } - - # Create sorted JSON string for consistent hashes - content_str = json.dumps(content, sort_keys = True) - return hashlib.sha256(content_str.encode()).hexdigest() - - -def get_description_affixes(config:Config, *, prefix:bool = True) -> str: - """Get prefix or suffix for description with proper precedence. - - This function handles both the new flattened format and legacy nested format: - - New format (flattened): - ad_defaults: - description_prefix: "Global Prefix" - description_suffix: "Global Suffix" - - Legacy format (nested): - ad_defaults: - description: - prefix: "Legacy Prefix" - suffix: "Legacy Suffix" - - Args: - config: Configuration dictionary containing ad_defaults - prefix: If True, get prefix, otherwise get suffix - - Returns: - The appropriate affix string, empty string if none found - - Example: - >>> config = {"ad_defaults": {"description_prefix": "Hello", "description": {"prefix": "Hi"}}} - >>> get_description_affixes(Config.model_validate(config), prefix=True) - 'Hello' - """ - affix_type = "prefix" if prefix else "suffix" - - # First try new flattened format (description_prefix/description_suffix) - flattened_key = f"description_{affix_type}" - flattened_value = getattr(config.ad_defaults, flattened_key) - if isinstance(flattened_value, str): - return flattened_value - - # Then try legacy nested format (description.prefix/description.suffix) - if config.ad_defaults.description: - nested_value = getattr(config.ad_defaults.description, affix_type) - if isinstance(nested_value, str): - return nested_value - - return "" diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index b14baf9..33991e5 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -8,7 +8,6 @@ from typing import Any, Final from kleinanzeigen_bot.model.ad_model import ContactPartial -from .ads import calculate_content_hash, get_description_affixes from .model.ad_model import AdPartial from .model.config_model import Config from .utils import dicts, i18n, loggers, misc, reflect @@ -294,8 +293,8 @@ class AdExtractor(WebScrapingMixin): raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip() # Get prefix and suffix from config - prefix = get_description_affixes(self.config, prefix = True) - suffix = get_description_affixes(self.config, prefix = False) + prefix = self.config.ad_defaults.description_prefix + suffix = self.config.ad_defaults.description_suffix # Remove prefix and suffix if present description_text = raw_description @@ -334,10 +333,12 @@ class AdExtractor(WebScrapingMixin): info["created_on"] = creation_date info["updated_on"] = None # will be set later on - # Calculate the initial hash for the downloaded ad - info["content_hash"] = calculate_content_hash(info) + ad_cfg = AdPartial.model_validate(info) - return AdPartial.model_validate(info) + # calculate the initial hash for the downloaded ad + ad_cfg.content_hash = ad_cfg.to_ad(self.config.ad_defaults).update_content_hash().content_hash + + return ad_cfg async def _extract_category_from_ad_page(self) -> str: """ diff --git a/src/kleinanzeigen_bot/model/ad_model.py b/src/kleinanzeigen_bot/model/ad_model.py index 80c1a51..b2b32eb 100644 --- a/src/kleinanzeigen_bot/model/ad_model.py +++ b/src/kleinanzeigen_bot/model/ad_model.py @@ -3,20 +3,28 @@ # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ from __future__ import annotations +import hashlib, json # isort: skip from datetime import datetime # noqa: TC003 Move import into a type-checking block -from typing import Any, Dict, Final, List, Literal +from typing import Any, Dict, Final, List, Literal, Mapping, Sequence from pydantic import Field, model_validator, validator +from typing_extensions import Self +from kleinanzeigen_bot.model.config_model import AdDefaults # noqa: TC001 Move application import into a type-checking block +from kleinanzeigen_bot.utils import dicts from kleinanzeigen_bot.utils.misc import parse_datetime, parse_decimal from kleinanzeigen_bot.utils.pydantics import ContextualModel MAX_DESCRIPTION_LENGTH:Final[int] = 4000 -def _iso_datetime_field() -> Any: +def _OPTIONAL() -> Any: + return Field(default = None) + + +def _ISO_DATETIME(default:datetime | None = None) -> Any: return Field( - default = None, + default = default, description = "ISO-8601 timestamp with optional timezone (e.g. 2024-12-25T00:00:00 or 2024-12-25T00:00:00Z)", json_schema_extra = { "anyOf": [ @@ -36,37 +44,37 @@ def _iso_datetime_field() -> Any: class ContactPartial(ContextualModel): - name:str | None = None - street:str | None = None - zipcode:int | str | None = None - location:str | None = None + name:str | None = _OPTIONAL() + street:str | None = _OPTIONAL() + zipcode:int | str | None = _OPTIONAL() + location:str | None = _OPTIONAL() - phone:str | None = None + phone:str | None = _OPTIONAL() class AdPartial(ContextualModel): - active:bool = True - type:Literal["OFFER", "WANTED"] = "OFFER" + active:bool | None = _OPTIONAL() + type:Literal["OFFER", "WANTED"] | None = _OPTIONAL() title:str = Field(..., min_length = 10) description:str - description_prefix:str | None = None - description_suffix:str | None = None + description_prefix:str | None = _OPTIONAL() + description_suffix:str | None = _OPTIONAL() category:str - special_attributes:Dict[str, str] | None = Field(default = None) - price:int | None = None - price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE" - shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING" - shipping_costs:float | None = None - shipping_options:List[str] | None = Field(default = None) - sell_directly:bool | None = False - images:List[str] | None = Field(default = None) - contact:ContactPartial | None = None - republication_interval:int = 7 + special_attributes:Dict[str, str] | None = _OPTIONAL() + price:int | None = _OPTIONAL() + price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] | None = _OPTIONAL() + shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] | None = _OPTIONAL() + shipping_costs:float | None = _OPTIONAL() + shipping_options:List[str] | None = _OPTIONAL() + sell_directly:bool | None = _OPTIONAL() + images:List[str] | None = _OPTIONAL() + contact:ContactPartial | None = _OPTIONAL() + republication_interval:int | None = _OPTIONAL() - id:int | None = None - created_on:datetime | None = _iso_datetime_field() - updated_on:datetime | None = _iso_datetime_field() - content_hash:str | None = None + id:int | None = _OPTIONAL() + created_on:datetime | None = _ISO_DATETIME() + updated_on:datetime | None = _ISO_DATETIME() + content_hash:str | None = _OPTIONAL() @validator("created_on", "updated_on", pre = True) @classmethod @@ -105,11 +113,71 @@ class AdPartial(ContextualModel): raise ValueError("shipping_options entries must be non-empty") return v + def update_content_hash(self) -> Self: + """Calculate and updates the content_hash value for user-modifiable fields of the ad.""" + # 1) Dump to a plain dict, excluding the metadata fields: + raw = self.model_dump( + exclude = {"id", "created_on", "updated_on", "content_hash"}, + exclude_none = True, + exclude_unset = True, + ) + + # 2) Recursively prune any empty containers: + def prune(obj:Any) -> Any: + if isinstance(obj, Mapping): + return { + k: prune(v) + for k, v in obj.items() + # drop keys whose values are empty list/dict/set + if not (isinstance(v, (Mapping, Sequence, set)) and not isinstance(v, (str, bytes)) and len(v) == 0) + } + if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)): + return [ + prune(v) + for v in obj + if not (isinstance(v, (Mapping, Sequence, set)) and not isinstance(v, (str, bytes)) and len(v) == 0) + ] + return obj + + pruned = prune(raw) + + # 3) Produce a canonical JSON string and hash it: + json_string = json.dumps(pruned, sort_keys = True) + self.content_hash = hashlib.sha256(json_string.encode()).hexdigest() + return self + + def to_ad(self, ad_defaults:AdDefaults) -> Ad: + """ + Returns a complete, validated Ad by merging this partial with values from ad_defaults. + + Any field that is `None` or `""` is filled from `ad_defaults`. + + Raises `ValidationError` when, after merging with `ad_defaults`, not all fields required by `Ad` are populated. + """ + ad_cfg = self.model_dump() + dicts.apply_defaults( + target = ad_cfg, + defaults = ad_defaults.model_dump(), + ignore = lambda k, _: k == "description", # ignore legacy global description config + override = lambda _, v: v in {None, ""} # noqa: PLC1901 can be simplified + ) + return Ad.model_validate(ad_cfg) + + +# pyright: reportGeneralTypeIssues=false, reportIncompatibleVariableOverride=false class Contact(ContactPartial): - name:str # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] - zipcode:int | str # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] + name:str + zipcode:int | str +# pyright: reportGeneralTypeIssues=false, reportIncompatibleVariableOverride=false class Ad(AdPartial): - contact:Contact # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] + active:bool + type:Literal["OFFER", "WANTED"] + description:str + price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] + shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] + sell_directly:bool + contact:Contact + republication_interval:int diff --git a/src/kleinanzeigen_bot/model/config_model.py b/src/kleinanzeigen_bot/model/config_model.py index cbb195b..f86cc05 100644 --- a/src/kleinanzeigen_bot/model/config_model.py +++ b/src/kleinanzeigen_bot/model/config_model.py @@ -4,12 +4,13 @@ from __future__ import annotations import copy -from typing import Any, Dict, List, Literal +from typing import Any, List, Literal from pydantic import Field, model_validator, validator from typing_extensions import deprecated from kleinanzeigen_bot.utils import dicts +from kleinanzeigen_bot.utils.misc import get_attr from kleinanzeigen_bot.utils.pydantics import ContextualModel @@ -40,16 +41,17 @@ class AdDefaults(ContextualModel): @model_validator(mode = "before") @classmethod - def unify_description(cls, values:Dict[str, Any]) -> Dict[str, Any]: + def migrate_legacy_description(cls, values:dict[str, Any]) -> dict[str, Any]: # Ensure flat prefix/suffix take precedence over deprecated nested "description" - desc = values.get("description") - flat_prefix = values.get("description_prefix") - flat_suffix = values.get("description_suffix") + description_prefix = values.get("description_prefix") + description_suffix = values.get("description_suffix") + legacy_prefix = get_attr(values, "description.prefix") + legacy_suffix = get_attr(values, "description.suffix") - if not flat_prefix and isinstance(desc, dict) and desc.get("prefix") is not None: - values["description_prefix"] = desc.get("prefix", "") - if not flat_suffix and isinstance(desc, dict) and desc.get("suffix") is not None: - values["description_suffix"] = desc.get("suffix", "") + if not description_prefix and legacy_prefix is not None: + values["description_prefix"] = legacy_prefix + if not description_suffix and legacy_suffix is not None: + values["description_suffix"] = legacy_suffix return values @@ -115,7 +117,7 @@ if relative paths are specified, then they are relative to this configuration fi description = "Default values for ads, can be overwritten in each ad configuration file" ) - categories:Dict[str, str] = Field(default_factory = dict, description = """ + categories:dict[str, str] = Field(default_factory = dict, description = """ additional name to category ID mappings, see default list at https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 258395b..d53a078 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -133,6 +133,7 @@ kleinanzeigen_bot/__init__.py: run: "DONE: No configuration errors found.": "FERTIG: Keine Konfigurationsfehler gefunden." + "DONE: No active ads found.": "FERTIG: Keine aktiven Anzeigen gefunden." "You provided no ads selector. Defaulting to \"due\".": "Es wurden keine Anzeigen-Selektor angegeben. Es wird \"due\" verwendet." "DONE: No new/outdated ads found.": "FERTIG: Keine neuen/veralteten Anzeigen gefunden." "DONE: No ads to delete found.": "FERTIG: Keine zu löschnenden Anzeigen gefunden." @@ -148,6 +149,11 @@ kleinanzeigen_bot/__init__.py: __set_shipping_options: "Unable to close shipping dialog!": "Versanddialog konnte nicht geschlossen werden!" + update_content_hashes: + "DONE: Updated [content_hash] in %s": "FERTIG: [content_hash] in %s aktualisiert." + "Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..." + "ad": "Anzeige" + ################################################# kleinanzeigen_bot/extract.py: ################################################# diff --git a/src/kleinanzeigen_bot/utils/loggers.py b/src/kleinanzeigen_bot/utils/loggers.py index 7d2d172..09ffdbf 100644 --- a/src/kleinanzeigen_bot/utils/loggers.py +++ b/src/kleinanzeigen_bot/utils/loggers.py @@ -67,7 +67,7 @@ def configure_console_logging() -> None: CRITICAL: colorama.Fore.MAGENTA, } - def _relativize_paths_under_cwd(self, record: logging.LogRecord) -> None: + def _relativize_paths_under_cwd(self, record:logging.LogRecord) -> None: """ Mutate record.args in-place, converting any absolute-path strings under the current working directory into relative paths. @@ -78,7 +78,7 @@ def configure_console_logging() -> None: cwd = os.getcwd() - def _rel_if_subpath(val: Any) -> Any: + def _rel_if_subpath(val:Any) -> Any: if isinstance(val, str) and os.path.isabs(val): # don't relativize log-file paths if val.endswith(".log"): diff --git a/tests/unit/test_ad_model.py b/tests/unit/test_ad_model.py new file mode 100644 index 0000000..018d27b --- /dev/null +++ b/tests/unit/test_ad_model.py @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +from kleinanzeigen_bot.model.ad_model import AdPartial + + +def test_update_content_hash() -> None: + minimal_ad_cfg = { + "id": "123456789", + "title": "Test Ad Title", + "category": "160", + "description": "Test Description", + } + minimal_ad_cfg_hash = "ae3defaccd6b41f379eb8de17263caa1bd306e35e74b11aa03a4738621e96ece" + + assert AdPartial.model_validate(minimal_ad_cfg).update_content_hash().content_hash == minimal_ad_cfg_hash + + assert AdPartial.model_validate(minimal_ad_cfg | { + "id": "123456789", + "created_on": "2025-05-08T09:34:03", + "updated_on": "2025-05-14T20:43:16", + "content_hash": "5753ead7cf42b0ace5fe658ecb930b3a8f57ef49bd52b7ea2d64b91b2c75517e" + }).update_content_hash().content_hash == minimal_ad_cfg_hash + + assert AdPartial.model_validate(minimal_ad_cfg | { + "active": None, + "images": None, + "shipping_options": None, + "special_attributes": None, + "contact": None, + }).update_content_hash().content_hash == minimal_ad_cfg_hash + + assert AdPartial.model_validate(minimal_ad_cfg | { + "active": True, + "images": [], + "shipping_options": [], + "special_attributes": {}, + "contact": {}, + }).update_content_hash().content_hash != minimal_ad_cfg_hash diff --git a/tests/unit/test_ads.py b/tests/unit/test_ads.py deleted file mode 100644 index abd1ed1..0000000 --- a/tests/unit/test_ads.py +++ /dev/null @@ -1,115 +0,0 @@ -# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors -# SPDX-License-Identifier: AGPL-3.0-or-later -# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ -from typing import Any - -import pytest - -from kleinanzeigen_bot import ads -from kleinanzeigen_bot.model.config_model import Config - - -def test_calculate_content_hash_with_none_values() -> None: - """Test calculate_content_hash with None values in the ad configuration.""" - ad_cfg = { - # Minimal configuration with None values as described in bug report - "id": "123456789", - "created_on": "2022-07-19T07:30:20.489289", - "updated_on": "2025-01-22T19:46:46.735896", - "title": "Test Ad", - "description": "Test Description", - "images": [None, "/path/to/image.jpg", None], # List containing None values - "shipping_options": None, # None instead of list - "special_attributes": None, # None instead of dictionary - "contact": { - "street": None # None value in contact - } - } - - # Should not raise TypeError - hash_value = ads.calculate_content_hash(ad_cfg) - assert isinstance(hash_value, str) - assert len(hash_value) == 64 # SHA-256 hash is 64 characters long - - -@pytest.mark.parametrize(("config", "prefix", "expected"), [ - # Test new flattened format - prefix - ( - {"ad_defaults": {"description_prefix": "Hello"}}, - True, - "Hello" - ), - # Test new flattened format - suffix - ( - {"ad_defaults": {"description_suffix": "Bye"}}, - False, - "Bye" - ), - # Test legacy nested format - prefix - ( - {"ad_defaults": {"description": {"prefix": "Hi"}}}, - True, - "Hi" - ), - # Test legacy nested format - suffix - ( - {"ad_defaults": {"description": {"suffix": "Ciao"}}}, - False, - "Ciao" - ), - # Test precedence (new format over legacy) - prefix - ( - { - "ad_defaults": { - "description_prefix": "Hello", - "description": {"prefix": "Hi"} - } - }, - True, - "Hello" - ), - # Test precedence (new format over legacy) - suffix - ( - { - "ad_defaults": { - "description_suffix": "Bye", - "description": {"suffix": "Ciao"} - } - }, - False, - "Bye" - ), - # Test empty config - ( - {"ad_defaults": {}}, - True, - "" - ), - # Test None values - ( - {"ad_defaults": {"description_prefix": None, "description_suffix": None}}, - True, - "" - ), - # Add test for malformed config - ( - {}, # Empty config - True, - "" - ), - # Test for missing ad_defaults - ( - {"some_other_key": {}}, - True, - "" - ), -]) -def test_get_description_affixes( - config:dict[str, Any], - prefix:bool, - expected:str, - test_bot_config:Config -) -> None: - """Test get_description_affixes function with various inputs.""" - result = ads.get_description_affixes(test_bot_config.with_values(config), prefix = prefix) - assert result == expected diff --git a/tests/unit/test_config_model.py b/tests/unit/test_config_model.py new file mode 100644 index 0000000..999134f --- /dev/null +++ b/tests/unit/test_config_model.py @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +from kleinanzeigen_bot.model.config_model import AdDefaults + + +def test_migrate_legacy_description_prefix() -> None: + assert AdDefaults.model_validate({ + }).description_prefix is None + + assert AdDefaults.model_validate({ + "description_prefix": "Prefix" + }).description_prefix == "Prefix" + + assert AdDefaults.model_validate({ + "description_prefix": "Prefix", + "description": { + "prefix": "Legacy Prefix" + } + }).description_prefix == "Prefix" + + assert AdDefaults.model_validate({ + "description": { + "prefix": "Legacy Prefix" + } + }).description_prefix == "Legacy Prefix" + + assert AdDefaults.model_validate({ + "description_prefix": "", + "description": { + "prefix": "Legacy Prefix" + } + }).description_prefix == "Legacy Prefix" + + +def test_migrate_legacy_description_suffix() -> None: + assert AdDefaults.model_validate({ + }).description_suffix is None + + assert AdDefaults.model_validate({ + "description_suffix": "Suffix" + }).description_suffix == "Suffix" + + assert AdDefaults.model_validate({ + "description_suffix": "Suffix", + "description": { + "suffix": "Legacy Suffix" + } + }).description_suffix == "Suffix" + + assert AdDefaults.model_validate({ + "description": { + "suffix": "Legacy Suffix" + } + }).description_suffix == "Legacy Suffix" + + assert AdDefaults.model_validate({ + "description_suffix": "", + "description": { + "suffix": "Legacy Suffix" + } + }).description_suffix == "Legacy Suffix" diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index 8793f8a..92db718 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -14,7 +14,6 @@ from pydantic import ValidationError from kleinanzeigen_bot import LOG, KleinanzeigenBot, misc from kleinanzeigen_bot._version import __version__ -from kleinanzeigen_bot.ads import calculate_content_hash from kleinanzeigen_bot.model.ad_model import Ad from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig from kleinanzeigen_bot.utils import dicts, loggers @@ -844,7 +843,7 @@ class TestKleinanzeigenBotAdRepublication: # Calculate hash before making the copy to ensure they match ad_cfg_orig = ad_cfg.model_dump() - current_hash = calculate_content_hash(ad_cfg_orig) + current_hash = ad_cfg.update_content_hash().content_hash ad_cfg_orig["content_hash"] = current_hash # Mock the config to prevent actual file operations @@ -874,8 +873,8 @@ class TestKleinanzeigenBotShippingOptions: }) # Create the original ad config and published ads list + ad_cfg.update_content_hash() # Add content hash to prevent republication ad_cfg_orig = ad_cfg.model_dump() - ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg_orig) # Add content hash to prevent republication published_ads:list[dict[str, Any]] = [] # Set up default config values needed for the test @@ -1155,7 +1154,7 @@ class TestKleinanzeigenBotChangedAds: # Calculate hash for changed_ad and add it to the config # Then modify the ad to simulate a change changed_ad = ad_cfg.model_dump() - changed_hash = calculate_content_hash(changed_ad) + changed_hash = ad_cfg.update_content_hash().content_hash changed_ad["content_hash"] = changed_hash # Now modify the ad to make it "changed" changed_ad["title"] = "Changed Ad - Modified"