feat: improve content_hash calculation

This commit is contained in:
sebthom
2025-05-15 11:48:57 +02:00
committed by Sebastian Thomschke
parent f1cd597dd8
commit 85a5cf5224
13 changed files with 356 additions and 301 deletions

View File

@@ -226,6 +226,7 @@ ignore = [
"E231", # Don't add whitespace after colon (:) on type declaration "E231", # Don't add whitespace after colon (:) on type declaration
"E251", # Don't remove whitespace around parameter '=' sign. "E251", # Don't remove whitespace around parameter '=' sign.
"E401", # Don't put imports on separate lines "E401", # Don't put imports on separate lines
"FIX002", # Line contains TODO, consider resolving the issue
"PERF203", # `try`-`except` within a loop incurs performance overhead "PERF203", # `try`-`except` within a loop incurs performance overhead
"RET504", # Unnecessary assignment to `...` before `return` statement "RET504", # Unnecessary assignment to `...` before `return` statement
"PLR6301", # Method `...` could be a function, class method, or static method "PLR6301", # Method `...` could be a function, class method, or static method
@@ -234,6 +235,8 @@ ignore = [
"SIM105", # Use `contextlib.suppress(TimeoutError)` instead of `try`-`except`-`pass` "SIM105", # Use `contextlib.suppress(TimeoutError)` instead of `try`-`except`-`pass`
"SIM114", # Combine `if` branches using logical `or` operator "SIM114", # Combine `if` branches using logical `or` operator
"TC006", # Add quotes to type expression in `typing.cast()` "TC006", # Add quotes to type expression in `typing.cast()`
"TD002", # Missing author in TODO
"TD003", # Missing issue link for this TODO
] ]
[tool.ruff.lint.per-file-ignores] [tool.ruff.lint.per-file-ignores]
@@ -263,12 +266,12 @@ min-file-size = 256
# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker # https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker
# https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages # https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages
max-args = 6 # max. number of args for function / method (R0913) max-args = 6 # max. number of args for function / method (R0913)
# max-attributes = 15 # max. number of instance attrs for a class (R0902) # max-attributes = 15 # TODO max. number of instance attrs for a class (R0902)
max-branches = 40 # max. number of branch for function / method body (R0912) max-branches = 40 # max. number of branch for function / method body (R0912)
max-locals = 30 # max. number of local vars for function / method body (R0914) max-locals = 30 # max. number of local vars for function / method body (R0914)
max-returns = 15 # max. number of return / yield for function / method body (R0911) max-returns = 15 # max. number of return / yield for function / method body (R0911)
max-statements = 150 # max. number of statements in function / method body (R0915) max-statements = 150 # max. number of statements in function / method body (R0915)
max-public-methods = 20 # max. number of public methods for a class (R0904) max-public-methods = 25 # max. number of public methods for a class (R0904)
# max-positional-arguments = 5 # max. number of positional args for function / method (R0917) # max-positional-arguments = 5 # max. number of positional args for function / method (R0917)

View File

@@ -72,19 +72,33 @@
}, },
"properties": { "properties": {
"active": { "active": {
"default": true, "anyOf": [
"title": "Active", {
"type": "boolean" "type": "boolean"
}, },
{
"type": "null"
}
],
"default": null,
"title": "Active"
},
"type": { "type": {
"default": "OFFER", "anyOf": [
{
"enum": [ "enum": [
"OFFER", "OFFER",
"WANTED" "WANTED"
], ],
"title": "Type",
"type": "string" "type": "string"
}, },
{
"type": "null"
}
],
"default": null,
"title": "Type"
},
"title": { "title": {
"minLength": 10, "minLength": 10,
"title": "Title", "title": "Title",
@@ -150,26 +164,40 @@
"title": "Price" "title": "Price"
}, },
"price_type": { "price_type": {
"default": "NEGOTIABLE", "anyOf": [
{
"enum": [ "enum": [
"FIXED", "FIXED",
"NEGOTIABLE", "NEGOTIABLE",
"GIVE_AWAY", "GIVE_AWAY",
"NOT_APPLICABLE" "NOT_APPLICABLE"
], ],
"title": "Price Type",
"type": "string" "type": "string"
}, },
{
"type": "null"
}
],
"default": null,
"title": "Price Type"
},
"shipping_type": { "shipping_type": {
"default": "SHIPPING", "anyOf": [
{
"enum": [ "enum": [
"PICKUP", "PICKUP",
"SHIPPING", "SHIPPING",
"NOT_APPLICABLE" "NOT_APPLICABLE"
], ],
"title": "Shipping Type",
"type": "string" "type": "string"
}, },
{
"type": "null"
}
],
"default": null,
"title": "Shipping Type"
},
"shipping_costs": { "shipping_costs": {
"anyOf": [ "anyOf": [
{ {
@@ -206,7 +234,7 @@
"type": "null" "type": "null"
} }
], ],
"default": false, "default": null,
"title": "Sell Directly" "title": "Sell Directly"
}, },
"images": { "images": {
@@ -236,10 +264,17 @@
"default": null "default": null
}, },
"republication_interval": { "republication_interval": {
"default": 7, "anyOf": [
"title": "Republication Interval", {
"type": "integer" "type": "integer"
}, },
{
"type": "null"
}
],
"default": null,
"title": "Republication Interval"
},
"id": { "id": {
"anyOf": [ "anyOf": [
{ {

View File

@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors # SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import atexit, copy, json, os, re, signal, sys, textwrap # isort: skip import atexit, json, os, re, signal, sys, textwrap # isort: skip
import getopt # pylint: disable=deprecated-module import getopt # pylint: disable=deprecated-module
import urllib.parse as urllib_parse import urllib.parse as urllib_parse
from gettext import gettext as _ from gettext import gettext as _
@@ -13,8 +13,7 @@ from wcmatch import glob
from . import extract, resources from . import extract, resources
from ._version import __version__ from ._version import __version__
from .ads import calculate_content_hash, get_description_affixes from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial
from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad
from .model.config_model import Config from .model.config_model import Config
from .utils import dicts, error_handlers, loggers, misc from .utils import dicts, error_handlers, loggers, misc
from .utils.exceptions import CaptchaEncountered from .utils.exceptions import CaptchaEncountered
@@ -80,6 +79,16 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("############################################") LOG.info("############################################")
LOG.info("DONE: No configuration errors found.") LOG.info("DONE: No configuration errors found.")
LOG.info("############################################") LOG.info("############################################")
case "update-content-hash":
self.configure_file_logging()
self.load_config()
self.ads_selector = "all"
if ads := self.load_ads(exclude_ads_with_id = False):
self.update_content_hashes(ads)
else:
LOG.info("############################################")
LOG.info("DONE: No active ads found.")
LOG.info("############################################")
case "publish": case "publish":
self.configure_file_logging() self.configure_file_logging()
self.load_config() self.load_config()
@@ -143,6 +152,9 @@ class KleinanzeigenBot(WebScrapingMixin):
verify - Überprüft die Konfigurationsdateien verify - Überprüft die Konfigurationsdateien
delete - Löscht Anzeigen delete - Löscht Anzeigen
download - Lädt eine oder mehrere Anzeigen herunter download - Lädt eine oder mehrere Anzeigen herunter
update-content-hash - Berechnet den content_hash aller Anzeigen anhand der aktuellen ad_defaults neu;
nach Änderungen an den config.yaml/ad_defaults verhindert es, dass alle Anzeigen als
"geändert" gelten und neu veröffentlicht werden.
-- --
help - Zeigt diese Hilfe an (Standardbefehl) help - Zeigt diese Hilfe an (Standardbefehl)
version - Zeigt die Version der Anwendung an version - Zeigt die Version der Anwendung an
@@ -178,6 +190,8 @@ class KleinanzeigenBot(WebScrapingMixin):
verify - verifies the configuration files verify - verifies the configuration files
delete - deletes ads delete - deletes ads
download - downloads one or multiple ads download - downloads one or multiple ads
update-content-hash recalculates each ads content_hash based on the current ad_defaults;
use this after changing config.yaml/ad_defaults to avoid every ad being marked "changed" and republished
-- --
help - displays this help (default command) help - displays this help (default command)
version - displays the application version version - displays the application version
@@ -269,9 +283,10 @@ class KleinanzeigenBot(WebScrapingMixin):
def __check_ad_republication(self, ad_cfg:Ad, ad_file_relative:str) -> bool: def __check_ad_republication(self, ad_cfg:Ad, ad_file_relative:str) -> bool:
""" """
Check if an ad needs to be republished based on republication interval. Check if an ad needs to be republished based on republication interval.
Returns True if the ad should be republished based on the interval. Note: This method does not check for content changes. Use __check_ad_changed for that.
Note: This method no longer checks for content changes. Use __check_ad_changed for that. Returns:
True if the ad should be republished based on the interval.
""" """
if ad_cfg.updated_on: if ad_cfg.updated_on:
last_updated_on = ad_cfg.updated_on last_updated_on = ad_cfg.updated_on
@@ -299,14 +314,16 @@ class KleinanzeigenBot(WebScrapingMixin):
def __check_ad_changed(self, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool: def __check_ad_changed(self, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool:
""" """
Check if an ad has been changed since last publication. Check if an ad has been changed since last publication.
Returns True if the ad has been changed.
Returns:
True if the ad has been changed.
""" """
if not ad_cfg.id: if not ad_cfg.id:
# New ads are not considered "changed" # New ads are not considered "changed"
return False return False
# Calculate hash on original config to match what was stored # Calculate hash on original config to match what was stored
current_hash = calculate_content_hash(ad_cfg_orig) current_hash = AdPartial.model_validate(ad_cfg_orig).update_content_hash().content_hash
stored_hash = ad_cfg_orig.get("content_hash") stored_hash = ad_cfg_orig.get("content_hash")
LOG.debug("Hash comparison for [%s]:", ad_file_relative) LOG.debug("Hash comparison for [%s]:", ad_file_relative)
@@ -321,7 +338,20 @@ class KleinanzeigenBot(WebScrapingMixin):
return False return False
def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, Ad, dict[str, Any]]]: def load_ads(self, *, ignore_inactive:bool = True, exclude_ads_with_id:bool = True) -> list[tuple[str, Ad, dict[str, Any]]]:
"""
Load and validate all ad config files, optionally filtering out inactive or alreadypublished ads.
Args:
ignore_inactive (bool):
Skip ads with `active=False`.
exclude_ads_with_id (bool):
Skip ads whose raw data already contains an `id`, i.e. was published before.
Returns:
list[tuple[str, Ad, dict[str, Any]]]:
Tuples of (file_path, validated Ad model, original raw data).
"""
LOG.info("Searching for ad config files...") LOG.info("Searching for ad config files...")
ad_files:dict[str, str] = {} ad_files:dict[str, str] = {}
@@ -366,9 +396,9 @@ class KleinanzeigenBot(WebScrapingMixin):
should_include = True should_include = True
# Check for 'new' selector # Check for 'new' selector
if "new" in selectors and (not ad_cfg.id or not check_id): if "new" in selectors and (not ad_cfg.id or not exclude_ads_with_id):
should_include = True should_include = True
elif "new" in selectors and ad_cfg.id and check_id: elif "new" in selectors and ad_cfg.id and exclude_ads_with_id:
LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative) LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative)
# Check for 'due' selector # Check for 'due' selector
@@ -427,13 +457,7 @@ class KleinanzeigenBot(WebScrapingMixin):
return ads return ads
def load_ad(self, ad_cfg_orig:dict[str, Any]) -> Ad: def load_ad(self, ad_cfg_orig:dict[str, Any]) -> Ad:
ad_cfg_merged = dicts.apply_defaults( return AdPartial.model_validate(ad_cfg_orig).to_ad(self.config.ad_defaults)
target = copy.deepcopy(ad_cfg_orig),
defaults = self.config.ad_defaults.model_dump(),
ignore = lambda k, _: k == "description",
override = lambda _, v: v == "" # noqa: PLC1901 can be simplified to `not v` as an empty string is falsey
)
return Ad.model_validate(ad_cfg_merged)
def load_config(self) -> None: def load_config(self) -> None:
# write default config.yaml if config file does not exist # write default config.yaml if config file does not exist
@@ -805,7 +829,7 @@ class KleinanzeigenBot(WebScrapingMixin):
# Update content hash after successful publication # Update content hash after successful publication
# Calculate hash on original config to ensure consistent comparison on restart # Calculate hash on original config to ensure consistent comparison on restart
ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg_orig) ad_cfg_orig["content_hash"] = AdPartial.model_validate(ad_cfg_orig).update_content_hash().content_hash
ad_cfg_orig["updated_on"] = misc.now().isoformat(timespec = "seconds") ad_cfg_orig["updated_on"] = misc.now().isoformat(timespec = "seconds")
if not ad_cfg.created_on and not ad_cfg.id: if not ad_cfg.created_on and not ad_cfg.id:
ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"] ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]
@@ -1052,7 +1076,7 @@ class KleinanzeigenBot(WebScrapingMixin):
elif self.ads_selector == "new": # download only unsaved ads elif self.ads_selector == "new": # download only unsaved ads
# check which ads already saved # check which ads already saved
saved_ad_ids = [] saved_ad_ids = []
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs ads = self.load_ads(ignore_inactive = False, exclude_ads_with_id = False) # do not skip because of existing IDs
for ad in ads: for ad in ads:
ad_id = int(ad[2]["id"]) ad_id = int(ad[2]["id"])
saved_ad_ids.append(ad_id) saved_ad_ids.append(ad_id)
@@ -1111,7 +1135,7 @@ class KleinanzeigenBot(WebScrapingMixin):
# 1. Direct ad-level prefix # 1. Direct ad-level prefix
ad_cfg.description_prefix if ad_cfg.description_prefix is not None ad_cfg.description_prefix if ad_cfg.description_prefix is not None
# 2. Global prefix from config # 2. Global prefix from config
else get_description_affixes(self.config, prefix = True) else self.config.ad_defaults.description_prefix
or "" # Default to empty string if all sources are None or "" # Default to empty string if all sources are None
) )
@@ -1120,7 +1144,7 @@ class KleinanzeigenBot(WebScrapingMixin):
# 1. Direct ad-level suffix # 1. Direct ad-level suffix
ad_cfg.description_suffix if ad_cfg.description_suffix is not None ad_cfg.description_suffix if ad_cfg.description_suffix is not None
# 2. Global suffix from config # 2. Global suffix from config
else get_description_affixes(self.config, prefix = False) else self.config.ad_defaults.description_suffix
or "" # Default to empty string if all sources are None or "" # Default to empty string if all sources are None
) )
@@ -1137,6 +1161,21 @@ class KleinanzeigenBot(WebScrapingMixin):
return final_description return final_description
def update_content_hashes(self, ads:list[tuple[str, Ad, dict[str, Any]]]) -> None:
count = 0
for (ad_file, ad_cfg, ad_cfg_orig) in ads:
LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ads), ad_cfg.title, ad_file)
ad_cfg.update_content_hash()
if ad_cfg.content_hash != ad_cfg_orig["content_hash"]:
count += 1
ad_cfg_orig["content_hash"] = ad_cfg.content_hash
dicts.save_dict(ad_file, ad_cfg_orig)
LOG.info("############################################")
LOG.info("DONE: Updated [content_hash] in %s", pluralize("ad", count))
LOG.info("############################################")
############################# #############################
# main entry point # main entry point
############################# #############################

View File

@@ -1,84 +0,0 @@
# SPDX-FileCopyrightText: © Jens Bergman and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import hashlib, json, os # isort: skip
from typing import Any
from .model.config_model import Config
from .utils.misc import get_attr
def calculate_content_hash(ad_cfg:dict[str, Any]) -> str:
"""Calculate a hash for user-modifiable fields of the ad."""
# Relevant fields for the hash
content = {
"active": bool(get_attr(ad_cfg, "active", default = True)), # Explicitly convert to bool
"type": str(get_attr(ad_cfg, "type", "")), # Explicitly convert to string
"title": str(get_attr(ad_cfg, "title", "")),
"description": str(get_attr(ad_cfg, "description", "")),
"category": str(get_attr(ad_cfg, "category", "")),
"price": str(get_attr(ad_cfg, "price", "")), # Price always as string
"price_type": str(get_attr(ad_cfg, "price_type", "")),
"special_attributes": dict(get_attr(ad_cfg, "special_attributes", {})), # Handle None case
"shipping_type": str(get_attr(ad_cfg, "shipping_type", "")),
"shipping_costs": str(get_attr(ad_cfg, "shipping_costs", "")),
"shipping_options": sorted([str(x) for x in get_attr(ad_cfg, "shipping_options", [])]), # Handle None case
"sell_directly": bool(get_attr(ad_cfg, "sell_directly", default = False)), # Explicitly convert to bool
"images": sorted([os.path.basename(str(img)) if img is not None else "" for img in get_attr(ad_cfg, "images", [])]), # Handle None values in images
"contact": {
"name": str(get_attr(ad_cfg, "contact.name", "")),
"street": str(get_attr(ad_cfg, "contact.street", "")), # Changed from "None" to empty string for consistency
"zipcode": str(get_attr(ad_cfg, "contact.zipcode", "")),
"phone": str(get_attr(ad_cfg, "contact.phone", ""))
}
}
# Create sorted JSON string for consistent hashes
content_str = json.dumps(content, sort_keys = True)
return hashlib.sha256(content_str.encode()).hexdigest()
def get_description_affixes(config:Config, *, prefix:bool = True) -> str:
"""Get prefix or suffix for description with proper precedence.
This function handles both the new flattened format and legacy nested format:
New format (flattened):
ad_defaults:
description_prefix: "Global Prefix"
description_suffix: "Global Suffix"
Legacy format (nested):
ad_defaults:
description:
prefix: "Legacy Prefix"
suffix: "Legacy Suffix"
Args:
config: Configuration dictionary containing ad_defaults
prefix: If True, get prefix, otherwise get suffix
Returns:
The appropriate affix string, empty string if none found
Example:
>>> config = {"ad_defaults": {"description_prefix": "Hello", "description": {"prefix": "Hi"}}}
>>> get_description_affixes(Config.model_validate(config), prefix=True)
'Hello'
"""
affix_type = "prefix" if prefix else "suffix"
# First try new flattened format (description_prefix/description_suffix)
flattened_key = f"description_{affix_type}"
flattened_value = getattr(config.ad_defaults, flattened_key)
if isinstance(flattened_value, str):
return flattened_value
# Then try legacy nested format (description.prefix/description.suffix)
if config.ad_defaults.description:
nested_value = getattr(config.ad_defaults.description, affix_type)
if isinstance(nested_value, str):
return nested_value
return ""

View File

@@ -8,7 +8,6 @@ from typing import Any, Final
from kleinanzeigen_bot.model.ad_model import ContactPartial from kleinanzeigen_bot.model.ad_model import ContactPartial
from .ads import calculate_content_hash, get_description_affixes
from .model.ad_model import AdPartial from .model.ad_model import AdPartial
from .model.config_model import Config from .model.config_model import Config
from .utils import dicts, i18n, loggers, misc, reflect from .utils import dicts, i18n, loggers, misc, reflect
@@ -294,8 +293,8 @@ class AdExtractor(WebScrapingMixin):
raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip() raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip()
# Get prefix and suffix from config # Get prefix and suffix from config
prefix = get_description_affixes(self.config, prefix = True) prefix = self.config.ad_defaults.description_prefix
suffix = get_description_affixes(self.config, prefix = False) suffix = self.config.ad_defaults.description_suffix
# Remove prefix and suffix if present # Remove prefix and suffix if present
description_text = raw_description description_text = raw_description
@@ -334,10 +333,12 @@ class AdExtractor(WebScrapingMixin):
info["created_on"] = creation_date info["created_on"] = creation_date
info["updated_on"] = None # will be set later on info["updated_on"] = None # will be set later on
# Calculate the initial hash for the downloaded ad ad_cfg = AdPartial.model_validate(info)
info["content_hash"] = calculate_content_hash(info)
return AdPartial.model_validate(info) # calculate the initial hash for the downloaded ad
ad_cfg.content_hash = ad_cfg.to_ad(self.config.ad_defaults).update_content_hash().content_hash
return ad_cfg
async def _extract_category_from_ad_page(self) -> str: async def _extract_category_from_ad_page(self) -> str:
""" """

View File

@@ -3,20 +3,28 @@
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from __future__ import annotations from __future__ import annotations
import hashlib, json # isort: skip
from datetime import datetime # noqa: TC003 Move import into a type-checking block from datetime import datetime # noqa: TC003 Move import into a type-checking block
from typing import Any, Dict, Final, List, Literal from typing import Any, Dict, Final, List, Literal, Mapping, Sequence
from pydantic import Field, model_validator, validator from pydantic import Field, model_validator, validator
from typing_extensions import Self
from kleinanzeigen_bot.model.config_model import AdDefaults # noqa: TC001 Move application import into a type-checking block
from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils.misc import parse_datetime, parse_decimal from kleinanzeigen_bot.utils.misc import parse_datetime, parse_decimal
from kleinanzeigen_bot.utils.pydantics import ContextualModel from kleinanzeigen_bot.utils.pydantics import ContextualModel
MAX_DESCRIPTION_LENGTH:Final[int] = 4000 MAX_DESCRIPTION_LENGTH:Final[int] = 4000
def _iso_datetime_field() -> Any: def _OPTIONAL() -> Any:
return Field(default = None)
def _ISO_DATETIME(default:datetime | None = None) -> Any:
return Field( return Field(
default = None, default = default,
description = "ISO-8601 timestamp with optional timezone (e.g. 2024-12-25T00:00:00 or 2024-12-25T00:00:00Z)", description = "ISO-8601 timestamp with optional timezone (e.g. 2024-12-25T00:00:00 or 2024-12-25T00:00:00Z)",
json_schema_extra = { json_schema_extra = {
"anyOf": [ "anyOf": [
@@ -36,37 +44,37 @@ def _iso_datetime_field() -> Any:
class ContactPartial(ContextualModel): class ContactPartial(ContextualModel):
name:str | None = None name:str | None = _OPTIONAL()
street:str | None = None street:str | None = _OPTIONAL()
zipcode:int | str | None = None zipcode:int | str | None = _OPTIONAL()
location:str | None = None location:str | None = _OPTIONAL()
phone:str | None = None phone:str | None = _OPTIONAL()
class AdPartial(ContextualModel): class AdPartial(ContextualModel):
active:bool = True active:bool | None = _OPTIONAL()
type:Literal["OFFER", "WANTED"] = "OFFER" type:Literal["OFFER", "WANTED"] | None = _OPTIONAL()
title:str = Field(..., min_length = 10) title:str = Field(..., min_length = 10)
description:str description:str
description_prefix:str | None = None description_prefix:str | None = _OPTIONAL()
description_suffix:str | None = None description_suffix:str | None = _OPTIONAL()
category:str category:str
special_attributes:Dict[str, str] | None = Field(default = None) special_attributes:Dict[str, str] | None = _OPTIONAL()
price:int | None = None price:int | None = _OPTIONAL()
price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE" price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] | None = _OPTIONAL()
shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING" shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] | None = _OPTIONAL()
shipping_costs:float | None = None shipping_costs:float | None = _OPTIONAL()
shipping_options:List[str] | None = Field(default = None) shipping_options:List[str] | None = _OPTIONAL()
sell_directly:bool | None = False sell_directly:bool | None = _OPTIONAL()
images:List[str] | None = Field(default = None) images:List[str] | None = _OPTIONAL()
contact:ContactPartial | None = None contact:ContactPartial | None = _OPTIONAL()
republication_interval:int = 7 republication_interval:int | None = _OPTIONAL()
id:int | None = None id:int | None = _OPTIONAL()
created_on:datetime | None = _iso_datetime_field() created_on:datetime | None = _ISO_DATETIME()
updated_on:datetime | None = _iso_datetime_field() updated_on:datetime | None = _ISO_DATETIME()
content_hash:str | None = None content_hash:str | None = _OPTIONAL()
@validator("created_on", "updated_on", pre = True) @validator("created_on", "updated_on", pre = True)
@classmethod @classmethod
@@ -105,11 +113,71 @@ class AdPartial(ContextualModel):
raise ValueError("shipping_options entries must be non-empty") raise ValueError("shipping_options entries must be non-empty")
return v return v
def update_content_hash(self) -> Self:
"""Calculate and updates the content_hash value for user-modifiable fields of the ad."""
# 1) Dump to a plain dict, excluding the metadata fields:
raw = self.model_dump(
exclude = {"id", "created_on", "updated_on", "content_hash"},
exclude_none = True,
exclude_unset = True,
)
# 2) Recursively prune any empty containers:
def prune(obj:Any) -> Any:
if isinstance(obj, Mapping):
return {
k: prune(v)
for k, v in obj.items()
# drop keys whose values are empty list/dict/set
if not (isinstance(v, (Mapping, Sequence, set)) and not isinstance(v, (str, bytes)) and len(v) == 0)
}
if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)):
return [
prune(v)
for v in obj
if not (isinstance(v, (Mapping, Sequence, set)) and not isinstance(v, (str, bytes)) and len(v) == 0)
]
return obj
pruned = prune(raw)
# 3) Produce a canonical JSON string and hash it:
json_string = json.dumps(pruned, sort_keys = True)
self.content_hash = hashlib.sha256(json_string.encode()).hexdigest()
return self
def to_ad(self, ad_defaults:AdDefaults) -> Ad:
"""
Returns a complete, validated Ad by merging this partial with values from ad_defaults.
Any field that is `None` or `""` is filled from `ad_defaults`.
Raises `ValidationError` when, after merging with `ad_defaults`, not all fields required by `Ad` are populated.
"""
ad_cfg = self.model_dump()
dicts.apply_defaults(
target = ad_cfg,
defaults = ad_defaults.model_dump(),
ignore = lambda k, _: k == "description", # ignore legacy global description config
override = lambda _, v: v in {None, ""} # noqa: PLC1901 can be simplified
)
return Ad.model_validate(ad_cfg)
# pyright: reportGeneralTypeIssues=false, reportIncompatibleVariableOverride=false
class Contact(ContactPartial): class Contact(ContactPartial):
name:str # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] name:str
zipcode:int | str # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] zipcode:int | str
# pyright: reportGeneralTypeIssues=false, reportIncompatibleVariableOverride=false
class Ad(AdPartial): class Ad(AdPartial):
contact:Contact # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] active:bool
type:Literal["OFFER", "WANTED"]
description:str
price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"]
shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"]
sell_directly:bool
contact:Contact
republication_interval:int

View File

@@ -4,12 +4,13 @@
from __future__ import annotations from __future__ import annotations
import copy import copy
from typing import Any, Dict, List, Literal from typing import Any, List, Literal
from pydantic import Field, model_validator, validator from pydantic import Field, model_validator, validator
from typing_extensions import deprecated from typing_extensions import deprecated
from kleinanzeigen_bot.utils import dicts from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils.misc import get_attr
from kleinanzeigen_bot.utils.pydantics import ContextualModel from kleinanzeigen_bot.utils.pydantics import ContextualModel
@@ -40,16 +41,17 @@ class AdDefaults(ContextualModel):
@model_validator(mode = "before") @model_validator(mode = "before")
@classmethod @classmethod
def unify_description(cls, values:Dict[str, Any]) -> Dict[str, Any]: def migrate_legacy_description(cls, values:dict[str, Any]) -> dict[str, Any]:
# Ensure flat prefix/suffix take precedence over deprecated nested "description" # Ensure flat prefix/suffix take precedence over deprecated nested "description"
desc = values.get("description") description_prefix = values.get("description_prefix")
flat_prefix = values.get("description_prefix") description_suffix = values.get("description_suffix")
flat_suffix = values.get("description_suffix") legacy_prefix = get_attr(values, "description.prefix")
legacy_suffix = get_attr(values, "description.suffix")
if not flat_prefix and isinstance(desc, dict) and desc.get("prefix") is not None: if not description_prefix and legacy_prefix is not None:
values["description_prefix"] = desc.get("prefix", "") values["description_prefix"] = legacy_prefix
if not flat_suffix and isinstance(desc, dict) and desc.get("suffix") is not None: if not description_suffix and legacy_suffix is not None:
values["description_suffix"] = desc.get("suffix", "") values["description_suffix"] = legacy_suffix
return values return values
@@ -115,7 +117,7 @@ if relative paths are specified, then they are relative to this configuration fi
description = "Default values for ads, can be overwritten in each ad configuration file" description = "Default values for ads, can be overwritten in each ad configuration file"
) )
categories:Dict[str, str] = Field(default_factory = dict, description = """ categories:dict[str, str] = Field(default_factory = dict, description = """
additional name to category ID mappings, see default list at additional name to category ID mappings, see default list at
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml

View File

@@ -133,6 +133,7 @@ kleinanzeigen_bot/__init__.py:
run: run:
"DONE: No configuration errors found.": "FERTIG: Keine Konfigurationsfehler gefunden." "DONE: No configuration errors found.": "FERTIG: Keine Konfigurationsfehler gefunden."
"DONE: No active ads found.": "FERTIG: Keine aktiven Anzeigen gefunden."
"You provided no ads selector. Defaulting to \"due\".": "Es wurden keine Anzeigen-Selektor angegeben. Es wird \"due\" verwendet." "You provided no ads selector. Defaulting to \"due\".": "Es wurden keine Anzeigen-Selektor angegeben. Es wird \"due\" verwendet."
"DONE: No new/outdated ads found.": "FERTIG: Keine neuen/veralteten Anzeigen gefunden." "DONE: No new/outdated ads found.": "FERTIG: Keine neuen/veralteten Anzeigen gefunden."
"DONE: No ads to delete found.": "FERTIG: Keine zu löschnenden Anzeigen gefunden." "DONE: No ads to delete found.": "FERTIG: Keine zu löschnenden Anzeigen gefunden."
@@ -148,6 +149,11 @@ kleinanzeigen_bot/__init__.py:
__set_shipping_options: __set_shipping_options:
"Unable to close shipping dialog!": "Versanddialog konnte nicht geschlossen werden!" "Unable to close shipping dialog!": "Versanddialog konnte nicht geschlossen werden!"
update_content_hashes:
"DONE: Updated [content_hash] in %s": "FERTIG: [content_hash] in %s aktualisiert."
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
"ad": "Anzeige"
################################################# #################################################
kleinanzeigen_bot/extract.py: kleinanzeigen_bot/extract.py:
################################################# #################################################

View File

@@ -67,7 +67,7 @@ def configure_console_logging() -> None:
CRITICAL: colorama.Fore.MAGENTA, CRITICAL: colorama.Fore.MAGENTA,
} }
def _relativize_paths_under_cwd(self, record: logging.LogRecord) -> None: def _relativize_paths_under_cwd(self, record:logging.LogRecord) -> None:
""" """
Mutate record.args in-place, converting any absolute-path strings Mutate record.args in-place, converting any absolute-path strings
under the current working directory into relative paths. under the current working directory into relative paths.
@@ -78,7 +78,7 @@ def configure_console_logging() -> None:
cwd = os.getcwd() cwd = os.getcwd()
def _rel_if_subpath(val: Any) -> Any: def _rel_if_subpath(val:Any) -> Any:
if isinstance(val, str) and os.path.isabs(val): if isinstance(val, str) and os.path.isabs(val):
# don't relativize log-file paths # don't relativize log-file paths
if val.endswith(".log"): if val.endswith(".log"):

View File

@@ -0,0 +1,39 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from kleinanzeigen_bot.model.ad_model import AdPartial
def test_update_content_hash() -> None:
minimal_ad_cfg = {
"id": "123456789",
"title": "Test Ad Title",
"category": "160",
"description": "Test Description",
}
minimal_ad_cfg_hash = "ae3defaccd6b41f379eb8de17263caa1bd306e35e74b11aa03a4738621e96ece"
assert AdPartial.model_validate(minimal_ad_cfg).update_content_hash().content_hash == minimal_ad_cfg_hash
assert AdPartial.model_validate(minimal_ad_cfg | {
"id": "123456789",
"created_on": "2025-05-08T09:34:03",
"updated_on": "2025-05-14T20:43:16",
"content_hash": "5753ead7cf42b0ace5fe658ecb930b3a8f57ef49bd52b7ea2d64b91b2c75517e"
}).update_content_hash().content_hash == minimal_ad_cfg_hash
assert AdPartial.model_validate(minimal_ad_cfg | {
"active": None,
"images": None,
"shipping_options": None,
"special_attributes": None,
"contact": None,
}).update_content_hash().content_hash == minimal_ad_cfg_hash
assert AdPartial.model_validate(minimal_ad_cfg | {
"active": True,
"images": [],
"shipping_options": [],
"special_attributes": {},
"contact": {},
}).update_content_hash().content_hash != minimal_ad_cfg_hash

View File

@@ -1,115 +0,0 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from typing import Any
import pytest
from kleinanzeigen_bot import ads
from kleinanzeigen_bot.model.config_model import Config
def test_calculate_content_hash_with_none_values() -> None:
"""Test calculate_content_hash with None values in the ad configuration."""
ad_cfg = {
# Minimal configuration with None values as described in bug report
"id": "123456789",
"created_on": "2022-07-19T07:30:20.489289",
"updated_on": "2025-01-22T19:46:46.735896",
"title": "Test Ad",
"description": "Test Description",
"images": [None, "/path/to/image.jpg", None], # List containing None values
"shipping_options": None, # None instead of list
"special_attributes": None, # None instead of dictionary
"contact": {
"street": None # None value in contact
}
}
# Should not raise TypeError
hash_value = ads.calculate_content_hash(ad_cfg)
assert isinstance(hash_value, str)
assert len(hash_value) == 64 # SHA-256 hash is 64 characters long
@pytest.mark.parametrize(("config", "prefix", "expected"), [
# Test new flattened format - prefix
(
{"ad_defaults": {"description_prefix": "Hello"}},
True,
"Hello"
),
# Test new flattened format - suffix
(
{"ad_defaults": {"description_suffix": "Bye"}},
False,
"Bye"
),
# Test legacy nested format - prefix
(
{"ad_defaults": {"description": {"prefix": "Hi"}}},
True,
"Hi"
),
# Test legacy nested format - suffix
(
{"ad_defaults": {"description": {"suffix": "Ciao"}}},
False,
"Ciao"
),
# Test precedence (new format over legacy) - prefix
(
{
"ad_defaults": {
"description_prefix": "Hello",
"description": {"prefix": "Hi"}
}
},
True,
"Hello"
),
# Test precedence (new format over legacy) - suffix
(
{
"ad_defaults": {
"description_suffix": "Bye",
"description": {"suffix": "Ciao"}
}
},
False,
"Bye"
),
# Test empty config
(
{"ad_defaults": {}},
True,
""
),
# Test None values
(
{"ad_defaults": {"description_prefix": None, "description_suffix": None}},
True,
""
),
# Add test for malformed config
(
{}, # Empty config
True,
""
),
# Test for missing ad_defaults
(
{"some_other_key": {}},
True,
""
),
])
def test_get_description_affixes(
config:dict[str, Any],
prefix:bool,
expected:str,
test_bot_config:Config
) -> None:
"""Test get_description_affixes function with various inputs."""
result = ads.get_description_affixes(test_bot_config.with_values(config), prefix = prefix)
assert result == expected

View File

@@ -0,0 +1,62 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from kleinanzeigen_bot.model.config_model import AdDefaults
def test_migrate_legacy_description_prefix() -> None:
assert AdDefaults.model_validate({
}).description_prefix is None
assert AdDefaults.model_validate({
"description_prefix": "Prefix"
}).description_prefix == "Prefix"
assert AdDefaults.model_validate({
"description_prefix": "Prefix",
"description": {
"prefix": "Legacy Prefix"
}
}).description_prefix == "Prefix"
assert AdDefaults.model_validate({
"description": {
"prefix": "Legacy Prefix"
}
}).description_prefix == "Legacy Prefix"
assert AdDefaults.model_validate({
"description_prefix": "",
"description": {
"prefix": "Legacy Prefix"
}
}).description_prefix == "Legacy Prefix"
def test_migrate_legacy_description_suffix() -> None:
assert AdDefaults.model_validate({
}).description_suffix is None
assert AdDefaults.model_validate({
"description_suffix": "Suffix"
}).description_suffix == "Suffix"
assert AdDefaults.model_validate({
"description_suffix": "Suffix",
"description": {
"suffix": "Legacy Suffix"
}
}).description_suffix == "Suffix"
assert AdDefaults.model_validate({
"description": {
"suffix": "Legacy Suffix"
}
}).description_suffix == "Legacy Suffix"
assert AdDefaults.model_validate({
"description_suffix": "",
"description": {
"suffix": "Legacy Suffix"
}
}).description_suffix == "Legacy Suffix"

View File

@@ -14,7 +14,6 @@ from pydantic import ValidationError
from kleinanzeigen_bot import LOG, KleinanzeigenBot, misc from kleinanzeigen_bot import LOG, KleinanzeigenBot, misc
from kleinanzeigen_bot._version import __version__ from kleinanzeigen_bot._version import __version__
from kleinanzeigen_bot.ads import calculate_content_hash
from kleinanzeigen_bot.model.ad_model import Ad from kleinanzeigen_bot.model.ad_model import Ad
from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig
from kleinanzeigen_bot.utils import dicts, loggers from kleinanzeigen_bot.utils import dicts, loggers
@@ -844,7 +843,7 @@ class TestKleinanzeigenBotAdRepublication:
# Calculate hash before making the copy to ensure they match # Calculate hash before making the copy to ensure they match
ad_cfg_orig = ad_cfg.model_dump() ad_cfg_orig = ad_cfg.model_dump()
current_hash = calculate_content_hash(ad_cfg_orig) current_hash = ad_cfg.update_content_hash().content_hash
ad_cfg_orig["content_hash"] = current_hash ad_cfg_orig["content_hash"] = current_hash
# Mock the config to prevent actual file operations # Mock the config to prevent actual file operations
@@ -874,8 +873,8 @@ class TestKleinanzeigenBotShippingOptions:
}) })
# Create the original ad config and published ads list # Create the original ad config and published ads list
ad_cfg.update_content_hash() # Add content hash to prevent republication
ad_cfg_orig = ad_cfg.model_dump() ad_cfg_orig = ad_cfg.model_dump()
ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg_orig) # Add content hash to prevent republication
published_ads:list[dict[str, Any]] = [] published_ads:list[dict[str, Any]] = []
# Set up default config values needed for the test # Set up default config values needed for the test
@@ -1155,7 +1154,7 @@ class TestKleinanzeigenBotChangedAds:
# Calculate hash for changed_ad and add it to the config # Calculate hash for changed_ad and add it to the config
# Then modify the ad to simulate a change # Then modify the ad to simulate a change
changed_ad = ad_cfg.model_dump() changed_ad = ad_cfg.model_dump()
changed_hash = calculate_content_hash(changed_ad) changed_hash = ad_cfg.update_content_hash().content_hash
changed_ad["content_hash"] = changed_hash changed_ad["content_hash"] = changed_hash
# Now modify the ad to make it "changed" # Now modify the ad to make it "changed"
changed_ad["title"] = "Changed Ad - Modified" changed_ad["title"] = "Changed Ad - Modified"