mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
feat: improve content_hash calculation
This commit is contained in:
committed by
Sebastian Thomschke
parent
f1cd597dd8
commit
85a5cf5224
@@ -226,6 +226,7 @@ ignore = [
|
|||||||
"E231", # Don't add whitespace after colon (:) on type declaration
|
"E231", # Don't add whitespace after colon (:) on type declaration
|
||||||
"E251", # Don't remove whitespace around parameter '=' sign.
|
"E251", # Don't remove whitespace around parameter '=' sign.
|
||||||
"E401", # Don't put imports on separate lines
|
"E401", # Don't put imports on separate lines
|
||||||
|
"FIX002", # Line contains TODO, consider resolving the issue
|
||||||
"PERF203", # `try`-`except` within a loop incurs performance overhead
|
"PERF203", # `try`-`except` within a loop incurs performance overhead
|
||||||
"RET504", # Unnecessary assignment to `...` before `return` statement
|
"RET504", # Unnecessary assignment to `...` before `return` statement
|
||||||
"PLR6301", # Method `...` could be a function, class method, or static method
|
"PLR6301", # Method `...` could be a function, class method, or static method
|
||||||
@@ -234,6 +235,8 @@ ignore = [
|
|||||||
"SIM105", # Use `contextlib.suppress(TimeoutError)` instead of `try`-`except`-`pass`
|
"SIM105", # Use `contextlib.suppress(TimeoutError)` instead of `try`-`except`-`pass`
|
||||||
"SIM114", # Combine `if` branches using logical `or` operator
|
"SIM114", # Combine `if` branches using logical `or` operator
|
||||||
"TC006", # Add quotes to type expression in `typing.cast()`
|
"TC006", # Add quotes to type expression in `typing.cast()`
|
||||||
|
"TD002", # Missing author in TODO
|
||||||
|
"TD003", # Missing issue link for this TODO
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.ruff.lint.per-file-ignores]
|
[tool.ruff.lint.per-file-ignores]
|
||||||
@@ -263,12 +266,12 @@ min-file-size = 256
|
|||||||
# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker
|
# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker
|
||||||
# https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages
|
# https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages
|
||||||
max-args = 6 # max. number of args for function / method (R0913)
|
max-args = 6 # max. number of args for function / method (R0913)
|
||||||
# max-attributes = 15 # max. number of instance attrs for a class (R0902)
|
# max-attributes = 15 # TODO max. number of instance attrs for a class (R0902)
|
||||||
max-branches = 40 # max. number of branch for function / method body (R0912)
|
max-branches = 40 # max. number of branch for function / method body (R0912)
|
||||||
max-locals = 30 # max. number of local vars for function / method body (R0914)
|
max-locals = 30 # max. number of local vars for function / method body (R0914)
|
||||||
max-returns = 15 # max. number of return / yield for function / method body (R0911)
|
max-returns = 15 # max. number of return / yield for function / method body (R0911)
|
||||||
max-statements = 150 # max. number of statements in function / method body (R0915)
|
max-statements = 150 # max. number of statements in function / method body (R0915)
|
||||||
max-public-methods = 20 # max. number of public methods for a class (R0904)
|
max-public-methods = 25 # max. number of public methods for a class (R0904)
|
||||||
# max-positional-arguments = 5 # max. number of positional args for function / method (R0917)
|
# max-positional-arguments = 5 # max. number of positional args for function / method (R0917)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -72,19 +72,33 @@
|
|||||||
},
|
},
|
||||||
"properties": {
|
"properties": {
|
||||||
"active": {
|
"active": {
|
||||||
"default": true,
|
"anyOf": [
|
||||||
"title": "Active",
|
{
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"default": null,
|
||||||
|
"title": "Active"
|
||||||
|
},
|
||||||
"type": {
|
"type": {
|
||||||
"default": "OFFER",
|
"anyOf": [
|
||||||
|
{
|
||||||
"enum": [
|
"enum": [
|
||||||
"OFFER",
|
"OFFER",
|
||||||
"WANTED"
|
"WANTED"
|
||||||
],
|
],
|
||||||
"title": "Type",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"default": null,
|
||||||
|
"title": "Type"
|
||||||
|
},
|
||||||
"title": {
|
"title": {
|
||||||
"minLength": 10,
|
"minLength": 10,
|
||||||
"title": "Title",
|
"title": "Title",
|
||||||
@@ -150,26 +164,40 @@
|
|||||||
"title": "Price"
|
"title": "Price"
|
||||||
},
|
},
|
||||||
"price_type": {
|
"price_type": {
|
||||||
"default": "NEGOTIABLE",
|
"anyOf": [
|
||||||
|
{
|
||||||
"enum": [
|
"enum": [
|
||||||
"FIXED",
|
"FIXED",
|
||||||
"NEGOTIABLE",
|
"NEGOTIABLE",
|
||||||
"GIVE_AWAY",
|
"GIVE_AWAY",
|
||||||
"NOT_APPLICABLE"
|
"NOT_APPLICABLE"
|
||||||
],
|
],
|
||||||
"title": "Price Type",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"default": null,
|
||||||
|
"title": "Price Type"
|
||||||
|
},
|
||||||
"shipping_type": {
|
"shipping_type": {
|
||||||
"default": "SHIPPING",
|
"anyOf": [
|
||||||
|
{
|
||||||
"enum": [
|
"enum": [
|
||||||
"PICKUP",
|
"PICKUP",
|
||||||
"SHIPPING",
|
"SHIPPING",
|
||||||
"NOT_APPLICABLE"
|
"NOT_APPLICABLE"
|
||||||
],
|
],
|
||||||
"title": "Shipping Type",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"default": null,
|
||||||
|
"title": "Shipping Type"
|
||||||
|
},
|
||||||
"shipping_costs": {
|
"shipping_costs": {
|
||||||
"anyOf": [
|
"anyOf": [
|
||||||
{
|
{
|
||||||
@@ -206,7 +234,7 @@
|
|||||||
"type": "null"
|
"type": "null"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"default": false,
|
"default": null,
|
||||||
"title": "Sell Directly"
|
"title": "Sell Directly"
|
||||||
},
|
},
|
||||||
"images": {
|
"images": {
|
||||||
@@ -236,10 +264,17 @@
|
|||||||
"default": null
|
"default": null
|
||||||
},
|
},
|
||||||
"republication_interval": {
|
"republication_interval": {
|
||||||
"default": 7,
|
"anyOf": [
|
||||||
"title": "Republication Interval",
|
{
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"default": null,
|
||||||
|
"title": "Republication Interval"
|
||||||
|
},
|
||||||
"id": {
|
"id": {
|
||||||
"anyOf": [
|
"anyOf": [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
import atexit, copy, json, os, re, signal, sys, textwrap # isort: skip
|
import atexit, json, os, re, signal, sys, textwrap # isort: skip
|
||||||
import getopt # pylint: disable=deprecated-module
|
import getopt # pylint: disable=deprecated-module
|
||||||
import urllib.parse as urllib_parse
|
import urllib.parse as urllib_parse
|
||||||
from gettext import gettext as _
|
from gettext import gettext as _
|
||||||
@@ -13,8 +13,7 @@ from wcmatch import glob
|
|||||||
|
|
||||||
from . import extract, resources
|
from . import extract, resources
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
from .ads import calculate_content_hash, get_description_affixes
|
from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial
|
||||||
from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad
|
|
||||||
from .model.config_model import Config
|
from .model.config_model import Config
|
||||||
from .utils import dicts, error_handlers, loggers, misc
|
from .utils import dicts, error_handlers, loggers, misc
|
||||||
from .utils.exceptions import CaptchaEncountered
|
from .utils.exceptions import CaptchaEncountered
|
||||||
@@ -80,6 +79,16 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
LOG.info("DONE: No configuration errors found.")
|
LOG.info("DONE: No configuration errors found.")
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
|
case "update-content-hash":
|
||||||
|
self.configure_file_logging()
|
||||||
|
self.load_config()
|
||||||
|
self.ads_selector = "all"
|
||||||
|
if ads := self.load_ads(exclude_ads_with_id = False):
|
||||||
|
self.update_content_hashes(ads)
|
||||||
|
else:
|
||||||
|
LOG.info("############################################")
|
||||||
|
LOG.info("DONE: No active ads found.")
|
||||||
|
LOG.info("############################################")
|
||||||
case "publish":
|
case "publish":
|
||||||
self.configure_file_logging()
|
self.configure_file_logging()
|
||||||
self.load_config()
|
self.load_config()
|
||||||
@@ -143,6 +152,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
verify - Überprüft die Konfigurationsdateien
|
verify - Überprüft die Konfigurationsdateien
|
||||||
delete - Löscht Anzeigen
|
delete - Löscht Anzeigen
|
||||||
download - Lädt eine oder mehrere Anzeigen herunter
|
download - Lädt eine oder mehrere Anzeigen herunter
|
||||||
|
update-content-hash - Berechnet den content_hash aller Anzeigen anhand der aktuellen ad_defaults neu;
|
||||||
|
nach Änderungen an den config.yaml/ad_defaults verhindert es, dass alle Anzeigen als
|
||||||
|
"geändert" gelten und neu veröffentlicht werden.
|
||||||
--
|
--
|
||||||
help - Zeigt diese Hilfe an (Standardbefehl)
|
help - Zeigt diese Hilfe an (Standardbefehl)
|
||||||
version - Zeigt die Version der Anwendung an
|
version - Zeigt die Version der Anwendung an
|
||||||
@@ -178,6 +190,8 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
verify - verifies the configuration files
|
verify - verifies the configuration files
|
||||||
delete - deletes ads
|
delete - deletes ads
|
||||||
download - downloads one or multiple ads
|
download - downloads one or multiple ads
|
||||||
|
update-content-hash – recalculates each ad’s content_hash based on the current ad_defaults;
|
||||||
|
use this after changing config.yaml/ad_defaults to avoid every ad being marked "changed" and republished
|
||||||
--
|
--
|
||||||
help - displays this help (default command)
|
help - displays this help (default command)
|
||||||
version - displays the application version
|
version - displays the application version
|
||||||
@@ -269,9 +283,10 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
def __check_ad_republication(self, ad_cfg:Ad, ad_file_relative:str) -> bool:
|
def __check_ad_republication(self, ad_cfg:Ad, ad_file_relative:str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if an ad needs to be republished based on republication interval.
|
Check if an ad needs to be republished based on republication interval.
|
||||||
Returns True if the ad should be republished based on the interval.
|
Note: This method does not check for content changes. Use __check_ad_changed for that.
|
||||||
|
|
||||||
Note: This method no longer checks for content changes. Use __check_ad_changed for that.
|
Returns:
|
||||||
|
True if the ad should be republished based on the interval.
|
||||||
"""
|
"""
|
||||||
if ad_cfg.updated_on:
|
if ad_cfg.updated_on:
|
||||||
last_updated_on = ad_cfg.updated_on
|
last_updated_on = ad_cfg.updated_on
|
||||||
@@ -299,14 +314,16 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
def __check_ad_changed(self, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool:
|
def __check_ad_changed(self, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if an ad has been changed since last publication.
|
Check if an ad has been changed since last publication.
|
||||||
Returns True if the ad has been changed.
|
|
||||||
|
Returns:
|
||||||
|
True if the ad has been changed.
|
||||||
"""
|
"""
|
||||||
if not ad_cfg.id:
|
if not ad_cfg.id:
|
||||||
# New ads are not considered "changed"
|
# New ads are not considered "changed"
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Calculate hash on original config to match what was stored
|
# Calculate hash on original config to match what was stored
|
||||||
current_hash = calculate_content_hash(ad_cfg_orig)
|
current_hash = AdPartial.model_validate(ad_cfg_orig).update_content_hash().content_hash
|
||||||
stored_hash = ad_cfg_orig.get("content_hash")
|
stored_hash = ad_cfg_orig.get("content_hash")
|
||||||
|
|
||||||
LOG.debug("Hash comparison for [%s]:", ad_file_relative)
|
LOG.debug("Hash comparison for [%s]:", ad_file_relative)
|
||||||
@@ -321,7 +338,20 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, Ad, dict[str, Any]]]:
|
def load_ads(self, *, ignore_inactive:bool = True, exclude_ads_with_id:bool = True) -> list[tuple[str, Ad, dict[str, Any]]]:
|
||||||
|
"""
|
||||||
|
Load and validate all ad config files, optionally filtering out inactive or already‐published ads.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ignore_inactive (bool):
|
||||||
|
Skip ads with `active=False`.
|
||||||
|
exclude_ads_with_id (bool):
|
||||||
|
Skip ads whose raw data already contains an `id`, i.e. was published before.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list[tuple[str, Ad, dict[str, Any]]]:
|
||||||
|
Tuples of (file_path, validated Ad model, original raw data).
|
||||||
|
"""
|
||||||
LOG.info("Searching for ad config files...")
|
LOG.info("Searching for ad config files...")
|
||||||
|
|
||||||
ad_files:dict[str, str] = {}
|
ad_files:dict[str, str] = {}
|
||||||
@@ -366,9 +396,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
should_include = True
|
should_include = True
|
||||||
|
|
||||||
# Check for 'new' selector
|
# Check for 'new' selector
|
||||||
if "new" in selectors and (not ad_cfg.id or not check_id):
|
if "new" in selectors and (not ad_cfg.id or not exclude_ads_with_id):
|
||||||
should_include = True
|
should_include = True
|
||||||
elif "new" in selectors and ad_cfg.id and check_id:
|
elif "new" in selectors and ad_cfg.id and exclude_ads_with_id:
|
||||||
LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative)
|
LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative)
|
||||||
|
|
||||||
# Check for 'due' selector
|
# Check for 'due' selector
|
||||||
@@ -427,13 +457,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
return ads
|
return ads
|
||||||
|
|
||||||
def load_ad(self, ad_cfg_orig:dict[str, Any]) -> Ad:
|
def load_ad(self, ad_cfg_orig:dict[str, Any]) -> Ad:
|
||||||
ad_cfg_merged = dicts.apply_defaults(
|
return AdPartial.model_validate(ad_cfg_orig).to_ad(self.config.ad_defaults)
|
||||||
target = copy.deepcopy(ad_cfg_orig),
|
|
||||||
defaults = self.config.ad_defaults.model_dump(),
|
|
||||||
ignore = lambda k, _: k == "description",
|
|
||||||
override = lambda _, v: v == "" # noqa: PLC1901 can be simplified to `not v` as an empty string is falsey
|
|
||||||
)
|
|
||||||
return Ad.model_validate(ad_cfg_merged)
|
|
||||||
|
|
||||||
def load_config(self) -> None:
|
def load_config(self) -> None:
|
||||||
# write default config.yaml if config file does not exist
|
# write default config.yaml if config file does not exist
|
||||||
@@ -805,7 +829,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
# Update content hash after successful publication
|
# Update content hash after successful publication
|
||||||
# Calculate hash on original config to ensure consistent comparison on restart
|
# Calculate hash on original config to ensure consistent comparison on restart
|
||||||
ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg_orig)
|
ad_cfg_orig["content_hash"] = AdPartial.model_validate(ad_cfg_orig).update_content_hash().content_hash
|
||||||
ad_cfg_orig["updated_on"] = misc.now().isoformat(timespec = "seconds")
|
ad_cfg_orig["updated_on"] = misc.now().isoformat(timespec = "seconds")
|
||||||
if not ad_cfg.created_on and not ad_cfg.id:
|
if not ad_cfg.created_on and not ad_cfg.id:
|
||||||
ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]
|
ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]
|
||||||
@@ -1052,7 +1076,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
elif self.ads_selector == "new": # download only unsaved ads
|
elif self.ads_selector == "new": # download only unsaved ads
|
||||||
# check which ads already saved
|
# check which ads already saved
|
||||||
saved_ad_ids = []
|
saved_ad_ids = []
|
||||||
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
|
ads = self.load_ads(ignore_inactive = False, exclude_ads_with_id = False) # do not skip because of existing IDs
|
||||||
for ad in ads:
|
for ad in ads:
|
||||||
ad_id = int(ad[2]["id"])
|
ad_id = int(ad[2]["id"])
|
||||||
saved_ad_ids.append(ad_id)
|
saved_ad_ids.append(ad_id)
|
||||||
@@ -1111,7 +1135,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
# 1. Direct ad-level prefix
|
# 1. Direct ad-level prefix
|
||||||
ad_cfg.description_prefix if ad_cfg.description_prefix is not None
|
ad_cfg.description_prefix if ad_cfg.description_prefix is not None
|
||||||
# 2. Global prefix from config
|
# 2. Global prefix from config
|
||||||
else get_description_affixes(self.config, prefix = True)
|
else self.config.ad_defaults.description_prefix
|
||||||
or "" # Default to empty string if all sources are None
|
or "" # Default to empty string if all sources are None
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1120,7 +1144,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
# 1. Direct ad-level suffix
|
# 1. Direct ad-level suffix
|
||||||
ad_cfg.description_suffix if ad_cfg.description_suffix is not None
|
ad_cfg.description_suffix if ad_cfg.description_suffix is not None
|
||||||
# 2. Global suffix from config
|
# 2. Global suffix from config
|
||||||
else get_description_affixes(self.config, prefix = False)
|
else self.config.ad_defaults.description_suffix
|
||||||
or "" # Default to empty string if all sources are None
|
or "" # Default to empty string if all sources are None
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1137,6 +1161,21 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
return final_description
|
return final_description
|
||||||
|
|
||||||
|
def update_content_hashes(self, ads:list[tuple[str, Ad, dict[str, Any]]]) -> None:
|
||||||
|
count = 0
|
||||||
|
|
||||||
|
for (ad_file, ad_cfg, ad_cfg_orig) in ads:
|
||||||
|
LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ads), ad_cfg.title, ad_file)
|
||||||
|
ad_cfg.update_content_hash()
|
||||||
|
if ad_cfg.content_hash != ad_cfg_orig["content_hash"]:
|
||||||
|
count += 1
|
||||||
|
ad_cfg_orig["content_hash"] = ad_cfg.content_hash
|
||||||
|
dicts.save_dict(ad_file, ad_cfg_orig)
|
||||||
|
|
||||||
|
LOG.info("############################################")
|
||||||
|
LOG.info("DONE: Updated [content_hash] in %s", pluralize("ad", count))
|
||||||
|
LOG.info("############################################")
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# main entry point
|
# main entry point
|
||||||
#############################
|
#############################
|
||||||
|
|||||||
@@ -1,84 +0,0 @@
|
|||||||
# SPDX-FileCopyrightText: © Jens Bergman and contributors
|
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
|
||||||
import hashlib, json, os # isort: skip
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from .model.config_model import Config
|
|
||||||
from .utils.misc import get_attr
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_content_hash(ad_cfg:dict[str, Any]) -> str:
|
|
||||||
"""Calculate a hash for user-modifiable fields of the ad."""
|
|
||||||
|
|
||||||
# Relevant fields for the hash
|
|
||||||
content = {
|
|
||||||
"active": bool(get_attr(ad_cfg, "active", default = True)), # Explicitly convert to bool
|
|
||||||
"type": str(get_attr(ad_cfg, "type", "")), # Explicitly convert to string
|
|
||||||
"title": str(get_attr(ad_cfg, "title", "")),
|
|
||||||
"description": str(get_attr(ad_cfg, "description", "")),
|
|
||||||
"category": str(get_attr(ad_cfg, "category", "")),
|
|
||||||
"price": str(get_attr(ad_cfg, "price", "")), # Price always as string
|
|
||||||
"price_type": str(get_attr(ad_cfg, "price_type", "")),
|
|
||||||
"special_attributes": dict(get_attr(ad_cfg, "special_attributes", {})), # Handle None case
|
|
||||||
"shipping_type": str(get_attr(ad_cfg, "shipping_type", "")),
|
|
||||||
"shipping_costs": str(get_attr(ad_cfg, "shipping_costs", "")),
|
|
||||||
"shipping_options": sorted([str(x) for x in get_attr(ad_cfg, "shipping_options", [])]), # Handle None case
|
|
||||||
"sell_directly": bool(get_attr(ad_cfg, "sell_directly", default = False)), # Explicitly convert to bool
|
|
||||||
"images": sorted([os.path.basename(str(img)) if img is not None else "" for img in get_attr(ad_cfg, "images", [])]), # Handle None values in images
|
|
||||||
"contact": {
|
|
||||||
"name": str(get_attr(ad_cfg, "contact.name", "")),
|
|
||||||
"street": str(get_attr(ad_cfg, "contact.street", "")), # Changed from "None" to empty string for consistency
|
|
||||||
"zipcode": str(get_attr(ad_cfg, "contact.zipcode", "")),
|
|
||||||
"phone": str(get_attr(ad_cfg, "contact.phone", ""))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Create sorted JSON string for consistent hashes
|
|
||||||
content_str = json.dumps(content, sort_keys = True)
|
|
||||||
return hashlib.sha256(content_str.encode()).hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def get_description_affixes(config:Config, *, prefix:bool = True) -> str:
|
|
||||||
"""Get prefix or suffix for description with proper precedence.
|
|
||||||
|
|
||||||
This function handles both the new flattened format and legacy nested format:
|
|
||||||
|
|
||||||
New format (flattened):
|
|
||||||
ad_defaults:
|
|
||||||
description_prefix: "Global Prefix"
|
|
||||||
description_suffix: "Global Suffix"
|
|
||||||
|
|
||||||
Legacy format (nested):
|
|
||||||
ad_defaults:
|
|
||||||
description:
|
|
||||||
prefix: "Legacy Prefix"
|
|
||||||
suffix: "Legacy Suffix"
|
|
||||||
|
|
||||||
Args:
|
|
||||||
config: Configuration dictionary containing ad_defaults
|
|
||||||
prefix: If True, get prefix, otherwise get suffix
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The appropriate affix string, empty string if none found
|
|
||||||
|
|
||||||
Example:
|
|
||||||
>>> config = {"ad_defaults": {"description_prefix": "Hello", "description": {"prefix": "Hi"}}}
|
|
||||||
>>> get_description_affixes(Config.model_validate(config), prefix=True)
|
|
||||||
'Hello'
|
|
||||||
"""
|
|
||||||
affix_type = "prefix" if prefix else "suffix"
|
|
||||||
|
|
||||||
# First try new flattened format (description_prefix/description_suffix)
|
|
||||||
flattened_key = f"description_{affix_type}"
|
|
||||||
flattened_value = getattr(config.ad_defaults, flattened_key)
|
|
||||||
if isinstance(flattened_value, str):
|
|
||||||
return flattened_value
|
|
||||||
|
|
||||||
# Then try legacy nested format (description.prefix/description.suffix)
|
|
||||||
if config.ad_defaults.description:
|
|
||||||
nested_value = getattr(config.ad_defaults.description, affix_type)
|
|
||||||
if isinstance(nested_value, str):
|
|
||||||
return nested_value
|
|
||||||
|
|
||||||
return ""
|
|
||||||
@@ -8,7 +8,6 @@ from typing import Any, Final
|
|||||||
|
|
||||||
from kleinanzeigen_bot.model.ad_model import ContactPartial
|
from kleinanzeigen_bot.model.ad_model import ContactPartial
|
||||||
|
|
||||||
from .ads import calculate_content_hash, get_description_affixes
|
|
||||||
from .model.ad_model import AdPartial
|
from .model.ad_model import AdPartial
|
||||||
from .model.config_model import Config
|
from .model.config_model import Config
|
||||||
from .utils import dicts, i18n, loggers, misc, reflect
|
from .utils import dicts, i18n, loggers, misc, reflect
|
||||||
@@ -294,8 +293,8 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip()
|
raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip()
|
||||||
|
|
||||||
# Get prefix and suffix from config
|
# Get prefix and suffix from config
|
||||||
prefix = get_description_affixes(self.config, prefix = True)
|
prefix = self.config.ad_defaults.description_prefix
|
||||||
suffix = get_description_affixes(self.config, prefix = False)
|
suffix = self.config.ad_defaults.description_suffix
|
||||||
|
|
||||||
# Remove prefix and suffix if present
|
# Remove prefix and suffix if present
|
||||||
description_text = raw_description
|
description_text = raw_description
|
||||||
@@ -334,10 +333,12 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
info["created_on"] = creation_date
|
info["created_on"] = creation_date
|
||||||
info["updated_on"] = None # will be set later on
|
info["updated_on"] = None # will be set later on
|
||||||
|
|
||||||
# Calculate the initial hash for the downloaded ad
|
ad_cfg = AdPartial.model_validate(info)
|
||||||
info["content_hash"] = calculate_content_hash(info)
|
|
||||||
|
|
||||||
return AdPartial.model_validate(info)
|
# calculate the initial hash for the downloaded ad
|
||||||
|
ad_cfg.content_hash = ad_cfg.to_ad(self.config.ad_defaults).update_content_hash().content_hash
|
||||||
|
|
||||||
|
return ad_cfg
|
||||||
|
|
||||||
async def _extract_category_from_ad_page(self) -> str:
|
async def _extract_category_from_ad_page(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -3,20 +3,28 @@
|
|||||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib, json # isort: skip
|
||||||
from datetime import datetime # noqa: TC003 Move import into a type-checking block
|
from datetime import datetime # noqa: TC003 Move import into a type-checking block
|
||||||
from typing import Any, Dict, Final, List, Literal
|
from typing import Any, Dict, Final, List, Literal, Mapping, Sequence
|
||||||
|
|
||||||
from pydantic import Field, model_validator, validator
|
from pydantic import Field, model_validator, validator
|
||||||
|
from typing_extensions import Self
|
||||||
|
|
||||||
|
from kleinanzeigen_bot.model.config_model import AdDefaults # noqa: TC001 Move application import into a type-checking block
|
||||||
|
from kleinanzeigen_bot.utils import dicts
|
||||||
from kleinanzeigen_bot.utils.misc import parse_datetime, parse_decimal
|
from kleinanzeigen_bot.utils.misc import parse_datetime, parse_decimal
|
||||||
from kleinanzeigen_bot.utils.pydantics import ContextualModel
|
from kleinanzeigen_bot.utils.pydantics import ContextualModel
|
||||||
|
|
||||||
MAX_DESCRIPTION_LENGTH:Final[int] = 4000
|
MAX_DESCRIPTION_LENGTH:Final[int] = 4000
|
||||||
|
|
||||||
|
|
||||||
def _iso_datetime_field() -> Any:
|
def _OPTIONAL() -> Any:
|
||||||
|
return Field(default = None)
|
||||||
|
|
||||||
|
|
||||||
|
def _ISO_DATETIME(default:datetime | None = None) -> Any:
|
||||||
return Field(
|
return Field(
|
||||||
default = None,
|
default = default,
|
||||||
description = "ISO-8601 timestamp with optional timezone (e.g. 2024-12-25T00:00:00 or 2024-12-25T00:00:00Z)",
|
description = "ISO-8601 timestamp with optional timezone (e.g. 2024-12-25T00:00:00 or 2024-12-25T00:00:00Z)",
|
||||||
json_schema_extra = {
|
json_schema_extra = {
|
||||||
"anyOf": [
|
"anyOf": [
|
||||||
@@ -36,37 +44,37 @@ def _iso_datetime_field() -> Any:
|
|||||||
|
|
||||||
|
|
||||||
class ContactPartial(ContextualModel):
|
class ContactPartial(ContextualModel):
|
||||||
name:str | None = None
|
name:str | None = _OPTIONAL()
|
||||||
street:str | None = None
|
street:str | None = _OPTIONAL()
|
||||||
zipcode:int | str | None = None
|
zipcode:int | str | None = _OPTIONAL()
|
||||||
location:str | None = None
|
location:str | None = _OPTIONAL()
|
||||||
|
|
||||||
phone:str | None = None
|
phone:str | None = _OPTIONAL()
|
||||||
|
|
||||||
|
|
||||||
class AdPartial(ContextualModel):
|
class AdPartial(ContextualModel):
|
||||||
active:bool = True
|
active:bool | None = _OPTIONAL()
|
||||||
type:Literal["OFFER", "WANTED"] = "OFFER"
|
type:Literal["OFFER", "WANTED"] | None = _OPTIONAL()
|
||||||
title:str = Field(..., min_length = 10)
|
title:str = Field(..., min_length = 10)
|
||||||
description:str
|
description:str
|
||||||
description_prefix:str | None = None
|
description_prefix:str | None = _OPTIONAL()
|
||||||
description_suffix:str | None = None
|
description_suffix:str | None = _OPTIONAL()
|
||||||
category:str
|
category:str
|
||||||
special_attributes:Dict[str, str] | None = Field(default = None)
|
special_attributes:Dict[str, str] | None = _OPTIONAL()
|
||||||
price:int | None = None
|
price:int | None = _OPTIONAL()
|
||||||
price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE"
|
price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] | None = _OPTIONAL()
|
||||||
shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING"
|
shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] | None = _OPTIONAL()
|
||||||
shipping_costs:float | None = None
|
shipping_costs:float | None = _OPTIONAL()
|
||||||
shipping_options:List[str] | None = Field(default = None)
|
shipping_options:List[str] | None = _OPTIONAL()
|
||||||
sell_directly:bool | None = False
|
sell_directly:bool | None = _OPTIONAL()
|
||||||
images:List[str] | None = Field(default = None)
|
images:List[str] | None = _OPTIONAL()
|
||||||
contact:ContactPartial | None = None
|
contact:ContactPartial | None = _OPTIONAL()
|
||||||
republication_interval:int = 7
|
republication_interval:int | None = _OPTIONAL()
|
||||||
|
|
||||||
id:int | None = None
|
id:int | None = _OPTIONAL()
|
||||||
created_on:datetime | None = _iso_datetime_field()
|
created_on:datetime | None = _ISO_DATETIME()
|
||||||
updated_on:datetime | None = _iso_datetime_field()
|
updated_on:datetime | None = _ISO_DATETIME()
|
||||||
content_hash:str | None = None
|
content_hash:str | None = _OPTIONAL()
|
||||||
|
|
||||||
@validator("created_on", "updated_on", pre = True)
|
@validator("created_on", "updated_on", pre = True)
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -105,11 +113,71 @@ class AdPartial(ContextualModel):
|
|||||||
raise ValueError("shipping_options entries must be non-empty")
|
raise ValueError("shipping_options entries must be non-empty")
|
||||||
return v
|
return v
|
||||||
|
|
||||||
|
def update_content_hash(self) -> Self:
|
||||||
|
"""Calculate and updates the content_hash value for user-modifiable fields of the ad."""
|
||||||
|
|
||||||
|
# 1) Dump to a plain dict, excluding the metadata fields:
|
||||||
|
raw = self.model_dump(
|
||||||
|
exclude = {"id", "created_on", "updated_on", "content_hash"},
|
||||||
|
exclude_none = True,
|
||||||
|
exclude_unset = True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 2) Recursively prune any empty containers:
|
||||||
|
def prune(obj:Any) -> Any:
|
||||||
|
if isinstance(obj, Mapping):
|
||||||
|
return {
|
||||||
|
k: prune(v)
|
||||||
|
for k, v in obj.items()
|
||||||
|
# drop keys whose values are empty list/dict/set
|
||||||
|
if not (isinstance(v, (Mapping, Sequence, set)) and not isinstance(v, (str, bytes)) and len(v) == 0)
|
||||||
|
}
|
||||||
|
if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)):
|
||||||
|
return [
|
||||||
|
prune(v)
|
||||||
|
for v in obj
|
||||||
|
if not (isinstance(v, (Mapping, Sequence, set)) and not isinstance(v, (str, bytes)) and len(v) == 0)
|
||||||
|
]
|
||||||
|
return obj
|
||||||
|
|
||||||
|
pruned = prune(raw)
|
||||||
|
|
||||||
|
# 3) Produce a canonical JSON string and hash it:
|
||||||
|
json_string = json.dumps(pruned, sort_keys = True)
|
||||||
|
self.content_hash = hashlib.sha256(json_string.encode()).hexdigest()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def to_ad(self, ad_defaults:AdDefaults) -> Ad:
|
||||||
|
"""
|
||||||
|
Returns a complete, validated Ad by merging this partial with values from ad_defaults.
|
||||||
|
|
||||||
|
Any field that is `None` or `""` is filled from `ad_defaults`.
|
||||||
|
|
||||||
|
Raises `ValidationError` when, after merging with `ad_defaults`, not all fields required by `Ad` are populated.
|
||||||
|
"""
|
||||||
|
ad_cfg = self.model_dump()
|
||||||
|
dicts.apply_defaults(
|
||||||
|
target = ad_cfg,
|
||||||
|
defaults = ad_defaults.model_dump(),
|
||||||
|
ignore = lambda k, _: k == "description", # ignore legacy global description config
|
||||||
|
override = lambda _, v: v in {None, ""} # noqa: PLC1901 can be simplified
|
||||||
|
)
|
||||||
|
return Ad.model_validate(ad_cfg)
|
||||||
|
|
||||||
|
|
||||||
|
# pyright: reportGeneralTypeIssues=false, reportIncompatibleVariableOverride=false
|
||||||
class Contact(ContactPartial):
|
class Contact(ContactPartial):
|
||||||
name:str # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride]
|
name:str
|
||||||
zipcode:int | str # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride]
|
zipcode:int | str
|
||||||
|
|
||||||
|
|
||||||
|
# pyright: reportGeneralTypeIssues=false, reportIncompatibleVariableOverride=false
|
||||||
class Ad(AdPartial):
|
class Ad(AdPartial):
|
||||||
contact:Contact # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride]
|
active:bool
|
||||||
|
type:Literal["OFFER", "WANTED"]
|
||||||
|
description:str
|
||||||
|
price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"]
|
||||||
|
shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"]
|
||||||
|
sell_directly:bool
|
||||||
|
contact:Contact
|
||||||
|
republication_interval:int
|
||||||
|
|||||||
@@ -4,12 +4,13 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
from typing import Any, Dict, List, Literal
|
from typing import Any, List, Literal
|
||||||
|
|
||||||
from pydantic import Field, model_validator, validator
|
from pydantic import Field, model_validator, validator
|
||||||
from typing_extensions import deprecated
|
from typing_extensions import deprecated
|
||||||
|
|
||||||
from kleinanzeigen_bot.utils import dicts
|
from kleinanzeigen_bot.utils import dicts
|
||||||
|
from kleinanzeigen_bot.utils.misc import get_attr
|
||||||
from kleinanzeigen_bot.utils.pydantics import ContextualModel
|
from kleinanzeigen_bot.utils.pydantics import ContextualModel
|
||||||
|
|
||||||
|
|
||||||
@@ -40,16 +41,17 @@ class AdDefaults(ContextualModel):
|
|||||||
|
|
||||||
@model_validator(mode = "before")
|
@model_validator(mode = "before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def unify_description(cls, values:Dict[str, Any]) -> Dict[str, Any]:
|
def migrate_legacy_description(cls, values:dict[str, Any]) -> dict[str, Any]:
|
||||||
# Ensure flat prefix/suffix take precedence over deprecated nested "description"
|
# Ensure flat prefix/suffix take precedence over deprecated nested "description"
|
||||||
desc = values.get("description")
|
description_prefix = values.get("description_prefix")
|
||||||
flat_prefix = values.get("description_prefix")
|
description_suffix = values.get("description_suffix")
|
||||||
flat_suffix = values.get("description_suffix")
|
legacy_prefix = get_attr(values, "description.prefix")
|
||||||
|
legacy_suffix = get_attr(values, "description.suffix")
|
||||||
|
|
||||||
if not flat_prefix and isinstance(desc, dict) and desc.get("prefix") is not None:
|
if not description_prefix and legacy_prefix is not None:
|
||||||
values["description_prefix"] = desc.get("prefix", "")
|
values["description_prefix"] = legacy_prefix
|
||||||
if not flat_suffix and isinstance(desc, dict) and desc.get("suffix") is not None:
|
if not description_suffix and legacy_suffix is not None:
|
||||||
values["description_suffix"] = desc.get("suffix", "")
|
values["description_suffix"] = legacy_suffix
|
||||||
return values
|
return values
|
||||||
|
|
||||||
|
|
||||||
@@ -115,7 +117,7 @@ if relative paths are specified, then they are relative to this configuration fi
|
|||||||
description = "Default values for ads, can be overwritten in each ad configuration file"
|
description = "Default values for ads, can be overwritten in each ad configuration file"
|
||||||
)
|
)
|
||||||
|
|
||||||
categories:Dict[str, str] = Field(default_factory = dict, description = """
|
categories:dict[str, str] = Field(default_factory = dict, description = """
|
||||||
additional name to category ID mappings, see default list at
|
additional name to category ID mappings, see default list at
|
||||||
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
|
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
|
||||||
|
|
||||||
|
|||||||
@@ -133,6 +133,7 @@ kleinanzeigen_bot/__init__.py:
|
|||||||
|
|
||||||
run:
|
run:
|
||||||
"DONE: No configuration errors found.": "FERTIG: Keine Konfigurationsfehler gefunden."
|
"DONE: No configuration errors found.": "FERTIG: Keine Konfigurationsfehler gefunden."
|
||||||
|
"DONE: No active ads found.": "FERTIG: Keine aktiven Anzeigen gefunden."
|
||||||
"You provided no ads selector. Defaulting to \"due\".": "Es wurden keine Anzeigen-Selektor angegeben. Es wird \"due\" verwendet."
|
"You provided no ads selector. Defaulting to \"due\".": "Es wurden keine Anzeigen-Selektor angegeben. Es wird \"due\" verwendet."
|
||||||
"DONE: No new/outdated ads found.": "FERTIG: Keine neuen/veralteten Anzeigen gefunden."
|
"DONE: No new/outdated ads found.": "FERTIG: Keine neuen/veralteten Anzeigen gefunden."
|
||||||
"DONE: No ads to delete found.": "FERTIG: Keine zu löschnenden Anzeigen gefunden."
|
"DONE: No ads to delete found.": "FERTIG: Keine zu löschnenden Anzeigen gefunden."
|
||||||
@@ -148,6 +149,11 @@ kleinanzeigen_bot/__init__.py:
|
|||||||
__set_shipping_options:
|
__set_shipping_options:
|
||||||
"Unable to close shipping dialog!": "Versanddialog konnte nicht geschlossen werden!"
|
"Unable to close shipping dialog!": "Versanddialog konnte nicht geschlossen werden!"
|
||||||
|
|
||||||
|
update_content_hashes:
|
||||||
|
"DONE: Updated [content_hash] in %s": "FERTIG: [content_hash] in %s aktualisiert."
|
||||||
|
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
|
||||||
|
"ad": "Anzeige"
|
||||||
|
|
||||||
#################################################
|
#################################################
|
||||||
kleinanzeigen_bot/extract.py:
|
kleinanzeigen_bot/extract.py:
|
||||||
#################################################
|
#################################################
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ def configure_console_logging() -> None:
|
|||||||
CRITICAL: colorama.Fore.MAGENTA,
|
CRITICAL: colorama.Fore.MAGENTA,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _relativize_paths_under_cwd(self, record: logging.LogRecord) -> None:
|
def _relativize_paths_under_cwd(self, record:logging.LogRecord) -> None:
|
||||||
"""
|
"""
|
||||||
Mutate record.args in-place, converting any absolute-path strings
|
Mutate record.args in-place, converting any absolute-path strings
|
||||||
under the current working directory into relative paths.
|
under the current working directory into relative paths.
|
||||||
@@ -78,7 +78,7 @@ def configure_console_logging() -> None:
|
|||||||
|
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
|
|
||||||
def _rel_if_subpath(val: Any) -> Any:
|
def _rel_if_subpath(val:Any) -> Any:
|
||||||
if isinstance(val, str) and os.path.isabs(val):
|
if isinstance(val, str) and os.path.isabs(val):
|
||||||
# don't relativize log-file paths
|
# don't relativize log-file paths
|
||||||
if val.endswith(".log"):
|
if val.endswith(".log"):
|
||||||
|
|||||||
39
tests/unit/test_ad_model.py
Normal file
39
tests/unit/test_ad_model.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
|
from kleinanzeigen_bot.model.ad_model import AdPartial
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_content_hash() -> None:
|
||||||
|
minimal_ad_cfg = {
|
||||||
|
"id": "123456789",
|
||||||
|
"title": "Test Ad Title",
|
||||||
|
"category": "160",
|
||||||
|
"description": "Test Description",
|
||||||
|
}
|
||||||
|
minimal_ad_cfg_hash = "ae3defaccd6b41f379eb8de17263caa1bd306e35e74b11aa03a4738621e96ece"
|
||||||
|
|
||||||
|
assert AdPartial.model_validate(minimal_ad_cfg).update_content_hash().content_hash == minimal_ad_cfg_hash
|
||||||
|
|
||||||
|
assert AdPartial.model_validate(minimal_ad_cfg | {
|
||||||
|
"id": "123456789",
|
||||||
|
"created_on": "2025-05-08T09:34:03",
|
||||||
|
"updated_on": "2025-05-14T20:43:16",
|
||||||
|
"content_hash": "5753ead7cf42b0ace5fe658ecb930b3a8f57ef49bd52b7ea2d64b91b2c75517e"
|
||||||
|
}).update_content_hash().content_hash == minimal_ad_cfg_hash
|
||||||
|
|
||||||
|
assert AdPartial.model_validate(minimal_ad_cfg | {
|
||||||
|
"active": None,
|
||||||
|
"images": None,
|
||||||
|
"shipping_options": None,
|
||||||
|
"special_attributes": None,
|
||||||
|
"contact": None,
|
||||||
|
}).update_content_hash().content_hash == minimal_ad_cfg_hash
|
||||||
|
|
||||||
|
assert AdPartial.model_validate(minimal_ad_cfg | {
|
||||||
|
"active": True,
|
||||||
|
"images": [],
|
||||||
|
"shipping_options": [],
|
||||||
|
"special_attributes": {},
|
||||||
|
"contact": {},
|
||||||
|
}).update_content_hash().content_hash != minimal_ad_cfg_hash
|
||||||
@@ -1,115 +0,0 @@
|
|||||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from kleinanzeigen_bot import ads
|
|
||||||
from kleinanzeigen_bot.model.config_model import Config
|
|
||||||
|
|
||||||
|
|
||||||
def test_calculate_content_hash_with_none_values() -> None:
|
|
||||||
"""Test calculate_content_hash with None values in the ad configuration."""
|
|
||||||
ad_cfg = {
|
|
||||||
# Minimal configuration with None values as described in bug report
|
|
||||||
"id": "123456789",
|
|
||||||
"created_on": "2022-07-19T07:30:20.489289",
|
|
||||||
"updated_on": "2025-01-22T19:46:46.735896",
|
|
||||||
"title": "Test Ad",
|
|
||||||
"description": "Test Description",
|
|
||||||
"images": [None, "/path/to/image.jpg", None], # List containing None values
|
|
||||||
"shipping_options": None, # None instead of list
|
|
||||||
"special_attributes": None, # None instead of dictionary
|
|
||||||
"contact": {
|
|
||||||
"street": None # None value in contact
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Should not raise TypeError
|
|
||||||
hash_value = ads.calculate_content_hash(ad_cfg)
|
|
||||||
assert isinstance(hash_value, str)
|
|
||||||
assert len(hash_value) == 64 # SHA-256 hash is 64 characters long
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(("config", "prefix", "expected"), [
|
|
||||||
# Test new flattened format - prefix
|
|
||||||
(
|
|
||||||
{"ad_defaults": {"description_prefix": "Hello"}},
|
|
||||||
True,
|
|
||||||
"Hello"
|
|
||||||
),
|
|
||||||
# Test new flattened format - suffix
|
|
||||||
(
|
|
||||||
{"ad_defaults": {"description_suffix": "Bye"}},
|
|
||||||
False,
|
|
||||||
"Bye"
|
|
||||||
),
|
|
||||||
# Test legacy nested format - prefix
|
|
||||||
(
|
|
||||||
{"ad_defaults": {"description": {"prefix": "Hi"}}},
|
|
||||||
True,
|
|
||||||
"Hi"
|
|
||||||
),
|
|
||||||
# Test legacy nested format - suffix
|
|
||||||
(
|
|
||||||
{"ad_defaults": {"description": {"suffix": "Ciao"}}},
|
|
||||||
False,
|
|
||||||
"Ciao"
|
|
||||||
),
|
|
||||||
# Test precedence (new format over legacy) - prefix
|
|
||||||
(
|
|
||||||
{
|
|
||||||
"ad_defaults": {
|
|
||||||
"description_prefix": "Hello",
|
|
||||||
"description": {"prefix": "Hi"}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
True,
|
|
||||||
"Hello"
|
|
||||||
),
|
|
||||||
# Test precedence (new format over legacy) - suffix
|
|
||||||
(
|
|
||||||
{
|
|
||||||
"ad_defaults": {
|
|
||||||
"description_suffix": "Bye",
|
|
||||||
"description": {"suffix": "Ciao"}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
False,
|
|
||||||
"Bye"
|
|
||||||
),
|
|
||||||
# Test empty config
|
|
||||||
(
|
|
||||||
{"ad_defaults": {}},
|
|
||||||
True,
|
|
||||||
""
|
|
||||||
),
|
|
||||||
# Test None values
|
|
||||||
(
|
|
||||||
{"ad_defaults": {"description_prefix": None, "description_suffix": None}},
|
|
||||||
True,
|
|
||||||
""
|
|
||||||
),
|
|
||||||
# Add test for malformed config
|
|
||||||
(
|
|
||||||
{}, # Empty config
|
|
||||||
True,
|
|
||||||
""
|
|
||||||
),
|
|
||||||
# Test for missing ad_defaults
|
|
||||||
(
|
|
||||||
{"some_other_key": {}},
|
|
||||||
True,
|
|
||||||
""
|
|
||||||
),
|
|
||||||
])
|
|
||||||
def test_get_description_affixes(
|
|
||||||
config:dict[str, Any],
|
|
||||||
prefix:bool,
|
|
||||||
expected:str,
|
|
||||||
test_bot_config:Config
|
|
||||||
) -> None:
|
|
||||||
"""Test get_description_affixes function with various inputs."""
|
|
||||||
result = ads.get_description_affixes(test_bot_config.with_values(config), prefix = prefix)
|
|
||||||
assert result == expected
|
|
||||||
62
tests/unit/test_config_model.py
Normal file
62
tests/unit/test_config_model.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
|
from kleinanzeigen_bot.model.config_model import AdDefaults
|
||||||
|
|
||||||
|
|
||||||
|
def test_migrate_legacy_description_prefix() -> None:
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
}).description_prefix is None
|
||||||
|
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
"description_prefix": "Prefix"
|
||||||
|
}).description_prefix == "Prefix"
|
||||||
|
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
"description_prefix": "Prefix",
|
||||||
|
"description": {
|
||||||
|
"prefix": "Legacy Prefix"
|
||||||
|
}
|
||||||
|
}).description_prefix == "Prefix"
|
||||||
|
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
"description": {
|
||||||
|
"prefix": "Legacy Prefix"
|
||||||
|
}
|
||||||
|
}).description_prefix == "Legacy Prefix"
|
||||||
|
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
"description_prefix": "",
|
||||||
|
"description": {
|
||||||
|
"prefix": "Legacy Prefix"
|
||||||
|
}
|
||||||
|
}).description_prefix == "Legacy Prefix"
|
||||||
|
|
||||||
|
|
||||||
|
def test_migrate_legacy_description_suffix() -> None:
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
}).description_suffix is None
|
||||||
|
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
"description_suffix": "Suffix"
|
||||||
|
}).description_suffix == "Suffix"
|
||||||
|
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
"description_suffix": "Suffix",
|
||||||
|
"description": {
|
||||||
|
"suffix": "Legacy Suffix"
|
||||||
|
}
|
||||||
|
}).description_suffix == "Suffix"
|
||||||
|
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
"description": {
|
||||||
|
"suffix": "Legacy Suffix"
|
||||||
|
}
|
||||||
|
}).description_suffix == "Legacy Suffix"
|
||||||
|
|
||||||
|
assert AdDefaults.model_validate({
|
||||||
|
"description_suffix": "",
|
||||||
|
"description": {
|
||||||
|
"suffix": "Legacy Suffix"
|
||||||
|
}
|
||||||
|
}).description_suffix == "Legacy Suffix"
|
||||||
@@ -14,7 +14,6 @@ from pydantic import ValidationError
|
|||||||
|
|
||||||
from kleinanzeigen_bot import LOG, KleinanzeigenBot, misc
|
from kleinanzeigen_bot import LOG, KleinanzeigenBot, misc
|
||||||
from kleinanzeigen_bot._version import __version__
|
from kleinanzeigen_bot._version import __version__
|
||||||
from kleinanzeigen_bot.ads import calculate_content_hash
|
|
||||||
from kleinanzeigen_bot.model.ad_model import Ad
|
from kleinanzeigen_bot.model.ad_model import Ad
|
||||||
from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig
|
from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig
|
||||||
from kleinanzeigen_bot.utils import dicts, loggers
|
from kleinanzeigen_bot.utils import dicts, loggers
|
||||||
@@ -844,7 +843,7 @@ class TestKleinanzeigenBotAdRepublication:
|
|||||||
|
|
||||||
# Calculate hash before making the copy to ensure they match
|
# Calculate hash before making the copy to ensure they match
|
||||||
ad_cfg_orig = ad_cfg.model_dump()
|
ad_cfg_orig = ad_cfg.model_dump()
|
||||||
current_hash = calculate_content_hash(ad_cfg_orig)
|
current_hash = ad_cfg.update_content_hash().content_hash
|
||||||
ad_cfg_orig["content_hash"] = current_hash
|
ad_cfg_orig["content_hash"] = current_hash
|
||||||
|
|
||||||
# Mock the config to prevent actual file operations
|
# Mock the config to prevent actual file operations
|
||||||
@@ -874,8 +873,8 @@ class TestKleinanzeigenBotShippingOptions:
|
|||||||
})
|
})
|
||||||
|
|
||||||
# Create the original ad config and published ads list
|
# Create the original ad config and published ads list
|
||||||
|
ad_cfg.update_content_hash() # Add content hash to prevent republication
|
||||||
ad_cfg_orig = ad_cfg.model_dump()
|
ad_cfg_orig = ad_cfg.model_dump()
|
||||||
ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg_orig) # Add content hash to prevent republication
|
|
||||||
published_ads:list[dict[str, Any]] = []
|
published_ads:list[dict[str, Any]] = []
|
||||||
|
|
||||||
# Set up default config values needed for the test
|
# Set up default config values needed for the test
|
||||||
@@ -1155,7 +1154,7 @@ class TestKleinanzeigenBotChangedAds:
|
|||||||
# Calculate hash for changed_ad and add it to the config
|
# Calculate hash for changed_ad and add it to the config
|
||||||
# Then modify the ad to simulate a change
|
# Then modify the ad to simulate a change
|
||||||
changed_ad = ad_cfg.model_dump()
|
changed_ad = ad_cfg.model_dump()
|
||||||
changed_hash = calculate_content_hash(changed_ad)
|
changed_hash = ad_cfg.update_content_hash().content_hash
|
||||||
changed_ad["content_hash"] = changed_hash
|
changed_ad["content_hash"] = changed_hash
|
||||||
# Now modify the ad to make it "changed"
|
# Now modify the ad to make it "changed"
|
||||||
changed_ad["title"] = "Changed Ad - Modified"
|
changed_ad["title"] = "Changed Ad - Modified"
|
||||||
|
|||||||
Reference in New Issue
Block a user