From 6ede14596dfeef352294450fed620953caa223e1 Mon Sep 17 00:00:00 2001 From: sebthom Date: Thu, 15 May 2025 00:10:45 +0200 Subject: [PATCH] feat: add type safe Ad model --- .github/workflows/build.yml | 6 +- README.md | 1 + pyproject.toml | 6 +- schemas/ad.schema.json | 304 ++++++++++++++++ scripts/generate_schemas.py | 7 +- src/kleinanzeigen_bot/__init__.py | 319 ++++++++--------- src/kleinanzeigen_bot/ads.py | 39 +-- src/kleinanzeigen_bot/extract.py | 21 +- src/kleinanzeigen_bot/model/ad_model.py | 115 +++++++ .../resources/ad_fields.yaml | 24 -- src/kleinanzeigen_bot/utils/dicts.py | 7 +- src/kleinanzeigen_bot/utils/misc.py | 61 +++- tests/conftest.py | 3 +- tests/unit/test_extract.py | 39 ++- tests/unit/test_init.py | 324 +++++++----------- 15 files changed, 817 insertions(+), 459 deletions(-) create mode 100644 schemas/ad.schema.json create mode 100644 src/kleinanzeigen_bot/model/ad_model.py delete mode 100644 src/kleinanzeigen_bot/resources/ad_fields.yaml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f5fe355..e073222 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -153,7 +153,7 @@ jobs: - name: Run unit tests - run: pdm run utest:cov --cov=src/kleinanzeigen_bot + run: pdm run utest:cov -vv --cov=src/kleinanzeigen_bot - name: Run integration tests @@ -163,9 +163,9 @@ jobs: case "${{ matrix.os }}" in ubuntu-*) sudo apt-get install --no-install-recommends -y xvfb - xvfb-run pdm run itest:cov + xvfb-run pdm run itest:cov -vv ;; - *) pdm run itest:cov + *) pdm run itest:cov -vv ;; esac diff --git a/README.md b/README.md index ea283ab..20496e2 100644 --- a/README.md +++ b/README.md @@ -305,6 +305,7 @@ Parameter values specified in the `ad_defaults` section of the `config.yaml` fil The following parameters can be configured: ```yaml +# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json active: # true or false 
(default: true) type: # one of: OFFER, WANTED (default: OFFER) title: diff --git a/pyproject.toml b/pyproject.toml index eb10e86..8b7b852 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,9 +104,9 @@ lint = { composite = ["lint:ruff", "lint:mypy", "lint:pyright"] } "lint:fix" = {shell = "ruff check --preview --fix" } # tests -test = "python -m pytest --capture=tee-sys -v" -utest = "python -m pytest --capture=tee-sys -v -m 'not itest'" -itest = "python -m pytest --capture=tee-sys -v -m 'itest'" +test = "python -m pytest --capture=tee-sys" +utest = "python -m pytest --capture=tee-sys -m 'not itest'" +itest = "python -m pytest --capture=tee-sys -m 'itest'" "test:cov" = { composite = ["test --cov=src/kleinanzeigen_bot"] } "utest:cov" = { composite = ["utest --cov=src/kleinanzeigen_bot"] } "itest:cov" = { composite = ["itest --cov=src/kleinanzeigen_bot"] } diff --git a/schemas/ad.schema.json b/schemas/ad.schema.json new file mode 100644 index 0000000..9e5b3d7 --- /dev/null +++ b/schemas/ad.schema.json @@ -0,0 +1,304 @@ +{ + "$defs": { + "ContactPartial": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Name" + }, + "street": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Street" + }, + "zipcode": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Zipcode" + }, + "location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Location" + }, + "phone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Phone" + } + }, + "title": "ContactPartial", + "type": "object" + } + }, + "properties": { + "active": { + "default": true, + "title": "Active", + "type": "boolean" + }, + "type": { + "default": "OFFER", + "enum": [ + "OFFER", + "WANTED" + ], + 
"title": "Type", + "type": "string" + }, + "title": { + "minLength": 10, + "title": "Title", + "type": "string" + }, + "description": { + "title": "Description", + "type": "string" + }, + "description_prefix": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description Prefix" + }, + "description_suffix": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description Suffix" + }, + "category": { + "title": "Category", + "type": "string" + }, + "special_attributes": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Special Attributes" + }, + "price": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Price" + }, + "price_type": { + "default": "NEGOTIABLE", + "enum": [ + "FIXED", + "NEGOTIABLE", + "GIVE_AWAY", + "NOT_APPLICABLE" + ], + "title": "Price Type", + "type": "string" + }, + "shipping_type": { + "default": "SHIPPING", + "enum": [ + "PICKUP", + "SHIPPING", + "NOT_APPLICABLE" + ], + "title": "Shipping Type", + "type": "string" + }, + "shipping_costs": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Shipping Costs" + }, + "shipping_options": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Shipping Options" + }, + "sell_directly": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": false, + "title": "Sell Directly" + }, + "images": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Images" + }, + "contact": { + "anyOf": [ + { + "$ref": "#/$defs/ContactPartial" + }, + { + "type": "null" + } + ], + "default": null + }, + 
"republication_interval": { + "default": 7, + "title": "Republication Interval", + "type": "integer" + }, + "id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Id" + }, + "created_on": { + "anyOf": [ + { + "type": "null" + }, + { + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{1,6})?(?:Z|[+-]\\d{2}:\\d{2})?$", + "type": "string" + } + ], + "default": null, + "description": "ISO-8601 timestamp with optional timezone (e.g. 2024-12-25T00:00:00 or 2024-12-25T00:00:00Z)", + "title": "Created On" + }, + "updated_on": { + "anyOf": [ + { + "type": "null" + }, + { + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{1,6})?(?:Z|[+-]\\d{2}:\\d{2})?$", + "type": "string" + } + ], + "default": null, + "description": "ISO-8601 timestamp with optional timezone (e.g. 2024-12-25T00:00:00 or 2024-12-25T00:00:00Z)", + "title": "Updated On" + }, + "content_hash": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Content Hash" + } + }, + "required": [ + "title", + "description", + "category" + ], + "title": "AdPartial", + "type": "object", + "description": "Auto-generated JSON Schema for Ad" +} diff --git a/scripts/generate_schemas.py b/scripts/generate_schemas.py index 5aa7e55..889a819 100644 --- a/scripts/generate_schemas.py +++ b/scripts/generate_schemas.py @@ -7,14 +7,14 @@ from typing import Type from pydantic import BaseModel +from kleinanzeigen_bot.model.ad_model import AdPartial from kleinanzeigen_bot.model.config_model import Config -def generate_schema(model:Type[BaseModel], out_dir:Path) -> None: +def generate_schema(model:Type[BaseModel], name:str, out_dir:Path) -> None: """ Generate and write JSON schema for the given model. 
""" - name = model.__name__ print(f"[+] Generating schema for model [{name}]...") # Create JSON Schema dict @@ -35,5 +35,6 @@ out_dir = project_root / "schemas" out_dir.mkdir(parents = True, exist_ok = True) print(f"Generating schemas in: {out_dir.resolve()}") -generate_schema(Config, out_dir) +generate_schema(Config, "Config", out_dir) +generate_schema(AdPartial, "Ad", out_dir) print("All schemas generated successfully.") diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 45f776b..5133cba 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -4,7 +4,6 @@ import atexit, copy, json, os, re, signal, sys, textwrap # isort: skip import getopt # pylint: disable=deprecated-module import urllib.parse as urllib_parse -from collections.abc import Iterable from gettext import gettext as _ from typing import Any, Final @@ -14,13 +13,14 @@ from wcmatch import glob from . import extract, resources from ._version import __version__ -from .ads import MAX_DESCRIPTION_LENGTH, calculate_content_hash, get_description_affixes +from .ads import calculate_content_hash, get_description_affixes +from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad from .model.config_model import Config from .utils import dicts, error_handlers, loggers, misc from .utils.exceptions import CaptchaEncountered from .utils.files import abspath from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale -from .utils.misc import ainput, ensure, is_frozen, parse_datetime, parse_decimal +from .utils.misc import ainput, ensure, is_frozen from .utils.web_scraping_mixin import By, Element, Is, WebScrapingMixin # W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933 @@ -266,17 +266,17 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.info("App version: %s", self.get_version()) LOG.info("Python version: %s", sys.version) - def __check_ad_republication(self, ad_cfg:dict[str, Any], ad_file_relative:str) -> 
bool: + def __check_ad_republication(self, ad_cfg:Ad, ad_file_relative:str) -> bool: """ Check if an ad needs to be republished based on republication interval. Returns True if the ad should be republished based on the interval. Note: This method no longer checks for content changes. Use __check_ad_changed for that. """ - if ad_cfg["updated_on"]: - last_updated_on = parse_datetime(ad_cfg["updated_on"]) - elif ad_cfg["created_on"]: - last_updated_on = parse_datetime(ad_cfg["created_on"]) + if ad_cfg.updated_on: + last_updated_on = ad_cfg.updated_on + elif ad_cfg.created_on: + last_updated_on = ad_cfg.created_on else: return True @@ -285,23 +285,23 @@ class KleinanzeigenBot(WebScrapingMixin): # Check republication interval ad_age = misc.now() - last_updated_on - if ad_age.days <= ad_cfg["republication_interval"]: + if ad_age.days <= ad_cfg.republication_interval: LOG.info( " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days", ad_file_relative, ad_age.days, - ad_cfg["republication_interval"] + ad_cfg.republication_interval ) return False return True - def __check_ad_changed(self, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool: + def __check_ad_changed(self, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool: """ Check if an ad has been changed since last publication. Returns True if the ad has been changed. 
""" - if not ad_cfg["id"]: + if not ad_cfg.id: # New ads are not considered "changed" return False @@ -321,7 +321,7 @@ class KleinanzeigenBot(WebScrapingMixin): return False - def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, dict[str, Any], dict[str, Any]]]: + def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, Ad, dict[str, Any]]]: LOG.info("Searching for ad config files...") ad_files:dict[str, str] = {} @@ -344,24 +344,17 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.info("Start fetch task for the ad(s) with id(s):") LOG.info(" | ".join([str(id_) for id_ in ids])) - ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml") ads = [] for ad_file, ad_file_relative in sorted(ad_files.items()): - ad_cfg_orig = dicts.load_dict(ad_file, "ad") - ad_cfg = copy.deepcopy(ad_cfg_orig) - dicts.apply_defaults(ad_cfg, - self.config.ad_defaults.model_dump(), - ignore = lambda k, _: k == "description", - override = lambda _, v: v == "" # noqa: PLC1901 can be simplified to `not v` as an empty string is falsey - ) - dicts.apply_defaults(ad_cfg, ad_fields) + ad_cfg_orig:dict[str, Any] = dicts.load_dict(ad_file, "ad") + ad_cfg:Ad = self.load_ad(ad_cfg_orig) - if ignore_inactive and not ad_cfg["active"]: + if ignore_inactive and not ad_cfg.active: LOG.info(" -> SKIPPED: inactive ad [%s]", ad_file_relative) continue if use_specific_ads: - if ad_cfg["id"] not in ids: + if ad_cfg.id not in ids: LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file_relative) continue else: @@ -373,9 +366,9 @@ class KleinanzeigenBot(WebScrapingMixin): should_include = True # Check for 'new' selector - if "new" in selectors and (not ad_cfg["id"] or not check_id): + if "new" in selectors and (not ad_cfg.id or not check_id): should_include = True - elif "new" in selectors and ad_cfg["id"] and check_id: + elif "new" in selectors and ad_cfg.id and check_id: LOG.info(" -> SKIPPED: ad [%s] is not new. 
already has an id assigned.", ad_file_relative) # Check for 'due' selector @@ -391,56 +384,27 @@ class KleinanzeigenBot(WebScrapingMixin): if not should_include: continue - def assert_one_of(path:str, allowed:Iterable[str]) -> None: - # ruff: noqa: B023 function-uses-loop-variable - ensure(dicts.safe_get(ad_cfg, *path.split(".")) in allowed, f"-> property [{path}] must be one of: {allowed} @ [{ad_file}]") - - def assert_min_len(path:str, minlen:int) -> None: - ensure(len(dicts.safe_get(ad_cfg, *path.split("."))) >= minlen, - f"-> property [{path}] must be at least {minlen} characters long @ [{ad_file}]") - - def assert_has_value(path:str) -> None: - ensure(dicts.safe_get(ad_cfg, *path.split(".")), f"-> property [{path}] not specified @ [{ad_file}]") - # pylint: enable=cell-var-from-loop - - assert_one_of("type", {"OFFER", "WANTED"}) - assert_min_len("title", 10) ensure(self.__get_description(ad_cfg, with_affixes = False), f"-> property [description] not specified @ [{ad_file}]") self.__get_description(ad_cfg, with_affixes = True) # validates complete description - assert_one_of("price_type", {"FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"}) - if ad_cfg["price_type"] == "GIVE_AWAY": - ensure(not dicts.safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]") - elif ad_cfg["price_type"] == "FIXED": - assert_has_value("price") - assert_one_of("shipping_type", {"PICKUP", "SHIPPING", "NOT_APPLICABLE"}) - assert_has_value("contact.name") - assert_has_value("republication_interval") - - if ad_cfg["id"]: - ad_cfg["id"] = int(ad_cfg["id"]) - - if ad_cfg["category"]: - resolved_category_id = self.categories.get(ad_cfg["category"]) - if not resolved_category_id and ">" in ad_cfg["category"]: + if ad_cfg.category: + resolved_category_id = self.categories.get(ad_cfg.category) + if not resolved_category_id and ">" in ad_cfg.category: # this maps actually to the sonstiges/weiteres sub-category - parent_category = 
ad_cfg["category"].rpartition(">")[0].strip() + parent_category = ad_cfg.category.rpartition(">")[0].strip() resolved_category_id = self.categories.get(parent_category) if resolved_category_id: LOG.warning( "Category [%s] unknown. Using category [%s] with ID [%s] instead.", - ad_cfg["category"], parent_category, resolved_category_id) + ad_cfg.category, parent_category, resolved_category_id) if resolved_category_id: - ad_cfg["category"] = resolved_category_id + ad_cfg.category = resolved_category_id - if ad_cfg["shipping_costs"]: - ad_cfg["shipping_costs"] = str(round(parse_decimal(ad_cfg["shipping_costs"]), 2)) - - if ad_cfg["images"]: + if ad_cfg.images: images = [] ad_dir = os.path.dirname(ad_file) - for image_pattern in ad_cfg["images"]: + for image_pattern in ad_cfg.images: pattern_images = set() for image_file in glob.glob(image_pattern, root_dir = ad_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB): _, image_file_ext = os.path.splitext(image_file) @@ -450,8 +414,8 @@ class KleinanzeigenBot(WebScrapingMixin): else: pattern_images.add(abspath(image_file, relative_to = ad_file)) images.extend(sorted(pattern_images)) - ensure(images or not ad_cfg["images"], f"No images found for given file patterns {ad_cfg['images']} at {ad_dir}") - ad_cfg["images"] = list(dict.fromkeys(images)) + ensure(images or not ad_cfg.images, f"No images found for given file patterns {ad_cfg.images} at {ad_dir}") + ad_cfg.images = list(dict.fromkeys(images)) ads.append(( ad_file, @@ -462,6 +426,15 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.info("Loaded %s", pluralize("ad", ads)) return ads + def load_ad(self, ad_cfg_orig:dict[str, Any]) -> Ad: + ad_cfg_merged = dicts.apply_defaults( + target = copy.deepcopy(ad_cfg_orig), + defaults = self.config.ad_defaults.model_dump(), + ignore = lambda k, _: k == "description", + override = lambda _, v: v == "" # noqa: PLC1901 can be simplified to `not v` as an empty string is falsey + ) + return Ad.model_validate(ad_cfg_merged) + def 
load_config(self) -> None: # write default config.yaml if config file does not exist if not os.path.exists(self.config_file_path): @@ -563,7 +536,7 @@ class KleinanzeigenBot(WebScrapingMixin): return False return False - async def delete_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None: + async def delete_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: count = 0 published_ads = json.loads( @@ -571,7 +544,7 @@ class KleinanzeigenBot(WebScrapingMixin): for (ad_file, ad_cfg, _ad_cfg_orig) in ad_cfgs: count += 1 - LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file) + LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg.title, ad_file) await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title) await self.web_sleep() @@ -579,8 +552,8 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.info("DONE: Deleted %s", pluralize("ad", count)) LOG.info("############################################") - async def delete_ad(self, ad_cfg:dict[str, Any], published_ads:list[dict[str, Any]], *, delete_old_ads_by_title:bool) -> bool: - LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"]) + async def delete_ad(self, ad_cfg:Ad, published_ads:list[dict[str, Any]], *, delete_old_ads_by_title:bool) -> bool: + LOG.info("Deleting ad '%s' if already present...", ad_cfg.title) await self.web_open(f"{self.root_url}/m-meine-anzeigen.html") csrf_token_elem = await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]") @@ -592,35 +565,35 @@ class KleinanzeigenBot(WebScrapingMixin): for published_ad in published_ads: published_ad_id = int(published_ad.get("id", -1)) published_ad_title = published_ad.get("title", "") - if ad_cfg["id"] == published_ad_id or ad_cfg["title"] == published_ad_title: + if ad_cfg.id == published_ad_id or ad_cfg.title == published_ad_title: LOG.info(" -> deleting %s '%s'...", published_ad_id, 
published_ad_title) await self.web_request( url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}", method = "POST", headers = {"x-csrf-token": csrf_token} ) - elif ad_cfg["id"]: + elif ad_cfg.id: await self.web_request( - url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg['id']}", + url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg.id}", method = "POST", headers = {"x-csrf-token": csrf_token}, valid_response_codes = [200, 404] ) await self.web_sleep() - ad_cfg["id"] = None + ad_cfg.id = None return True - async def publish_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None: + async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: count = 0 published_ads = json.loads( (await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"] for (ad_file, ad_cfg, ad_cfg_orig) in ad_cfgs: - LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ad_cfgs), ad_cfg["title"], ad_file) + LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ad_cfgs), ad_cfg.title, ad_file) - if [x for x in published_ads if x["id"] == ad_cfg["id"] and x["state"] == "paused"]: + if [x for x in published_ads if x["id"] == ad_cfg.id and x["state"] == "paused"]: LOG.info("Skipping because ad is reserved") continue @@ -636,7 +609,7 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.info("DONE: (Re-)published %s", pluralize("ad", count)) LOG.info("############################################") - async def publish_ad(self, ad_file:str, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], published_ads:list[dict[str, Any]]) -> None: + async def publish_ad(self, ad_file:str, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], published_ads:list[dict[str, Any]]) -> None: """ @param ad_cfg: the effective ad config (i.e. with default values applied etc.) 
@param ad_cfg_orig: the ad config as present in the YAML file @@ -647,26 +620,26 @@ class KleinanzeigenBot(WebScrapingMixin): if self.config.publishing.delete_old_ads == "BEFORE_PUBLISH" and not self.keep_old_ads: await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title) - LOG.info("Publishing ad '%s'...", ad_cfg["title"]) + LOG.info("Publishing ad '%s'...", ad_cfg.title) if loggers.is_debug(LOG): LOG.debug(" -> effective ad meta:") - YAML().dump(ad_cfg, sys.stdout) + YAML().dump(ad_cfg.model_dump(), sys.stdout) await self.web_open(f"{self.root_url}/p-anzeige-aufgeben-schritt2.html") - if ad_cfg["type"] == "WANTED": + if ad_cfg.type == "WANTED": await self.web_click(By.ID, "adType2") ############################# # set title ############################# - await self.web_input(By.ID, "postad-title", ad_cfg["title"]) + await self.web_input(By.ID, "postad-title", ad_cfg.title) ############################# # set category ############################# - await self.__set_category(ad_cfg["category"], ad_file) + await self.__set_category(ad_cfg.category, ad_file) ############################# # set special attributes @@ -676,36 +649,36 @@ class KleinanzeigenBot(WebScrapingMixin): ############################# # set shipping type/options/costs ############################# - if ad_cfg["type"] == "WANTED": + if ad_cfg.type == "WANTED": # special handling for ads of type WANTED since shipping is a special attribute for these - if ad_cfg["shipping_type"] in {"PICKUP", "SHIPPING"}: - shipping_value = "ja" if ad_cfg["shipping_type"] == "SHIPPING" else "nein" + if ad_cfg.shipping_type in {"PICKUP", "SHIPPING"}: + shipping_value = "ja" if ad_cfg.shipping_type == "SHIPPING" else "nein" try: await self.web_select(By.XPATH, "//select[contains(@id, '.versand_s')]", shipping_value) except TimeoutError: - LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg["shipping_type"]) + LOG.warning("Failed to set 
shipping attribute for type '%s'!", ad_cfg.shipping_type) else: await self.__set_shipping(ad_cfg) ############################# # set price ############################# - price_type = ad_cfg["price_type"] + price_type = ad_cfg.price_type if price_type != "NOT_APPLICABLE": try: await self.web_select(By.CSS_SELECTOR, "select#price-type-react, select#micro-frontend-price-type, select#priceType", price_type) except TimeoutError: pass - if dicts.safe_get(ad_cfg, "price"): - await self.web_input(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price", ad_cfg["price"]) + if ad_cfg.price: + await self.web_input(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price", str(ad_cfg.price)) ############################# # set sell_directly ############################# - sell_directly = ad_cfg["sell_directly"] + sell_directly = ad_cfg.sell_directly try: - if ad_cfg["shipping_type"] == "SHIPPING": - if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}: + if ad_cfg.shipping_type == "SHIPPING": + if sell_directly and ad_cfg.shipping_options and price_type in {"FIXED", "NEGOTIABLE"}: if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED): await self.web_click(By.ID, "radio-buy-now-yes") elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED): @@ -722,16 +695,16 @@ class KleinanzeigenBot(WebScrapingMixin): ############################# # set contact zipcode ############################# - if ad_cfg["contact"]["zipcode"]: - await self.web_input(By.ID, "pstad-zip", ad_cfg["contact"]["zipcode"]) + if ad_cfg.contact.zipcode: + await self.web_input(By.ID, "pstad-zip", ad_cfg.contact.zipcode) # Set city if location is specified - if ad_cfg["contact"].get("location"): + if ad_cfg.contact.location: try: await self.web_sleep(1) # Wait for city dropdown to populate options = await self.web_find_all(By.CSS_SELECTOR, "#pstad-citychsr option") for option 
in options: option_text = await self.web_text(By.CSS_SELECTOR, "option", parent = option) - if option_text == ad_cfg["contact"]["location"]: + if option_text == ad_cfg.contact.location: await self.web_select(By.ID, "pstad-citychsr", option_text) break except TimeoutError: @@ -740,7 +713,7 @@ class KleinanzeigenBot(WebScrapingMixin): ############################# # set contact street ############################# - if ad_cfg["contact"]["street"]: + if ad_cfg.contact.street: try: if await self.web_check(By.ID, "pstad-street", Is.DISABLED): await self.web_click(By.ID, "addressVisibility") @@ -748,18 +721,18 @@ class KleinanzeigenBot(WebScrapingMixin): except TimeoutError: # ignore pass - await self.web_input(By.ID, "pstad-street", ad_cfg["contact"]["street"]) + await self.web_input(By.ID, "pstad-street", ad_cfg.contact.street) ############################# # set contact name ############################# - if ad_cfg["contact"]["name"] and not await self.web_check(By.ID, "postad-contactname", Is.READONLY): - await self.web_input(By.ID, "postad-contactname", ad_cfg["contact"]["name"]) + if ad_cfg.contact.name and not await self.web_check(By.ID, "postad-contactname", Is.READONLY): + await self.web_input(By.ID, "postad-contactname", ad_cfg.contact.name) ############################# # set contact phone ############################# - if ad_cfg["contact"]["phone"]: + if ad_cfg.contact.phone: if await self.web_check(By.ID, "postad-phonenumber", Is.DISPLAYED): try: if await self.web_check(By.ID, "postad-phonenumber", Is.DISABLED): @@ -768,7 +741,7 @@ class KleinanzeigenBot(WebScrapingMixin): except TimeoutError: # ignore pass - await self.web_input(By.ID, "postad-phonenumber", ad_cfg["contact"]["phone"]) + await self.web_input(By.ID, "postad-phonenumber", ad_cfg.contact.phone) ############################# # upload images @@ -810,7 +783,7 @@ class KleinanzeigenBot(WebScrapingMixin): # check for no image question try: image_hint_xpath = '//*[contains(@class, 
"ModalDialog--Actions")]//button[contains(., "Ohne Bild veröffentlichen")]' - if not ad_cfg["images"] and await self.web_check(By.XPATH, image_hint_xpath, Is.DISPLAYED): + if not ad_cfg.images and await self.web_check(By.XPATH, image_hint_xpath, Is.DISPLAYED): await self.web_click(By.XPATH, image_hint_xpath) except TimeoutError: pass # nosec @@ -833,8 +806,8 @@ class KleinanzeigenBot(WebScrapingMixin): # Update content hash after successful publication # Calculate hash on original config to ensure consistent comparison on restart ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg_orig) - ad_cfg_orig["updated_on"] = misc.now().isoformat() - if not ad_cfg["created_on"] and not ad_cfg["id"]: + ad_cfg_orig["updated_on"] = misc.now().isoformat(timespec = "seconds") + if not ad_cfg.created_on and not ad_cfg.id: ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"] LOG.info(" -> SUCCESS: ad published with ID %s", ad_id) @@ -893,55 +866,57 @@ class KleinanzeigenBot(WebScrapingMixin): else: ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed") - async def __set_special_attributes(self, ad_cfg:dict[str, Any]) -> None: - if ad_cfg["special_attributes"]: - LOG.debug("Found %i special attributes", len(ad_cfg["special_attributes"])) - for special_attribute_key, special_attribute_value in ad_cfg["special_attributes"].items(): + async def __set_special_attributes(self, ad_cfg:Ad) -> None: + if not ad_cfg.special_attributes: + return - if special_attribute_key == "condition_s": - await self.__set_condition(special_attribute_value) - continue + LOG.debug("Found %i special attributes", len(ad_cfg.special_attributes)) + for special_attribute_key, special_attribute_value in ad_cfg.special_attributes.items(): - LOG.debug("Setting special attribute [%s] to [%s]...", special_attribute_key, special_attribute_value) - try: - # if the element exists but is inside an invisible container, make the container visible + 
select_container_xpath = f"//div[@class='l-row' and descendant::select[@id='{special_attribute_key}']]" + if not await self.web_check(By.XPATH, select_container_xpath, Is.DISPLAYED): + await (await self.web_find(By.XPATH, select_container_xpath)).apply("elem => elem.singleNodeValue.style.display = 'block'") + except TimeoutError: + pass # nosec - try: - elem_id = special_attr_elem.attrs.id - if special_attr_elem.local_name == "select": - LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key) - await self.web_select(By.ID, elem_id, special_attribute_value) - elif special_attr_elem.attrs.type == "checkbox": - LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key) - await self.web_click(By.ID, elem_id) - else: - LOG.debug("Attribute field '%s' seems to be a text input...", special_attribute_key) - await self.web_input(By.ID, elem_id, special_attribute_value) - except TimeoutError as ex: - LOG.debug("Attribute field '%s' is not of kind radio button.", special_attribute_key) - raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex - LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value) + try: + # finding element by name cause id are composed sometimes eg. 
autos.marke_s+autos.model_s for Modell by cars + special_attr_elem = await self.web_find(By.XPATH, f"//*[contains(@name, '{special_attribute_key}')]") + except TimeoutError as ex: + LOG.debug("Attribute field '%s' could not be found.", special_attribute_key) + raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}] (not found)") from ex - async def __set_shipping(self, ad_cfg:dict[str, Any]) -> None: - if ad_cfg["shipping_type"] == "PICKUP": + try: + elem_id = special_attr_elem.attrs.id + if special_attr_elem.local_name == "select": + LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key) + await self.web_select(By.ID, elem_id, special_attribute_value) + elif special_attr_elem.attrs.type == "checkbox": + LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key) + await self.web_click(By.ID, elem_id) + else: + LOG.debug("Attribute field '%s' seems to be a text input...", special_attribute_key) + await self.web_input(By.ID, elem_id, special_attribute_value) + except TimeoutError as ex: + LOG.debug("Attribute field '%s' is not of kind radio button.", special_attribute_key) + raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex + LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value) + + async def __set_shipping(self, ad_cfg:Ad) -> None: + if ad_cfg.shipping_type == "PICKUP": try: await self.web_click(By.XPATH, '//*[contains(@class, "ShippingPickupSelector")]//label[contains(., "Nur Abholung")]/../input[@type="radio"]') except TimeoutError as ex: LOG.debug(ex, exc_info = True) - elif ad_cfg["shipping_options"]: + elif ad_cfg.shipping_options: await self.web_click(By.XPATH, '//*[contains(@class, "SubSection")]//button[contains(@class, "SelectionButton")]') await self.web_click(By.XPATH, '//*[contains(@class, "CarrierSelectionModal")]//button[contains(., "Andere Versandmethoden")]') await 
self.__set_shipping_options(ad_cfg) @@ -949,25 +924,28 @@ class KleinanzeigenBot(WebScrapingMixin): special_shipping_selector = '//select[contains(@id, ".versand_s")]' if await self.web_check(By.XPATH, special_shipping_selector, Is.DISPLAYED): # try to set special attribute selector (then we have a commercial account) - shipping_value = "ja" if ad_cfg["shipping_type"] == "SHIPPING" else "nein" + shipping_value = "ja" if ad_cfg.shipping_type == "SHIPPING" else "nein" await self.web_select(By.XPATH, special_shipping_selector, shipping_value) else: try: # no options. only costs. Set custom shipping cost - if ad_cfg["shipping_costs"] is not None: + if ad_cfg.shipping_costs is not None: await self.web_click(By.XPATH, '//*[contains(@class, "SubSection")]//button[contains(@class, "SelectionButton")]') await self.web_click(By.XPATH, '//*[contains(@class, "CarrierSelectionModal")]//button[contains(., "Andere Versandmethoden")]') await self.web_click(By.XPATH, '//*[contains(@id, "INDIVIDUAL") and contains(@data-testid, "Individueller Versand")]') - if ad_cfg["shipping_costs"]: + if ad_cfg.shipping_costs: await self.web_input(By.CSS_SELECTOR, '.IndividualShippingInput input[type="text"]', - str.replace(ad_cfg["shipping_costs"], ".", ",")) + str.replace(str(ad_cfg.shipping_costs), ".", ",")) await self.web_click(By.XPATH, '//dialog//button[contains(., "Fertig")]') except TimeoutError as ex: LOG.debug(ex, exc_info = True) raise TimeoutError(_("Unable to close shipping dialog!")) from ex - async def __set_shipping_options(self, ad_cfg:dict[str, Any]) -> None: + async def __set_shipping_options(self, ad_cfg:Ad) -> None: + if not ad_cfg.shipping_options: + return + shipping_options_mapping = { "DHL_2": ("Klein", "Paket 2 kg"), "Hermes_Päckchen": ("Klein", "Päckchen"), @@ -980,12 +958,9 @@ class KleinanzeigenBot(WebScrapingMixin): "Hermes_L": ("Groß", "L-Paket"), } try: - mapped_shipping_options = [ - shipping_options_mapping[option] - for option in set(ad_cfg["shipping_options"]) 
- ] + mapped_shipping_options = [shipping_options_mapping[option] for option in set(ad_cfg.shipping_options)] except KeyError as ex: - raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg['shipping_options']}") from ex + raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg.shipping_options}") from ex shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict = False) @@ -1029,11 +1004,14 @@ class KleinanzeigenBot(WebScrapingMixin): except TimeoutError as ex: raise TimeoutError(_("Unable to close shipping dialog!")) from ex - async def __upload_images(self, ad_cfg:dict[str, Any]) -> None: - LOG.info(" -> found %s", pluralize("image", ad_cfg["images"])) + async def __upload_images(self, ad_cfg:Ad) -> None: + if not ad_cfg.images: + return + + LOG.info(" -> found %s", pluralize("image", ad_cfg.images)) image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]") - for image in ad_cfg["images"]: + for image in ad_cfg.images: LOG.info(" -> uploading image [%s]", image) await image_upload.send_file(image) await self.web_sleep() @@ -1108,14 +1086,13 @@ class KleinanzeigenBot(WebScrapingMixin): else: LOG.error("The page with the id %d does not exist!", ad_id) - def __get_description(self, ad_cfg:dict[str, Any], *, with_affixes:bool) -> str: + def __get_description(self, ad_cfg:Ad, *, with_affixes:bool) -> str: """Get the ad description optionally with prefix and suffix applied. Precedence (highest to lowest): 1. Direct ad-level affixes (description_prefix/suffix) - 2. Legacy nested ad-level affixes (description.prefix/suffix) - 3. Global flattened affixes (ad_defaults.description_prefix/suffix) - 4. Legacy global nested affixes (ad_defaults.description.prefix/suffix) + 2. Global flattened affixes (ad_defaults.description_prefix/suffix) + 3. 
Legacy global nested affixes (ad_defaults.description.prefix/suffix) Args: ad_cfg: The ad configuration dictionary @@ -1125,20 +1102,15 @@ class KleinanzeigenBot(WebScrapingMixin): """ # Get the main description text description_text = "" - if isinstance(ad_cfg.get("description"), dict): - description_text = ad_cfg["description"].get("text", "") - elif isinstance(ad_cfg.get("description"), str): - description_text = ad_cfg["description"] + if ad_cfg.description: + description_text = ad_cfg.description if with_affixes: # Get prefix with precedence prefix = ( # 1. Direct ad-level prefix - ad_cfg.get("description_prefix") if ad_cfg.get("description_prefix") is not None - # 2. Legacy nested ad-level prefix - else dicts.safe_get(ad_cfg, "description", "prefix") - if dicts.safe_get(ad_cfg, "description", "prefix") is not None - # 3. Global prefix from config + ad_cfg.description_prefix if ad_cfg.description_prefix is not None + # 2. Global prefix from config else get_description_affixes(self.config, prefix = True) or "" # Default to empty string if all sources are None ) @@ -1146,11 +1118,8 @@ class KleinanzeigenBot(WebScrapingMixin): # Get suffix with precedence suffix = ( # 1. Direct ad-level suffix - ad_cfg.get("description_suffix") if ad_cfg.get("description_suffix") is not None - # 2. Legacy nested ad-level suffix - else dicts.safe_get(ad_cfg, "description", "suffix") - if dicts.safe_get(ad_cfg, "description", "suffix") is not None - # 3. Global suffix from config + ad_cfg.description_suffix if ad_cfg.description_suffix is not None + # 2. 
Global suffix from config else get_description_affixes(self.config, prefix = False) or "" # Default to empty string if all sources are None ) diff --git a/src/kleinanzeigen_bot/ads.py b/src/kleinanzeigen_bot/ads.py index 8fc680f..a1ff891 100644 --- a/src/kleinanzeigen_bot/ads.py +++ b/src/kleinanzeigen_bot/ads.py @@ -2,11 +2,10 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ import hashlib, json, os # isort: skip -from typing import Any, Final +from typing import Any from .model.config_model import Config - -MAX_DESCRIPTION_LENGTH:Final[int] = 4000 +from .utils.misc import get_attr def calculate_content_hash(ad_cfg:dict[str, Any]) -> str: @@ -14,24 +13,24 @@ def calculate_content_hash(ad_cfg:dict[str, Any]) -> str: # Relevant fields for the hash content = { - "active": bool(ad_cfg.get("active", True)), # Explicitly convert to bool - "type": str(ad_cfg.get("type", "")), # Explicitly convert to string - "title": str(ad_cfg.get("title", "")), - "description": str(ad_cfg.get("description", "")), - "category": str(ad_cfg.get("category", "")), - "price": str(ad_cfg.get("price", "")), # Price always as string - "price_type": str(ad_cfg.get("price_type", "")), - "special_attributes": dict(ad_cfg.get("special_attributes") or {}), # Handle None case - "shipping_type": str(ad_cfg.get("shipping_type", "")), - "shipping_costs": str(ad_cfg.get("shipping_costs", "")), - "shipping_options": sorted([str(x) for x in (ad_cfg.get("shipping_options") or [])]), # Handle None case - "sell_directly": bool(ad_cfg.get("sell_directly", False)), # Explicitly convert to bool - "images": sorted([os.path.basename(str(img)) if img is not None else "" for img in (ad_cfg.get("images") or [])]), # Handle None values in images + "active": bool(get_attr(ad_cfg, "active", default = True)), # Explicitly convert to bool + "type": str(get_attr(ad_cfg, "type", "")), # Explicitly convert to string + "title": 
str(get_attr(ad_cfg, "title", "")), + "description": str(get_attr(ad_cfg, "description", "")), + "category": str(get_attr(ad_cfg, "category", "")), + "price": str(get_attr(ad_cfg, "price", "")), # Price always as string + "price_type": str(get_attr(ad_cfg, "price_type", "")), + "special_attributes": dict(get_attr(ad_cfg, "special_attributes", {})), # Handle None case + "shipping_type": str(get_attr(ad_cfg, "shipping_type", "")), + "shipping_costs": str(get_attr(ad_cfg, "shipping_costs", "")), + "shipping_options": sorted([str(x) for x in get_attr(ad_cfg, "shipping_options", [])]), # Handle None case + "sell_directly": bool(get_attr(ad_cfg, "sell_directly", default = False)), # Explicitly convert to bool + "images": sorted([os.path.basename(str(img)) if img is not None else "" for img in get_attr(ad_cfg, "images", [])]), # Handle None values in images "contact": { - "name": str(ad_cfg.get("contact", {}).get("name", "")), - "street": str(ad_cfg.get("contact", {}).get("street", "")), # Changed from "None" to empty string for consistency - "zipcode": str(ad_cfg.get("contact", {}).get("zipcode", "")), - "phone": str(ad_cfg.get("contact", {}).get("phone", "")) + "name": str(get_attr(ad_cfg, "contact.name", "")), + "street": str(get_attr(ad_cfg, "contact.street", "")), # Changed from "None" to empty string for consistency + "zipcode": str(get_attr(ad_cfg, "contact.zipcode", "")), + "phone": str(get_attr(ad_cfg, "contact.phone", "")) } } diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 4ea8cdb..b14baf9 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -6,7 +6,10 @@ import urllib.request as urllib_request from datetime import datetime from typing import Any, Final +from kleinanzeigen_bot.model.ad_model import ContactPartial + from .ads import calculate_content_hash, get_description_affixes +from .model.ad_model import AdPartial from .model.config_model import Config from .utils import dicts, i18n, 
loggers, misc, reflect from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin @@ -51,9 +54,12 @@ class AdExtractor(WebScrapingMixin): LOG.info("New directory for ad created at %s.", new_base_dir) # call extraction function - info = await self._extract_ad_page_info(new_base_dir, ad_id) + ad_cfg:AdPartial = await self._extract_ad_page_info(new_base_dir, ad_id) ad_file_path = new_base_dir + "/" + f"ad_{ad_id}.yaml" - dicts.save_dict(ad_file_path, info) + dicts.save_dict( + ad_file_path, + ad_cfg.model_dump(), + header = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json") async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]: """ @@ -240,7 +246,7 @@ class AdExtractor(WebScrapingMixin): return refs - async def navigate_to_ad_page(self, id_or_url: int | str) -> bool: + async def navigate_to_ad_page(self, id_or_url:int | str) -> bool: """ Navigates to an ad page specified with an ad ID; or alternatively by a given URL. :return: whether the navigation to the ad page was successful @@ -267,13 +273,12 @@ class AdExtractor(WebScrapingMixin): pass return True - async def _extract_ad_page_info(self, directory:str, ad_id:int) -> dict[str, Any]: + async def _extract_ad_page_info(self, directory:str, ad_id:int) -> AdPartial: """ Extracts all necessary information from an ad´s page. 
:param directory: the path of the ad´s previously created directory :param ad_id: the ad ID, already extracted by a calling function - :return: a dictionary with the keys as given in an ad YAML, and their respective values """ info:dict[str, Any] = {"active": True} @@ -332,7 +337,7 @@ class AdExtractor(WebScrapingMixin): # Calculate the initial hash for the downloaded ad info["content_hash"] = calculate_content_hash(info) - return info + return AdPartial.model_validate(info) async def _extract_category_from_ad_page(self) -> str: """ @@ -479,7 +484,7 @@ class AdExtractor(WebScrapingMixin): except TimeoutError: return None - async def _extract_contact_from_ad_page(self) -> dict[str, (str | None)]: + async def _extract_contact_from_ad_page(self) -> ContactPartial: """ Processes the address part involving street (optional), zip code + city, and phone number (optional). @@ -516,4 +521,4 @@ class AdExtractor(WebScrapingMixin): contact["phone"] = None # phone seems to be a deprecated feature (for non-professional users) # also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/ - return contact + return ContactPartial.model_validate(contact) diff --git a/src/kleinanzeigen_bot/model/ad_model.py b/src/kleinanzeigen_bot/model/ad_model.py new file mode 100644 index 0000000..80c1a51 --- /dev/null +++ b/src/kleinanzeigen_bot/model/ad_model.py @@ -0,0 +1,115 @@ +# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +from __future__ import annotations + +from datetime import datetime # noqa: TC003 Move import into a type-checking block +from typing import Any, Dict, Final, List, Literal + +from pydantic import Field, model_validator, validator + +from kleinanzeigen_bot.utils.misc import parse_datetime, parse_decimal +from kleinanzeigen_bot.utils.pydantics import ContextualModel + +MAX_DESCRIPTION_LENGTH:Final[int] = 
4000 + + +def _iso_datetime_field() -> Any: + return Field( + default = None, + description = "ISO-8601 timestamp with optional timezone (e.g. 2024-12-25T00:00:00 or 2024-12-25T00:00:00Z)", + json_schema_extra = { + "anyOf": [ + {"type": "null"}, + { + "type": "string", + "pattern": ( + r"^\d{4}-\d{2}-\d{2}T" # date + 'T' + r"\d{2}:\d{2}:\d{2}" # hh:mm:ss + r"(?:\.\d{1,6})?" # optional .micro + r"(?:Z|[+-]\d{2}:\d{2})?$" # optional Z or ±HH:MM + ), + }, + ], + }, + ) + + +class ContactPartial(ContextualModel): + name:str | None = None + street:str | None = None + zipcode:int | str | None = None + location:str | None = None + + phone:str | None = None + + +class AdPartial(ContextualModel): + active:bool = True + type:Literal["OFFER", "WANTED"] = "OFFER" + title:str = Field(..., min_length = 10) + description:str + description_prefix:str | None = None + description_suffix:str | None = None + category:str + special_attributes:Dict[str, str] | None = Field(default = None) + price:int | None = None + price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE" + shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING" + shipping_costs:float | None = None + shipping_options:List[str] | None = Field(default = None) + sell_directly:bool | None = False + images:List[str] | None = Field(default = None) + contact:ContactPartial | None = None + republication_interval:int = 7 + + id:int | None = None + created_on:datetime | None = _iso_datetime_field() + updated_on:datetime | None = _iso_datetime_field() + content_hash:str | None = None + + @validator("created_on", "updated_on", pre = True) + @classmethod + def _parse_dates(cls, v:Any) -> Any: + return parse_datetime(v) + + @validator("shipping_costs", pre = True) + @classmethod + def _parse_shipping_costs(cls, v:float | int | str) -> Any: + if v: + return round(parse_decimal(v), 2) + return None + + @validator("description") + @classmethod + def _validate_description_length(cls, 
v:str) -> str: + if len(v) > MAX_DESCRIPTION_LENGTH: + raise ValueError(f"description length exceeds {MAX_DESCRIPTION_LENGTH} characters") + return v + + @model_validator(mode = "before") + @classmethod + def _validate_price_and_price_type(cls, values:Dict[str, Any]) -> Dict[str, Any]: + price_type = values.get("price_type") + price = values.get("price") + if price_type == "GIVE_AWAY" and price is not None: + raise ValueError("price must not be specified when price_type is GIVE_AWAY") + if price_type == "FIXED" and price is None: + raise ValueError("price is required when price_type is FIXED") + return values + + @validator("shipping_options", each_item = True) + @classmethod + def _validate_shipping_option(cls, v:str) -> str: + if not v.strip(): + raise ValueError("shipping_options entries must be non-empty") + return v + + +class Contact(ContactPartial): + name:str # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] + zipcode:int | str # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] + + +class Ad(AdPartial): + contact:Contact # pyright: ignore[reportGeneralTypeIssues, reportIncompatibleVariableOverride] diff --git a/src/kleinanzeigen_bot/resources/ad_fields.yaml b/src/kleinanzeigen_bot/resources/ad_fields.yaml deleted file mode 100644 index 7f0af44..0000000 --- a/src/kleinanzeigen_bot/resources/ad_fields.yaml +++ /dev/null @@ -1,24 +0,0 @@ -active: # one of: true, false -type: # one of: OFFER, WANTED -title: -description: -category: -special_attributes: {} -price: -price_type: # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE -shipping_type: # one of: PICKUP, SHIPPING, NOT_APPLICABLE -shipping_costs: -shipping_options: [] # see README.md for more information -sell_directly: # requires shipping_options to take effect -images: [] - -contact: - name: - street: - zipcode: - phone: - -republication_interval: -id: -created_on: -updated_on: diff --git a/src/kleinanzeigen_bot/utils/dicts.py 
b/src/kleinanzeigen_bot/utils/dicts.py index f5bce64..3ee97b3 100644 --- a/src/kleinanzeigen_bot/utils/dicts.py +++ b/src/kleinanzeigen_bot/utils/dicts.py @@ -8,15 +8,18 @@ from gettext import gettext as _ from importlib.resources import read_text as get_resource_as_string from pathlib import Path from types import ModuleType -from typing import Any, Final +from typing import Any, Final, TypeVar from ruamel.yaml import YAML from . import files, loggers # pylint: disable=cyclic-import -from .misc import K, V LOG:Final[loggers.Logger] = loggers.get_logger(__name__) +# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions +K = TypeVar("K") +V = TypeVar("V") + def apply_defaults( target:dict[Any, Any], diff --git a/src/kleinanzeigen_bot/utils/misc.py b/src/kleinanzeigen_bot/utils/misc.py index 2c92b4b..d5d41a2 100644 --- a/src/kleinanzeigen_bot/utils/misc.py +++ b/src/kleinanzeigen_bot/utils/misc.py @@ -5,14 +5,12 @@ import asyncio, decimal, re, sys, time # isort: skip from collections.abc import Callable from datetime import datetime, timedelta, timezone from gettext import gettext as _ -from typing import Any, TypeVar +from typing import Any, Mapping, TypeVar from . import i18n # https://mypy.readthedocs.io/en/stable/generics.html#generic-functions T = TypeVar("T") -K = TypeVar("K") -V = TypeVar("V") def ensure( @@ -44,6 +42,63 @@ def ensure( time.sleep(poll_requency) +def get_attr(obj:Mapping[str, Any] | Any, key:str, default:Any | None = None) -> Any: + """ + Unified getter for attribute or key access on objects or dicts. + Supports dot-separated paths for nested access. + + Args: + obj: The object or dictionary to get the value from. + key: The attribute or key name, possibly nested via dot notation (e.g. 'contact.email'). + default: A default value to return if the key/attribute path is not found. + + Returns: + The found value or the default. + + Examples: + >>> class User: + ... 
def __init__(self, contact): self.contact = contact + + # [object] normal nested access: + >>> get_attr(User({'email': 'user@example.com'}), 'contact.email') + 'user@example.com' + + # [object] missing key at depth: + >>> get_attr(User({'email': 'user@example.com'}), 'contact.foo') is None + True + + # [object] explicit None treated as missing: + >>> get_attr(User({'email': None}), 'contact.email', default='n/a') + 'n/a' + + # [object] parent in path is None: + >>> get_attr(User(None), 'contact.email', default='n/a') + 'n/a' + + # [dict] normal nested access: + >>> get_attr({'contact': {'email': 'data@example.com'}}, 'contact.email') + 'data@example.com' + + # [dict] missing key at depth: + >>> get_attr({'contact': {'email': 'user@example.com'}}, 'contact.foo') is None + True + + # [dict] explicit None treated as missing: + >>> get_attr({'contact': {'email': None}}, 'contact.email', default='n/a') + 'n/a' + + # [dict] parent in path is None: + >>> get_attr({}, 'contact.email', default='none') + 'none' + """ + for part in key.split("."): + obj = obj.get(part) if isinstance(obj, Mapping) else getattr(obj, part, None) + if obj is None: + return default + + return obj + + def now() -> datetime: return datetime.now(timezone.utc) diff --git a/tests/conftest.py b/tests/conftest.py index 4503a2d..b28411f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -40,7 +40,8 @@ def test_bot_config() -> Config: return Config.model_validate({ "ad_defaults": { "contact": { - "name": "dummy_name" + "name": "dummy_name", + "zipcode": "12345" }, }, "login": { diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py index 270f626..fee3976 100644 --- a/tests/unit/test_extract.py +++ b/tests/unit/test_extract.py @@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, call, patch import pytest from kleinanzeigen_bot.extract import AdExtractor +from kleinanzeigen_bot.model.ad_model import AdPartial, ContactPartial from kleinanzeigen_bot.model.config_model import 
Config, DownloadConfig from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element @@ -441,7 +442,7 @@ class TestAdExtractorContent: _extract_contact_from_ad_page = AsyncMock(return_value = {}) ): info = await test_extractor._extract_ad_page_info("/some/dir", 12345) - assert info["description"] == raw_description + assert info.description == raw_description @pytest.mark.asyncio async def test_extract_description_with_affixes_timeout( @@ -466,11 +467,11 @@ class TestAdExtractorContent: _extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)), _extract_sell_directly_from_ad_page = AsyncMock(return_value = False), _download_images_from_ad_page = AsyncMock(return_value = []), - _extract_contact_from_ad_page = AsyncMock(return_value = {}) + _extract_contact_from_ad_page = AsyncMock(return_value = ContactPartial()) ): try: info = await test_extractor._extract_ad_page_info("/some/dir", 12345) - assert not info["description"] + assert not info.description except TimeoutError: # This is also acceptable - depends on how we want to handle timeouts pass @@ -499,10 +500,10 @@ class TestAdExtractorContent: _extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)), _extract_sell_directly_from_ad_page = AsyncMock(return_value = False), _download_images_from_ad_page = AsyncMock(return_value = []), - _extract_contact_from_ad_page = AsyncMock(return_value = {}) + _extract_contact_from_ad_page = AsyncMock(return_value = ContactPartial()) ): info = await test_extractor._extract_ad_page_info("/some/dir", 12345) - assert info["description"] == raw_description + assert info.description == raw_description @pytest.mark.asyncio async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None: @@ -615,12 +616,11 @@ class TestAdExtractorContact: ] contact_info = await extractor._extract_contact_from_ad_page() - assert isinstance(contact_info, dict) - assert contact_info["street"] == 
"Example Street 123" - assert contact_info["zipcode"] == "12345" - assert contact_info["location"] == "Berlin - Mitte" - assert contact_info["name"] == "Test User" - assert contact_info["phone"] is None + assert contact_info.street == "Example Street 123" + assert contact_info.zipcode == "12345" + assert contact_info.location == "Berlin - Mitte" + assert contact_info.name == "Test User" + assert contact_info.phone is None @pytest.mark.asyncio # pylint: disable=protected-access @@ -656,8 +656,7 @@ class TestAdExtractorContact: ] contact_info = await extractor._extract_contact_from_ad_page() - assert isinstance(contact_info, dict) - assert contact_info["phone"] == "01234567890" # Normalized phone number + assert contact_info.phone == "01234567890" # Normalized phone number class TestAdExtractorDownload: @@ -696,9 +695,10 @@ class TestAdExtractorDownload: mock_exists.side_effect = lambda path: path in existing_paths mock_isdir.side_effect = lambda path: path == base_dir - mock_extract.return_value = { + mock_extract.return_value = AdPartial.model_validate({ "title": "Test Advertisement Title", "description": "Test Description", + "category": "Dienstleistungen", "price": 100, "images": [], "contact": { @@ -707,7 +707,7 @@ class TestAdExtractorDownload: "zipcode": "12345", "location": "Test City" } - } + }) await extractor.download_ad(12345) @@ -723,7 +723,7 @@ class TestAdExtractorDownload: assert actual_call is not None actual_path = actual_call[0][0].replace("/", os.path.sep) assert actual_path == yaml_path - assert actual_call[0][1] == mock_extract.return_value + assert actual_call[0][1] == mock_extract.return_value.model_dump() @pytest.mark.asyncio # pylint: disable=protected-access @@ -752,9 +752,10 @@ class TestAdExtractorDownload: mock_exists.return_value = False mock_isdir.return_value = False - mock_extract.return_value = { + mock_extract.return_value = AdPartial.model_validate({ "title": "Test Advertisement Title", "description": "Test Description", + 
"category": "Dienstleistungen", "price": 100, "images": [], "contact": { @@ -763,7 +764,7 @@ class TestAdExtractorDownload: "zipcode": "12345", "location": "Test City" } - } + }) await extractor.download_ad(12345) @@ -781,4 +782,4 @@ class TestAdExtractorDownload: assert actual_call is not None actual_path = actual_call[0][0].replace("/", os.path.sep) assert actual_path == yaml_path - assert actual_call[0][1] == mock_extract.return_value + assert actual_call[0][1] == mock_extract.return_value.model_dump() diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index 33cbc05..8793f8a 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -11,13 +11,13 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest from pydantic import ValidationError -from ruamel.yaml import YAML from kleinanzeigen_bot import LOG, KleinanzeigenBot, misc from kleinanzeigen_bot._version import __version__ from kleinanzeigen_bot.ads import calculate_content_hash +from kleinanzeigen_bot.model.ad_model import Ad from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig -from kleinanzeigen_bot.utils import loggers +from kleinanzeigen_bot.utils import dicts, loggers @pytest.fixture @@ -68,32 +68,6 @@ def base_ad_config() -> dict[str, Any]: } -def create_ad_config(base_config:dict[str, Any], **overrides:Any) -> dict[str, Any]: - """Create a new ad configuration by extending or overriding the base configuration. 
- - Args: - base_config: The base configuration to start from - **overrides: Key-value pairs to override or extend the base configuration - - Returns: - A new ad configuration dictionary - """ - config = copy.deepcopy(base_config) - for key, value in overrides.items(): - if isinstance(value, dict) and key in config and isinstance(config[key], dict): - config[key].update(value) - elif key in config: - config[key] = value - else: - config[key] = value - - # Only check length if description is a string - if isinstance(config.get("description"), str): - assert len(config["description"]) <= 4000, "Length of ad description including prefix and suffix exceeds 4000 chars" - - return config - - def remove_fields(config:dict[str, Any], *fields:str) -> dict[str, Any]: """Create a new ad configuration with specified fields removed. @@ -669,21 +643,17 @@ categories: ad_file = ad_dir / "test_ad.yaml" # Create a minimal config with empty title to trigger validation - ad_cfg = create_ad_config( - minimal_ad_config, - title = "" # Empty title to trigger length validation - ) - - yaml = YAML() - with open(ad_file, "w", encoding = "utf-8") as f: - yaml.dump(ad_cfg, f) + ad_cfg = minimal_ad_config | { + "title": "" + } + dicts.save_dict(ad_file, ad_cfg) # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") test_bot.config.ad_files = ["ads/*.yaml"] - with pytest.raises(AssertionError) as exc_info: + with pytest.raises(ValidationError) as exc_info: test_bot.load_ads() - assert "must be at least 10 characters long" in str(exc_info.value) + assert "title" in str(exc_info.value) def test_load_ads_with_invalid_price_type(self, test_bot:KleinanzeigenBot, tmp_path:Any, minimal_ad_config:dict[str, Any]) -> None: """Test loading ads with invalid price type.""" @@ -693,21 +663,17 @@ categories: ad_file = ad_dir / "test_ad.yaml" # Create config with invalid price type - ad_cfg = create_ad_config( - minimal_ad_config, - 
price_type = "INVALID_TYPE" # Invalid price type - ) - - yaml = YAML() - with open(ad_file, "w", encoding = "utf-8") as f: - yaml.dump(ad_cfg, f) + ad_cfg = minimal_ad_config | { + "price_type": "INVALID_TYPE" + } + dicts.save_dict(ad_file, ad_cfg) # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") test_bot.config.ad_files = ["ads/*.yaml"] - with pytest.raises(AssertionError) as exc_info: + with pytest.raises(ValidationError) as exc_info: test_bot.load_ads() - assert "property [price_type] must be one of:" in str(exc_info.value) + assert "price_type" in str(exc_info.value) def test_load_ads_with_invalid_shipping_type(self, test_bot:KleinanzeigenBot, tmp_path:Any, minimal_ad_config:dict[str, Any]) -> None: """Test loading ads with invalid shipping type.""" @@ -717,21 +683,17 @@ categories: ad_file = ad_dir / "test_ad.yaml" # Create config with invalid shipping type - ad_cfg = create_ad_config( - minimal_ad_config, - shipping_type = "INVALID_TYPE" # Invalid shipping type - ) - - yaml = YAML() - with open(ad_file, "w", encoding = "utf-8") as f: - yaml.dump(ad_cfg, f) + ad_cfg = minimal_ad_config | { + "shipping_type": "INVALID_TYPE" + } + dicts.save_dict(ad_file, ad_cfg) # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") test_bot.config.ad_files = ["ads/*.yaml"] - with pytest.raises(AssertionError) as exc_info: + with pytest.raises(ValidationError) as exc_info: test_bot.load_ads() - assert "property [shipping_type] must be one of:" in str(exc_info.value) + assert "shipping_type" in str(exc_info.value) def test_load_ads_with_invalid_price_config(self, test_bot:KleinanzeigenBot, tmp_path:Any, minimal_ad_config:dict[str, Any]) -> None: """Test loading ads with invalid price configuration.""" @@ -741,22 +703,18 @@ categories: ad_file = ad_dir / "test_ad.yaml" # Create config with price for GIVE_AWAY type - ad_cfg = 
create_ad_config( - minimal_ad_config, - price_type = "GIVE_AWAY", - price = 100 # Price should not be set for GIVE_AWAY - ) - - yaml = YAML() - with open(ad_file, "w", encoding = "utf-8") as f: - yaml.dump(ad_cfg, f) + ad_cfg = minimal_ad_config | { + "price_type": "GIVE_AWAY", + "price": 100 # Price should not be set for GIVE_AWAY + } + dicts.save_dict(ad_file, ad_cfg) # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") test_bot.config.ad_files = ["ads/*.yaml"] - with pytest.raises(AssertionError) as exc_info: + with pytest.raises(ValidationError) as exc_info: test_bot.load_ads() - assert "must not be specified for GIVE_AWAY ad" in str(exc_info.value) + assert "price" in str(exc_info.value) def test_load_ads_with_missing_price(self, test_bot:KleinanzeigenBot, tmp_path:Any, minimal_ad_config:dict[str, Any]) -> None: """Test loading ads with missing price for FIXED price type.""" @@ -766,50 +724,18 @@ categories: ad_file = ad_dir / "test_ad.yaml" # Create config with FIXED price type but no price - ad_cfg = create_ad_config( - minimal_ad_config, - price_type = "FIXED", - price = None # Missing required price for FIXED type - ) - - yaml = YAML() - with open(ad_file, "w", encoding = "utf-8") as f: - yaml.dump(ad_cfg, f) + ad_cfg = minimal_ad_config | { + "price_type": "FIXED", + "price": None # Missing required price for FIXED type + } + dicts.save_dict(ad_file, ad_cfg) # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") test_bot.config.ad_files = ["ads/*.yaml"] - with pytest.raises(AssertionError) as exc_info: + with pytest.raises(ValidationError) as exc_info: test_bot.load_ads() - assert "not specified" in str(exc_info.value) - - def test_load_ads_with_invalid_category(self, test_bot:KleinanzeigenBot, tmp_path:Any, minimal_ad_config:dict[str, Any]) -> None: - """Test loading ads with invalid category.""" - 
temp_path = Path(tmp_path) - ad_dir = temp_path / "ads" - ad_dir.mkdir() - ad_file = ad_dir / "test_ad.yaml" - - # Create config with invalid category and empty description to prevent auto-detection - ad_cfg = create_ad_config( - minimal_ad_config, - category = "999999", # Non-existent category - description = None # Set description to None to trigger validation - ) - - # Mock the config to prevent auto-detection - test_bot.config.ad_defaults = AdDefaults() - - yaml = YAML() - with open(ad_file, "w", encoding = "utf-8") as f: - yaml.dump(ad_cfg, f) - - # Set config file path to tmp_path and use relative path for ad_files - test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config.ad_files = ["ads/*.yaml"] - with pytest.raises(AssertionError) as exc_info: - test_bot.load_ads() - assert "property [description] not specified" in str(exc_info.value) + assert "price is required when price_type is FIXED" in str(exc_info.value) class TestKleinanzeigenBotAdDeletion: @@ -823,11 +749,10 @@ class TestKleinanzeigenBotAdDeletion: test_bot.page.sleep = AsyncMock() # Use minimal config since we only need title for deletion by title - ad_cfg = create_ad_config( - minimal_ad_config, - title = "Test Title", - id = None # Explicitly set id to None for title-based deletion - ) + ad_cfg = Ad.model_validate(minimal_ad_config | { + "title": "Test Title", + "id": None # Explicitly set id to None for title-based deletion + }) published_ads = [ {"title": "Test Title", "id": "67890"}, @@ -850,10 +775,9 @@ class TestKleinanzeigenBotAdDeletion: test_bot.page.sleep = AsyncMock() # Create config with ID for deletion by ID - ad_cfg = create_ad_config( - minimal_ad_config, - id = "12345" - ) + ad_cfg = Ad.model_validate(minimal_ad_config | { + "id": "12345" # Key must be the string "id"; a bare id would be the builtin function + }) published_ads = [ {"title": "Different Title", "id": "12345"}, @@ -883,13 +807,12 @@ class TestKleinanzeigenBotAdRepublication: }) # Create ad config with all necessary fields for republication - ad_cfg = create_ad_config(
- base_ad_config, - id = "12345", - updated_on = "2024-01-01T00:00:00", - created_on = "2024-01-01T00:00:00", - description = "Changed description" - ) + ad_cfg = Ad.model_validate(base_ad_config | { + "id": "12345", + "updated_on": "2024-01-01T00:00:01", + "created_on": "2024-01-01T00:00:01", + "description": "Changed description" + }) # Create a temporary directory and file with tempfile.TemporaryDirectory() as temp_dir: @@ -898,21 +821,14 @@ class TestKleinanzeigenBotAdRepublication: ad_dir.mkdir() ad_file = ad_dir / "test_ad.yaml" - yaml = YAML() - with open(ad_file, "w", encoding = "utf-8") as f: - yaml.dump(ad_cfg, f) + dicts.save_dict(ad_file, ad_cfg.model_dump()) # Set config file path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") test_bot.config.ad_files = ["ads/*.yaml"] - # Mock the loading of the original ad configuration - with patch("kleinanzeigen_bot.utils.dicts.load_dict", side_effect = [ - ad_cfg, # First call returns the original ad config - {} # Second call for ad_fields.yaml - ]): - ads_to_publish = test_bot.load_ads() - assert len(ads_to_publish) == 1 + ads_to_publish = test_bot.load_ads() + assert len(ads_to_publish) == 1 def test_check_ad_republication_no_changes(self, test_bot:KleinanzeigenBot, base_ad_config:dict[str, Any]) -> None: """Test that unchanged ads within interval are not marked for republication.""" @@ -920,16 +836,15 @@ class TestKleinanzeigenBotAdRepublication: three_days_ago = (current_time - timedelta(days = 3)).isoformat() # Create ad config with timestamps for republication check - ad_cfg = create_ad_config( - base_ad_config, - id = "12345", - updated_on = three_days_ago, - created_on = three_days_ago - ) + ad_cfg = Ad.model_validate(base_ad_config | { + "id": "12345", + "updated_on": three_days_ago, + "created_on": three_days_ago + }) # Calculate hash before making the copy to ensure they match - current_hash = calculate_content_hash(ad_cfg) - ad_cfg_orig = 
copy.deepcopy(ad_cfg) + ad_cfg_orig = ad_cfg.model_dump() + current_hash = calculate_content_hash(ad_cfg_orig) ad_cfg_orig["content_hash"] = current_hash # Mock the config to prevent actual file operations @@ -952,16 +867,15 @@ class TestKleinanzeigenBotShippingOptions: test_bot.page.evaluate = AsyncMock() # Create ad config with specific shipping options - ad_cfg = create_ad_config( - base_ad_config, - shipping_options = ["DHL_2", "Hermes_Päckchen"], - created_on = "2024-01-01T00:00:00", # Add created_on to prevent KeyError - updated_on = "2024-01-01T00:00:00" # Add updated_on for consistency - ) + ad_cfg = Ad.model_validate(base_ad_config | { + "shipping_options": ["DHL_2", "Hermes_Päckchen"], + "updated_on": "2024-01-01T00:00:00", # Add updated_on for consistency + "created_on": "2024-01-01T00:00:00" # Add created_on to prevent KeyError + }) # Create the original ad config and published ads list - ad_cfg_orig = copy.deepcopy(ad_cfg) - ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg) # Add content hash to prevent republication + ad_cfg_orig = ad_cfg.model_dump() + ad_cfg_orig["content_hash"] = calculate_content_hash(ad_cfg_orig) # Add content hash to prevent republication published_ads:list[dict[str, Any]] = [] # Set up default config values needed for the test @@ -1052,7 +966,13 @@ class TestKleinanzeigenBotPrefixSuffix: for config, raw_description, expected_description in description_test_cases: test_bot = KleinanzeigenBot() test_bot.config = test_bot_config.with_values(config) - ad_cfg = {"description": raw_description, "active": True} + ad_cfg = test_bot.load_ad({ + "description": raw_description, + "active": True, + "title": "0123456789", + "category": "whatever", + }) + # Access private method using the correct name mangling description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == expected_description @@ -1066,10 +986,12 @@ class TestKleinanzeigenBotPrefixSuffix:
"description_suffix": "S" * 1000 } }) - ad_cfg = { + ad_cfg = test_bot.load_ad({ "description": "D" * 2001, # This plus affixes will exceed 4000 chars - "active": True - } + "active": True, + "title": "0123456789", + "category": "whatever", + }) with pytest.raises(AssertionError) as exc_info: getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) @@ -1087,10 +1009,12 @@ class TestKleinanzeigenBotDescriptionHandling: test_bot.config = test_bot_config # Test with a simple ad config - ad_cfg = { + ad_cfg = test_bot.load_ad({ "description": "Test Description", - "active": True - } + "active": True, + "title": "0123456789", + "category": "whatever", + }) # The description should be returned as-is without any prefix/suffix description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) @@ -1106,10 +1030,12 @@ class TestKleinanzeigenBotDescriptionHandling: } }) - ad_cfg = { + ad_cfg = test_bot.load_ad({ "description": "Test Description", - "active": True - } + "active": True, + "title": "0123456789", + "category": "whatever", + }) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "Prefix: Test Description :Suffix" @@ -1128,10 +1054,12 @@ class TestKleinanzeigenBotDescriptionHandling: } }) - ad_cfg = { + ad_cfg = test_bot.load_ad({ "description": "Test Description", - "active": True - } + "active": True, + "title": "0123456789", + "category": "whatever", + }) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "New Prefix: Test Description :New Suffix" @@ -1146,12 +1074,14 @@ class TestKleinanzeigenBotDescriptionHandling: } }) - ad_cfg = { + ad_cfg = test_bot.load_ad({ "description": "Test Description", "description_prefix": "Ad Prefix: ", "description_suffix": " :Ad Suffix", - "active": True - } + "active": True, + "title": "0123456789", + "category": "whatever", + }) 
description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "Ad Prefix: Test Description :Ad Suffix" @@ -1170,10 +1100,12 @@ class TestKleinanzeigenBotDescriptionHandling: } }) - ad_cfg = { + ad_cfg = test_bot.load_ad({ "description": "Test Description", - "active": True - } + "active": True, + "title": "0123456789", + "category": "whatever", + }) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "Test Description" @@ -1183,10 +1115,12 @@ class TestKleinanzeigenBotDescriptionHandling: test_bot = KleinanzeigenBot() test_bot.config = test_bot_config - ad_cfg = { + ad_cfg = test_bot.load_ad({ "description": "Contact: test@example.com", - "active": True - } + "active": True, + "title": "0123456789", + "category": "whatever", + }) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "Contact: test(at)example.com" @@ -1210,17 +1144,17 @@ class TestKleinanzeigenBotChangedAds: }) # Create a changed ad - changed_ad = create_ad_config( - base_ad_config, - id = "12345", - title = "Changed Ad", - updated_on = "2024-01-01T00:00:00", - created_on = "2024-01-01T00:00:00", - active = True - ) + ad_cfg = Ad.model_validate(base_ad_config | { + "id": "12345", + "title": "Changed Ad", + "updated_on": "2024-01-01T00:00:00", + "created_on": "2024-01-01T00:00:00", + "active": True + }) # Calculate hash for changed_ad and add it to the config # Then modify the ad to simulate a change + changed_ad = ad_cfg.model_dump() changed_hash = calculate_content_hash(changed_ad) changed_ad["content_hash"] = changed_hash # Now modify the ad to make it "changed" @@ -1233,10 +1167,7 @@ class TestKleinanzeigenBotChangedAds: ad_dir.mkdir() # Write the ad file - yaml = YAML() - changed_file = ad_dir / "changed_ad.yaml" - with open(changed_file, "w", encoding = "utf-8") as f: - yaml.dump(changed_ad, f) + 
dicts.save_dict(ad_dir / "changed_ad.yaml", changed_ad) # Set config file path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") @@ -1251,7 +1182,7 @@ class TestKleinanzeigenBotChangedAds: # The changed ad should be loaded assert len(ads_to_publish) == 1 - assert ads_to_publish[0][1]["title"] == "Changed Ad - Modified" + assert ads_to_publish[0][1].title == "Changed Ad - Modified" def test_load_ads_with_due_selector_includes_all_due_ads(self, test_bot:KleinanzeigenBot, base_ad_config:dict[str, Any]) -> None: """Test that 'due' selector includes all ads that are due for republication, regardless of changes.""" @@ -1262,15 +1193,15 @@ class TestKleinanzeigenBotChangedAds: current_time = misc.now() old_date = (current_time - timedelta(days = 10)).isoformat() # Past republication interval - changed_ad = create_ad_config( - base_ad_config, - id = "12345", - title = "Changed Ad", - updated_on = old_date, - created_on = old_date, - republication_interval = 7, # Due for republication after 7 days - active = True - ) + ad_cfg = Ad.model_validate(base_ad_config | { + "id": "12345", + "title": "Changed Ad", + "updated_on": old_date, + "created_on": old_date, + "republication_interval": 7, # Due for republication after 7 days + "active": True + }) + changed_ad = ad_cfg.model_dump() # Create temporary directory and file with tempfile.TemporaryDirectory() as temp_dir: @@ -1279,10 +1210,7 @@ class TestKleinanzeigenBotChangedAds: ad_dir.mkdir() # Write the ad file - yaml = YAML() - ad_file = ad_dir / "changed_ad.yaml" - with open(ad_file, "w", encoding = "utf-8") as f: - yaml.dump(changed_ad, f) + dicts.save_dict(ad_dir / "changed_ad.yaml", changed_ad) # Set config file path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml")