diff --git a/README.md b/README.md index 6de4def..ea283ab 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,8 @@ Valid file extensions are `.json`, `.yaml` and `.yml` The following parameters can be configured: ```yaml +# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/config.schema.json + # glob (wildcard) patterns to select ad configuration files # if relative paths are specified, then they are relative to this configuration file ad_files: @@ -425,6 +427,7 @@ By default a new browser process will be launched. To reuse a manually launched - all tests: `pdm run test` - with coverage: `pdm run test:cov` - Run syntax checks: `pdm run lint` - Linting issues found by ruff can be auto-fixed using `pdm run lint:fix` +- Derive JSON schema files from Pydantic data model: `pdm run generate-schemas` - Create platform-specific executable: `pdm run compile` - Application bootstrap works like this: ```python diff --git a/pdm.lock b/pdm.lock index e7cfdda..f9ff84a 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:2ce8b5b77bbdaa380fbb3b50bc888b1f7c7c03fc4072cb7db379a787b2532d20" +content_hash = "sha256:4c861bebeac9e92661923a7e8d04a695c2185a5d0f85179fb858febd2503fdaf" [[metadata.targets]] requires_python = ">=3.10,<3.14" @@ -25,7 +25,7 @@ name = "annotated-types" version = "0.7.0" requires_python = ">=3.8" summary = "Reusable constraint types to use with typing.Annotated" -groups = ["dev"] +groups = ["default", "dev"] dependencies = [ "typing-extensions>=4.0.0; python_version < \"3.9\"", ] @@ -910,7 +910,7 @@ name = "pydantic" version = "2.11.4" requires_python = ">=3.9" summary = "Data validation using Python type hints" -groups = ["dev"] +groups = ["default", "dev"] dependencies = [ "annotated-types>=0.6.0", "pydantic-core==2.33.2", @@ -927,7 +927,7 @@ name = "pydantic-core" version = 
"2.33.2" requires_python = ">=3.9" summary = "Core functionality for Pydantic validation and serialization" -groups = ["dev"] +groups = ["default", "dev"] dependencies = [ "typing-extensions!=4.7.0,>=4.6.0", ] @@ -1371,7 +1371,7 @@ name = "typing-extensions" version = "4.13.2" requires_python = ">=3.8" summary = "Backported and Experimental Type Hints for Python 3.8+" -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, @@ -1382,7 +1382,7 @@ name = "typing-inspection" version = "0.4.0" requires_python = ">=3.9" summary = "Runtime typing introspection tools" -groups = ["dev"] +groups = ["default", "dev"] dependencies = [ "typing-extensions>=4.12.0", ] diff --git a/pyproject.toml b/pyproject.toml index 277de14..eb10e86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "colorama", "jaraco.text", # required by pkg_resources during runtime "nodriver==0.39.0", # 0.40-0.44 have issues starting browsers and evaluating self.web_execute("window.BelenConf") fails + "pydantic>=2.0.0", "ruamel.yaml", "psutil", "wcmatch", @@ -256,7 +257,7 @@ min-file-size = 256 [tool.ruff.lint.pylint] # https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker # https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages -max-args = 5 # max. number of args for function / method (R0913) +max-args = 6 # max. number of args for function / method (R0913) # max-attributes = 15 # max. number of instance attrs for a class (R0902) max-branches = 40 # max. number of branch for function / method body (R0912) max-locals = 30 # max. 
number of local vars for function / method body (R0914) diff --git a/schemas/config.schema.json b/schemas/config.schema.json new file mode 100644 index 0000000..1716e1c --- /dev/null +++ b/schemas/config.schema.json @@ -0,0 +1,377 @@ +{ + "$defs": { + "AdDefaults": { + "properties": { + "active": { + "default": true, + "title": "Active", + "type": "boolean" + }, + "type": { + "default": "OFFER", + "enum": [ + "OFFER", + "WANTED" + ], + "title": "Type", + "type": "string" + }, + "description": { + "anyOf": [ + { + "$ref": "#/$defs/DescriptionAffixes" + }, + { + "type": "null" + } + ], + "default": null + }, + "description_prefix": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "prefix for the ad description", + "title": "Description Prefix" + }, + "description_suffix": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": " suffix for the ad description", + "title": "Description Suffix" + }, + "price_type": { + "default": "NEGOTIABLE", + "enum": [ + "FIXED", + "NEGOTIABLE", + "GIVE_AWAY", + "NOT_APPLICABLE" + ], + "title": "Price Type", + "type": "string" + }, + "shipping_type": { + "default": "SHIPPING", + "enum": [ + "PICKUP", + "SHIPPING", + "NOT_APPLICABLE" + ], + "title": "Shipping Type", + "type": "string" + }, + "sell_directly": { + "default": false, + "description": "requires shipping_type SHIPPING to take effect", + "title": "Sell Directly", + "type": "boolean" + }, + "contact": { + "$ref": "#/$defs/ContactDefaults" + }, + "republication_interval": { + "default": 7, + "title": "Republication Interval", + "type": "integer" + } + }, + "title": "AdDefaults", + "type": "object" + }, + "BrowserConfig": { + "properties": { + "arguments": { + "description": "See https://peter.sh/experiments/chromium-command-line-switches/", + "items": { + "type": "string" + }, + "title": "Arguments", + "type": "array" + }, + "binary_location": { + "anyOf": [ + { + 
"type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "path to custom browser executable, if not specified will be looked up on PATH", + "title": "Binary Location" + }, + "extensions": { + "description": "a list of .crx extension files to be loaded", + "items": { + "type": "string" + }, + "title": "Extensions", + "type": "array" + }, + "use_private_window": { + "default": true, + "title": "Use Private Window", + "type": "boolean" + }, + "user_data_dir": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md", + "title": "User Data Dir" + }, + "profile_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Profile Name" + } + }, + "title": "BrowserConfig", + "type": "object" + }, + "CaptchaConfig": { + "properties": { + "auto_restart": { + "default": false, + "title": "Auto Restart", + "type": "boolean" + }, + "restart_delay": { + "default": "6h", + "title": "Restart Delay", + "type": "string" + } + }, + "title": "CaptchaConfig", + "type": "object" + }, + "ContactDefaults": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Name" + }, + "street": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Street" + }, + "zipcode": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Zipcode" + }, + "phone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Phone" + } + }, + "title": "ContactDefaults", + "type": "object" + }, + "DescriptionAffixes": { + "deprecated": true, + "properties": { + "prefix": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + 
"title": "Prefix" + }, + "suffix": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Suffix" + } + }, + "title": "DescriptionAffixes", + "type": "object" + }, + "DownloadConfig": { + "properties": { + "include_all_matching_shipping_options": { + "default": false, + "description": "if true, all shipping options matching the package size will be included", + "title": "Include All Matching Shipping Options", + "type": "boolean" + }, + "excluded_shipping_options": { + "description": "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']", + "items": { + "type": "string" + }, + "title": "Excluded Shipping Options", + "type": "array" + } + }, + "title": "DownloadConfig", + "type": "object" + }, + "LoginConfig": { + "properties": { + "username": { + "minLength": 1, + "title": "Username", + "type": "string" + }, + "password": { + "minLength": 1, + "title": "Password", + "type": "string" + } + }, + "required": [ + "username", + "password" + ], + "title": "LoginConfig", + "type": "object" + }, + "PublishingConfig": { + "properties": { + "delete_old_ads": { + "anyOf": [ + { + "enum": [ + "BEFORE_PUBLISH", + "AFTER_PUBLISH", + "NEVER" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": "AFTER_PUBLISH", + "title": "Delete Old Ads" + }, + "delete_old_ads_by_title": { + "default": true, + "description": "only works if delete_old_ads is set to BEFORE_PUBLISH", + "title": "Delete Old Ads By Title", + "type": "boolean" + } + }, + "title": "PublishingConfig", + "type": "object" + } + }, + "properties": { + "ad_files": { + "description": "\nglob (wildcard) patterns to select ad configuration files\nif relative paths are specified, then they are relative to this configuration file\n", + "items": { + "type": "string" + }, + "minItems": 1, + "title": "Ad Files", + "type": "array" + }, + "ad_defaults": { + "$ref": "#/$defs/AdDefaults", + "description": "Default values for ads, can be overwritten in each ad 
def generate_schema(model:"Type[BaseModel]", out_dir:Path) -> None:
    """
    Generate the JSON schema of the given model and write it to
    ``<out_dir>/<lower-cased model name>.schema.json``.

    The schema is obtained from ``model.model_json_schema(mode = "validation")``.
    ``title`` and ``description`` are only filled in when the model's schema
    does not already provide them.

    :param model: model class to derive the schema from
    :param out_dir: target directory, created (including parents) if missing
    """
    name = model.__name__
    print(f"[+] Generating schema for model [{name}]...")

    # Create JSON Schema dict
    schema = model.model_json_schema(mode = "validation")
    schema.setdefault("title", f"{name} Schema")
    schema.setdefault("description", f"Auto-generated JSON Schema for {name}")

    # do not rely on the caller having created the target directory
    out_dir.mkdir(parents = True, exist_ok = True)

    # Write JSON
    json_path = out_dir / f"{name.lower()}.schema.json"
    # newline = "\n": the generated file is committed, so its bytes must not depend on the platform
    with json_path.open("w", encoding = "utf-8", newline = "\n") as f_json:
        # ensure_ascii = False: the file is UTF-8 encoded, keep non-ASCII text readable instead of \uXXXX escaped
        json.dump(schema, f_json, indent = 2, ensure_ascii = False)
        f_json.write("\n")
    print(f"[✓] {json_path}")
import extract, resources from ._version import __version__ from .ads import MAX_DESCRIPTION_LENGTH, calculate_content_hash, get_description_affixes +from .model.config_model import Config from .utils import dicts, error_handlers, loggers, misc from .utils.exceptions import CaptchaEncountered from .utils.files import abspath @@ -42,7 +43,7 @@ class KleinanzeigenBot(WebScrapingMixin): self.root_url = "https://www.kleinanzeigen.de" - self.config:dict[str, Any] = {} + self.config:Config self.config_file_path = abspath("config.yaml") self.categories:dict[str, str] = {} @@ -325,7 +326,7 @@ class KleinanzeigenBot(WebScrapingMixin): ad_files:dict[str, str] = {} data_root_dir = os.path.dirname(self.config_file_path) - for file_pattern in self.config["ad_files"]: + for file_pattern in self.config.ad_files: for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB): if not str(ad_file).endswith("ad_fields.yaml"): ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file @@ -349,7 +350,7 @@ class KleinanzeigenBot(WebScrapingMixin): ad_cfg_orig = dicts.load_dict(ad_file, "ad") ad_cfg = copy.deepcopy(ad_cfg_orig) dicts.apply_defaults(ad_cfg, - self.config["ad_defaults"], + self.config.ad_defaults.model_dump(), ignore = lambda k, _: k == "description", override = lambda _, v: v == "" # noqa: PLC1901 can be simplified to `not v` as an empty string is falsey ) @@ -462,40 +463,44 @@ class KleinanzeigenBot(WebScrapingMixin): return ads def load_config(self) -> None: - config_defaults = dicts.load_dict_from_module(resources, "config_defaults.yaml") - config = dicts.load_dict_if_exists(self.config_file_path, _("config")) - - if config is None: + # write default config.yaml if config file does not exist + if not os.path.exists(self.config_file_path): LOG.warning("Config file %s does not exist. 
Creating it with default values...", self.config_file_path) - dicts.save_dict(self.config_file_path, config_defaults) - config = {} + default_config = Config.model_construct() + default_config.login.username = "" + default_config.login.password = "" + dicts.save_dict(self.config_file_path, default_config.model_dump(exclude_none = True, exclude = { + "ad_defaults": { + "description" # deprecated + } + }), header = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/config.schema.json") - self.config = dicts.apply_defaults(config, config_defaults) + config_yaml = dicts.load_dict_if_exists(self.config_file_path, _("config")) + self.config = Config.model_validate(config_yaml, strict = True, context = self.config_file_path) + # load built-in category mappings self.categories = dicts.load_dict_from_module(resources, "categories.yaml", "categories") deprecated_categories = dicts.load_dict_from_module(resources, "categories_old.yaml", "categories") self.categories.update(deprecated_categories) - if self.config["categories"]: - self.categories.update(self.config["categories"]) + if self.config.categories: + self.categories.update(self.config.categories) LOG.info(" -> found %s", pluralize("category", self.categories)) - ensure(self.config["login"]["username"], f"[login.username] not specified @ [{self.config_file_path}]") - ensure(self.config["login"]["password"], f"[login.password] not specified @ [{self.config_file_path}]") - - self.browser_config.arguments = self.config["browser"]["arguments"] - self.browser_config.binary_location = self.config["browser"]["binary_location"] - self.browser_config.extensions = [abspath(item, relative_to = self.config_file_path) for item in self.config["browser"]["extensions"]] - self.browser_config.use_private_window = self.config["browser"]["use_private_window"] - if self.config["browser"]["user_data_dir"]: - self.browser_config.user_data_dir = 
abspath(self.config["browser"]["user_data_dir"], relative_to = self.config_file_path) - self.browser_config.profile_name = self.config["browser"]["profile_name"] + # populate browser_config object used by WebScrapingMixin + self.browser_config.arguments = self.config.browser.arguments + self.browser_config.binary_location = self.config.browser.binary_location + self.browser_config.extensions = [abspath(item, relative_to = self.config_file_path) for item in self.config.browser.extensions] + self.browser_config.use_private_window = self.config.browser.use_private_window + if self.config.browser.user_data_dir: + self.browser_config.user_data_dir = abspath(self.config.browser.user_data_dir, relative_to = self.config_file_path) + self.browser_config.profile_name = self.config.browser.profile_name async def login(self) -> None: LOG.info("Checking if already logged in...") await self.web_open(f"{self.root_url}") if await self.is_logged_in(): - LOG.info("Already logged in as [%s]. Skipping login.", self.config["login"]["username"]) + LOG.info("Already logged in as [%s]. 
Skipping login.", self.config.login.username) return LOG.info("Opening login page...") @@ -519,9 +524,9 @@ class KleinanzeigenBot(WebScrapingMixin): await self.handle_after_login_logic() async def fill_login_data_and_send(self) -> None: - LOG.info("Logging in as [%s]...", self.config["login"]["username"]) - await self.web_input(By.ID, "email", self.config["login"]["username"]) - await self.web_input(By.ID, "password", self.config["login"]["password"]) + LOG.info("Logging in as [%s]...", self.config.login.username) + await self.web_input(By.ID, "email", self.config.login.username) + await self.web_input(By.ID, "password", self.config.login.password) await self.web_click(By.CSS_SELECTOR, "form#login-form button[type='submit']") async def handle_after_login_logic(self) -> None: @@ -546,13 +551,13 @@ class KleinanzeigenBot(WebScrapingMixin): try: # Try to find the standard element first user_info = await self.web_text(By.CLASS_NAME, "mr-medium") - if self.config["login"]["username"].lower() in user_info.lower(): + if self.config.login.username.lower() in user_info.lower(): return True except TimeoutError: try: # If standard element not found, try the alternative user_info = await self.web_text(By.ID, "user-email") - if self.config["login"]["username"].lower() in user_info.lower(): + if self.config.login.username.lower() in user_info.lower(): return True except TimeoutError: return False @@ -567,7 +572,7 @@ class KleinanzeigenBot(WebScrapingMixin): for (ad_file, ad_cfg, _ad_cfg_orig) in ad_cfgs: count += 1 LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file) - await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config["publishing"]["delete_old_ads_by_title"]) + await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title) await self.web_sleep() LOG.info("############################################") @@ -624,7 +629,7 @@ class 
KleinanzeigenBot(WebScrapingMixin): await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads) await self.web_await(lambda: self.web_check(By.ID, "checking-done", Is.DISPLAYED), timeout = 5 * 60) - if self.config["publishing"]["delete_old_ads"] == "AFTER_PUBLISH" and not self.keep_old_ads: + if self.config.publishing.delete_old_ads == "AFTER_PUBLISH" and not self.keep_old_ads: await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = False) LOG.info("############################################") @@ -639,8 +644,8 @@ class KleinanzeigenBot(WebScrapingMixin): """ await self.assert_free_ad_limit_not_reached() - if self.config["publishing"]["delete_old_ads"] == "BEFORE_PUBLISH" and not self.keep_old_ads: - await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config["publishing"]["delete_old_ads_by_title"]) + if self.config.publishing.delete_old_ads == "BEFORE_PUBLISH" and not self.keep_old_ads: + await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title) LOG.info("Publishing ad '%s'...", ad_cfg["title"]) @@ -779,9 +784,9 @@ class KleinanzeigenBot(WebScrapingMixin): "iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']", timeout = 2) - if self.config.get("captcha", {}).get("auto_restart", False): + if self.config.captcha.auto_restart: LOG.warning("Captcha recognized - auto-restart enabled, abort run...") - raise CaptchaEncountered(misc.parse_duration(self.config.get("captcha", {}).get("restart_delay", "6h"))) + raise CaptchaEncountered(misc.parse_duration(self.config.captcha.restart_delay)) # Fallback: manuell LOG.warning("############################################") @@ -1036,7 +1041,7 @@ class KleinanzeigenBot(WebScrapingMixin): async def assert_free_ad_limit_not_reached(self) -> None: try: await self.web_find(By.XPATH, "/html/body/div[1]/form/fieldset[6]/div[1]/header", timeout = 2) - raise AssertionError(f"Cannot publish more ads. 
The monthly limit of free ads of account {self.config['login']['username']} is reached.") + raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config.login.username} is reached.") except TimeoutError: pass diff --git a/src/kleinanzeigen_bot/ads.py b/src/kleinanzeigen_bot/ads.py index 3ca53e1..8fc680f 100644 --- a/src/kleinanzeigen_bot/ads.py +++ b/src/kleinanzeigen_bot/ads.py @@ -4,7 +4,7 @@ import hashlib, json, os # isort: skip from typing import Any, Final -from .utils import dicts +from .model.config_model import Config MAX_DESCRIPTION_LENGTH:Final[int] = 4000 @@ -40,7 +40,7 @@ def calculate_content_hash(ad_cfg:dict[str, Any]) -> str: return hashlib.sha256(content_str.encode()).hexdigest() -def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str: +def get_description_affixes(config:Config, *, prefix:bool = True) -> str: """Get prefix or suffix for description with proper precedence. This function handles both the new flattened format and legacy nested format: @@ -65,24 +65,21 @@ def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str Example: >>> config = {"ad_defaults": {"description_prefix": "Hello", "description": {"prefix": "Hi"}}} - >>> get_description_affixes(config, prefix=True) + >>> get_description_affixes(Config.model_validate(config), prefix=True) 'Hello' """ - # Handle edge cases - if not isinstance(config, dict): - return "" - affix_type = "prefix" if prefix else "suffix" # First try new flattened format (description_prefix/description_suffix) flattened_key = f"description_{affix_type}" - flattened_value = dicts.safe_get(config, "ad_defaults", flattened_key) + flattened_value = getattr(config.ad_defaults, flattened_key) if isinstance(flattened_value, str): return flattened_value # Then try legacy nested format (description.prefix/description.suffix) - nested_value = dicts.safe_get(config, "ad_defaults", "description", affix_type) - if 
isinstance(nested_value, str): - return nested_value + if config.ad_defaults.description: + nested_value = getattr(config.ad_defaults.description, affix_type) + if isinstance(nested_value, str): + return nested_value return "" diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 9bfa268..4ea8cdb 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -7,6 +7,7 @@ from datetime import datetime from typing import Any, Final from .ads import calculate_content_hash, get_description_affixes +from .model.config_model import Config from .utils import dicts, i18n, loggers, misc, reflect from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin @@ -22,7 +23,7 @@ class AdExtractor(WebScrapingMixin): Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page. """ - def __init__(self, browser:Browser, config:dict[str, Any]) -> None: + def __init__(self, browser:Browser, config:Config) -> None: super().__init__() self.browser = browser self.config = config @@ -432,11 +433,8 @@ class AdExtractor(WebScrapingMixin): # Convert Euro to cents and round to nearest integer price_in_cent = round(ship_costs * 100) - # Get excluded shipping options from config - excluded_options = self.config.get("download", {}).get("excluded_shipping_options", []) - # If include_all_matching_shipping_options is enabled, get all options for the same package size - if self.config.get("download", {}).get("include_all_matching_shipping_options", False): + if self.config.download.include_all_matching_shipping_options: # Find all options with the same price to determine the package size matching_options = [opt for opt in shipping_costs if opt["priceInEuroCent"] == price_in_cent] if not matching_options: @@ -451,7 +449,7 @@ class AdExtractor(WebScrapingMixin): for opt in shipping_costs if opt["packageSize"] == matching_size and opt["id"] in shipping_option_mapping - 
and shipping_option_mapping[opt["id"]] not in excluded_options + and shipping_option_mapping[opt["id"]] not in self.config.download.excluded_shipping_options ] else: # Only use the matching option if it's not excluded @@ -460,7 +458,7 @@ class AdExtractor(WebScrapingMixin): return "NOT_APPLICABLE", ship_costs, shipping_options shipping_option = shipping_option_mapping.get(matching_option["id"]) - if not shipping_option or shipping_option in excluded_options: + if not shipping_option or shipping_option in self.config.download.excluded_shipping_options: return "NOT_APPLICABLE", ship_costs, shipping_options shipping_options = [shipping_option] diff --git a/src/kleinanzeigen_bot/model/__init__.py b/src/kleinanzeigen_bot/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/kleinanzeigen_bot/model/config_model.py b/src/kleinanzeigen_bot/model/config_model.py new file mode 100644 index 0000000..cbb195b --- /dev/null +++ b/src/kleinanzeigen_bot/model/config_model.py @@ -0,0 +1,144 @@ +# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +from __future__ import annotations + +import copy +from typing import Any, Dict, List, Literal + +from pydantic import Field, model_validator, validator +from typing_extensions import deprecated + +from kleinanzeigen_bot.utils import dicts +from kleinanzeigen_bot.utils.pydantics import ContextualModel + + +class ContactDefaults(ContextualModel): + name:str | None = None + street:str | None = None + zipcode:int | str | None = None + phone:str | None = None + + +@deprecated("Use description_prefix/description_suffix instead") +class DescriptionAffixes(ContextualModel): + prefix:str | None = None + suffix:str | None = None + + +class AdDefaults(ContextualModel): + active:bool = True + type:Literal["OFFER", "WANTED"] = "OFFER" + description:DescriptionAffixes | None = 
None + description_prefix:str | None = Field(default = None, description = "prefix for the ad description") + description_suffix:str | None = Field(default = None, description = " suffix for the ad description") + price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE" + shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING" + sell_directly:bool = Field(default = False, description = "requires shipping_type SHIPPING to take effect") + contact:ContactDefaults = Field(default_factory = ContactDefaults) + republication_interval:int = 7 + + @model_validator(mode = "before") + @classmethod + def unify_description(cls, values:Dict[str, Any]) -> Dict[str, Any]: + # Ensure flat prefix/suffix take precedence over deprecated nested "description" + desc = values.get("description") + flat_prefix = values.get("description_prefix") + flat_suffix = values.get("description_suffix") + + if not flat_prefix and isinstance(desc, dict) and desc.get("prefix") is not None: + values["description_prefix"] = desc.get("prefix", "") + if not flat_suffix and isinstance(desc, dict) and desc.get("suffix") is not None: + values["description_suffix"] = desc.get("suffix", "") + return values + + +class DownloadConfig(ContextualModel): + include_all_matching_shipping_options:bool = Field( + default = False, + description = "if true, all shipping options matching the package size will be included" + ) + excluded_shipping_options:List[str] = Field( + default_factory = list, + description = "list of shipping options to exclude, e.g. 
['DHL_2', 'DHL_5']" + ) + + +class BrowserConfig(ContextualModel): + arguments:List[str] = Field( + default_factory = list, + description = "See https://peter.sh/experiments/chromium-command-line-switches/" + ) + binary_location:str | None = Field( + default = None, + description = "path to custom browser executable, if not specified will be looked up on PATH" + ) + extensions:List[str] = Field( + default_factory = list, + description = "a list of .crx extension files to be loaded" + ) + use_private_window:bool = True + user_data_dir:str | None = Field( + default = None, + description = "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md" + ) + profile_name:str | None = None + + +class LoginConfig(ContextualModel): + username:str = Field(..., min_length = 1) + password:str = Field(..., min_length = 1) + + +class PublishingConfig(ContextualModel): + delete_old_ads:Literal["BEFORE_PUBLISH", "AFTER_PUBLISH", "NEVER"] | None = "AFTER_PUBLISH" + delete_old_ads_by_title:bool = Field(default = True, description = "only works if delete_old_ads is set to BEFORE_PUBLISH") + + +class CaptchaConfig(ContextualModel): + auto_restart:bool = False + restart_delay:str = "6h" + + +class Config(ContextualModel): + ad_files:List[str] = Field( + default_factory = lambda: ["./**/ad_*.{json,yml,yaml}"], + min_items = 1, + description = """ +glob (wildcard) patterns to select ad configuration files +if relative paths are specified, then they are relative to this configuration file +""" + ) # type: ignore[call-overload] + + ad_defaults:AdDefaults = Field( + default_factory = AdDefaults, + description = "Default values for ads, can be overwritten in each ad configuration file" + ) + + categories:Dict[str, str] = Field(default_factory = dict, description = """ +additional name to category ID mappings, see default list at +https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml + +Example: + categories: + 
Elektronik > Notebooks: 161/278 + Jobs > Praktika: 102/125 + """) + + download:DownloadConfig = Field(default_factory = DownloadConfig) + publishing:PublishingConfig = Field(default_factory = PublishingConfig) + browser:BrowserConfig = Field(default_factory = BrowserConfig, description = "Browser configuration") + login:LoginConfig = Field(default_factory = LoginConfig.model_construct, description = "Login credentials") + captcha:CaptchaConfig = Field(default_factory = CaptchaConfig) + + def with_values(self, values:dict[str, Any]) -> Config: + return Config.model_validate( + dicts.apply_defaults(copy.deepcopy(values), defaults = self.model_dump()) + ) + + @validator("ad_files", each_item = True) + @classmethod + def _non_empty_glob_pattern(cls, v:str) -> str: + if not v.strip(): + raise ValueError("ad_files entries must be non-empty glob patterns") + return v diff --git a/src/kleinanzeigen_bot/resources/config_defaults.yaml b/src/kleinanzeigen_bot/resources/config_defaults.yaml deleted file mode 100644 index 3455e58..0000000 --- a/src/kleinanzeigen_bot/resources/config_defaults.yaml +++ /dev/null @@ -1,52 +0,0 @@ -ad_files: - - "./**/ad_*.{json,yml,yaml}" - -# default values for ads, can be overwritten in each ad configuration file -ad_defaults: - active: true - type: OFFER # one of: OFFER, WANTED - description_prefix: "" # prefix for the ad description - description_suffix: "" # suffix for the ad description - - price_type: NEGOTIABLE # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE - shipping_type: SHIPPING # one of: PICKUP, SHIPPING, NOT_APPLICABLE - sell_directly: false # requires shipping_options to take effect - contact: - name: "" - street: "" - zipcode: - phone: "" # IMPORTANT: surround phone number with quotes to prevent removal of leading zeros - republication_interval: 7 # every X days ads should be re-published - -# additional name to category ID mappings, see default list at -# 
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml -# -# categories: -# Elektronik > Notebooks: 161/278 -# Jobs > Praktika: 102/125 -categories: {} - -download: - # if true, all shipping options matching the package size will be included - include_all_matching_shipping_options: false - # list of shipping options to exclude, e.g. ["DHL_2", "DHL_5"] - excluded_shipping_options: [] - -publishing: - delete_old_ads: "AFTER_PUBLISH" # one of: AFTER_PUBLISH, BEFORE_PUBLISH, NEVER - delete_old_ads_by_title: true # only works if delete_old_ads is set to BEFORE_PUBLISH - -# browser configuration -browser: - # https://peter.sh/experiments/chromium-command-line-switches/ - arguments: [] - binary_location: # path to custom browser executable, if not specified will be looked up on PATH - extensions: [] # a list of .crx extension files to be loaded - use_private_window: true - user_data_dir: "" # see https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md - profile_name: "" - -# login credentials -login: - username: "" - password: "" diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 645c448..258395b 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -207,7 +207,7 @@ kleinanzeigen_bot/utils/error_handlers.py: "Aborted on user request.": "Auf Benutzeranfrage abgebrochen." 
on_exception: "%s: %s": "%s: %s" - "Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s": "Unbekannter Fehler aufgetreten (fehlende Fehlerinformation): ex_type=%s, ex_value=%s" + "Unknown exception occurred (missing exception info): ex_type=%s, ex=%s": "Unbekannter Fehler aufgetreten (fehlende Fehlerinformation): ex_type=%s, ex_value=%s" ################################################# kleinanzeigen_bot/utils/loggers.py: @@ -229,6 +229,117 @@ kleinanzeigen_bot/utils/dicts.py: load_dict_from_module: "Loading %s[%s.%s]...": "Lade %s[%s.%s]..." +################################################# +kleinanzeigen_bot/utils/pydantics.py: +################################################# + __get_message_template: + "Object has no attribute '{attribute}'": "Objekt hat kein Attribut '{attribute}'" + "Invalid JSON: {error}": "Ungültiges JSON: {error}" + "JSON input should be string, bytes or bytearray": "JSON-Eingabe sollte eine Zeichenkette, Bytes oder Bytearray sein" + "Cannot check `{method_name}` when validating from json, use a JsonOrPython validator instead": "Kann `{method_name}` beim Validieren von JSON nicht prüfen, verwende stattdessen einen JsonOrPython-Validator" + "Recursion error - cyclic reference detected": "Rekursionsfehler – zirkuläre Referenz erkannt" + "Field required": "Feld erforderlich" + "Field is frozen": "Feld ist gesperrt" + "Instance is frozen": "Instanz ist gesperrt" + "Extra inputs are not permitted": "Zusätzliche Eingaben sind nicht erlaubt" + "Keys should be strings": "Schlüssel sollten Zeichenketten sein" + "Error extracting attribute: {error}": "Fehler beim Extrahieren des Attributs: {error}" + "Input should be a valid dictionary or instance of {class_name}": "Eingabe sollte ein gültiges Wörterbuch oder eine Instanz von {class_name} sein" + "Input should be a valid dictionary or object to extract fields from": "Eingabe sollte ein gültiges Wörterbuch oder Objekt sein, um Felder daraus zu extrahieren" + "Input should 
be a dictionary or an instance of {class_name}": "Eingabe sollte ein Wörterbuch oder eine Instanz von {class_name} sein" + "Input should be an instance of {class_name}": "Eingabe sollte eine Instanz von {class_name} sein" + "Input should be None": "Eingabe sollte None sein" + "Input should be greater than {gt}": "Eingabe sollte größer als {gt} sein" + "Input should be greater than or equal to {ge}": "Eingabe sollte größer oder gleich {ge} sein" + "Input should be less than {lt}": "Eingabe sollte kleiner als {lt} sein" + "Input should be less than or equal to {le}": "Eingabe sollte kleiner oder gleich {le} sein" + "Input should be a multiple of {multiple_of}": "Eingabe sollte ein Vielfaches von {multiple_of} sein" + "Input should be a finite number": "Eingabe sollte eine endliche Zahl sein" + "{field_type} should have at least {min_length} item{expected_plural} after validation, not {actual_length}": "{field_type} sollte nach der Validierung mindestens {min_length} Element{expected_plural} haben, nicht {actual_length}" + "{field_type} should have at most {max_length} item{expected_plural} after validation, not {actual_length}": "{field_type} sollte nach der Validierung höchstens {max_length} Element{expected_plural} haben, nicht {actual_length}" + "Input should be iterable": "Eingabe sollte iterierbar sein" + "Error iterating over object, error: {error}": "Fehler beim Iterieren des Objekts: {error}" + "Input should be a valid string": "Eingabe sollte eine gültige Zeichenkette sein" + "Input should be a string, not an instance of a subclass of str": "Eingabe sollte ein String sein, keine Instanz einer Unterklasse von str" + "Input should be a valid string, unable to parse raw data as a unicode string": "Eingabe sollte eine gültige Zeichenkette sein, Rohdaten können nicht als Unicode-String geparst werden" + "String should have at least {min_length} character{expected_plural}": "String sollte mindestens {min_length} Zeichen{expected_plural} haben" + "String should 
have at most {max_length} character{expected_plural}": "String sollte höchstens {max_length} Zeichen{expected_plural} haben" + "String should match pattern '{pattern}'": "String sollte dem Muster '{pattern}' entsprechen" + "Input should be {expected}": "Eingabe sollte {expected} sein" + "Input should be a valid dictionary": "Eingabe sollte ein gültiges Wörterbuch sein" + "Input should be a valid mapping, error: {error}": "Eingabe sollte eine gültige Zuordnung sein, Fehler: {error}" + "Input should be a valid list": "Eingabe sollte eine gültige Liste sein" + "Input should be a valid tuple": "Eingabe sollte ein gültiges Tupel sein" + "Input should be a valid set": "Eingabe sollte eine gültige Menge sein" + "Set items should be hashable": "Elemente einer Menge sollten hashbar sein" + "Input should be a valid boolean": "Eingabe sollte ein gültiger Boolescher Wert sein" + "Input should be a valid boolean, unable to interpret input": "Eingabe sollte ein gültiger Boolescher Wert sein, Eingabe kann nicht interpretiert werden" + "Input should be a valid integer": "Eingabe sollte eine gültige Ganzzahl sein" + "Input should be a valid integer, unable to parse string as an integer": "Eingabe sollte eine gültige Ganzzahl sein, Zeichenkette konnte nicht als Ganzzahl geparst werden" + "Input should be a valid integer, got a number with a fractional part": "Eingabe sollte eine gültige Ganzzahl sein, Zahl hat einen Dezimalteil" + "Unable to parse input string as an integer, exceeded maximum size": "Zeichenkette konnte nicht als Ganzzahl geparst werden, maximale Größe überschritten" + "Input should be a valid number": "Eingabe sollte eine gültige Zahl sein" + "Input should be a valid number, unable to parse string as a number": "Eingabe sollte eine gültige Zahl sein, Zeichenkette kann nicht als Zahl geparst werden" + "Input should be a valid bytes": "Eingabe sollte gültige Bytes sein" + "Data should have at least {min_length} byte{expected_plural}": "Daten sollten mindestens 
{min_length} Byte{expected_plural} enthalten" + "Data should have at most {max_length} byte{expected_plural}": "Daten sollten höchstens {max_length} Byte{expected_plural} enthalten" + "Data should be valid {encoding}: {encoding_error}": "Daten sollten gültiges {encoding} sein: {encoding_error}" + "Value error, {error}": "Wertfehler: {error}" + "Assertion failed, {error}": "Assertion fehlgeschlagen: {error}" + "Input should be a valid date": "Eingabe sollte ein gültiges Datum sein" + "Input should be a valid date in the format YYYY-MM-DD, {error}": "Eingabe sollte ein gültiges Datum im Format YYYY-MM-DD sein: {error}" + "Input should be a valid date or datetime, {error}": "Eingabe sollte ein gültiges Datum oder eine gültige Datums-Uhrzeit sein: {error}" + "Datetimes provided to dates should have zero time - e.g. be exact dates": "Datetime-Werte für Datum sollten keine Uhrzeit enthalten – also exakte Daten sein" + "Date should be in the past": "Datum sollte in der Vergangenheit liegen" + "Date should be in the future": "Datum sollte in der Zukunft liegen" + "Input should be a valid time": "Eingabe sollte eine gültige Uhrzeit sein" + "Input should be in a valid time format, {error}": "Eingabe sollte in einem gültigen Zeitformat sein: {error}" + "Input should be a valid datetime": "Eingabe sollte ein gültiges Datum mit Uhrzeit sein" + "Input should be a valid datetime, {error}": "Eingabe sollte ein gültiges Datum mit Uhrzeit sein: {error}" + "Invalid datetime object, got {error}": "Ungültiges Datetime-Objekt: {error}" + "Input should be a valid datetime or date, {error}": "Eingabe sollte ein gültiges Datum oder Datum mit Uhrzeit sein: {error}" + "Input should be in the past": "Eingabe sollte in der Vergangenheit liegen" + "Input should be in the future": "Eingabe sollte in der Zukunft liegen" + "Input should not have timezone info": "Eingabe sollte keine Zeitzonen-Information enthalten" + "Input should have timezone info": "Eingabe sollte Zeitzonen-Information 
enthalten" + "Timezone offset of {tz_expected} required, got {tz_actual}": "Zeitzonen-Offset von {tz_expected} erforderlich, erhalten: {tz_actual}" + "Input should be a valid timedelta": "Eingabe sollte ein gültiges Zeitdelta sein" + "Input should be a valid timedelta, {error}": "Eingabe sollte ein gültiges Zeitdelta sein: {error}" + "Input should be a valid frozenset": "Eingabe sollte ein gültiges Frozenset sein" + "Input should be an instance of {class}": "Eingabe sollte eine Instanz von {class} sein" + "Input should be a subclass of {class}": "Eingabe sollte eine Unterklasse von {class} sein" + "Input should be callable": "Eingabe sollte aufrufbar sein" + "Input tag '{tag}' found using {discriminator} does not match any of the expected tags: {expected_tags}": "Eingabe-Tag '{tag}', ermittelt durch {discriminator}, stimmt mit keinem der erwarteten Tags überein: {expected_tags}" + "Unable to extract tag using discriminator {discriminator}": "Tag kann mit {discriminator} nicht extrahiert werden" + "Arguments must be a tuple, list or a dictionary": "Argumente müssen ein Tupel, eine Liste oder ein Wörterbuch sein" + "Missing required argument": "Erforderliches Argument fehlt" + "Unexpected keyword argument": "Unerwartetes Schlüsselwort-Argument" + "Missing required keyword only argument": "Erforderliches keyword-only-Argument fehlt" + "Unexpected positional argument": "Unerwartetes Positionsargument" + "Missing required positional only argument": "Erforderliches positional-only-Argument fehlt" + "Got multiple values for argument": "Mehrere Werte für Argument erhalten" + "URL input should be a string or URL": "URL-Eingabe sollte eine Zeichenkette oder URL sein" + "Input should be a valid URL, {error}": "Eingabe sollte eine gültige URL sein: {error}" + "Input violated strict URL syntax rules, {error}": "Eingabe hat strikte URL-Syntaxregeln verletzt: {error}" + "URL should have at most {max_length} character{expected_plural}": "URL sollte höchstens {max_length} 
Zeichen{expected_plural} haben" + "URL scheme should be {expected_schemes}": "URL-Schema sollte {expected_schemes} sein" + "UUID input should be a string, bytes or UUID object": "UUID-Eingabe sollte eine Zeichenkette, Bytes oder ein UUID-Objekt sein" + "Input should be a valid UUID, {error}": "Eingabe sollte eine gültige UUID sein: {error}" + "UUID version {expected_version} expected": "UUID-Version {expected_version} erwartet" + "Decimal input should be an integer, float, string or Decimal object": "Decimal-Eingabe sollte eine Ganzzahl, Gleitkommazahl, Zeichenkette oder ein Decimal-Objekt sein" + "Input should be a valid decimal": "Eingabe sollte ein gültiges Decimal sein" + "Decimal input should have no more than {max_digits} digit{expected_plural} in total": "Decimal-Eingabe sollte insgesamt nicht mehr als {max_digits} Ziffer{expected_plural} haben" + "Decimal input should have no more than {decimal_places} decimal place{expected_plural}": "Decimal-Eingabe sollte nicht mehr als {decimal_places} Dezimalstelle{expected_plural} haben" + "Decimal input should have no more than {whole_digits} digit{expected_plural} before the decimal point": "Decimal-Eingabe sollte vor dem Dezimalpunkt nicht mehr als {whole_digits} Ziffer{expected_plural} haben" + ? 
"Input should be a valid python complex object, a number, or a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex" + : "Eingabe sollte ein gültiges Python-komplexes Objekt, eine Zahl oder eine gültige komplexe Zeichenkette sein, gemäß https://docs.python.org/3/library/functions.html#complex" + "Input should be a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex": "Eingabe sollte eine gültige komplexe Zeichenkette sein, gemäß https://docs.python.org/3/library/functions.html#complex" + + format_validation_error: + "validation error": "Validierungsfehler" + "%s for [%s]:": "%s für [%s]:" + "' or '": "' oder '" + ################################################# kleinanzeigen_bot/utils/web_scraping_mixin.py: ################################################# diff --git a/src/kleinanzeigen_bot/utils/dicts.py b/src/kleinanzeigen_bot/utils/dicts.py index 3709f7d..f5bce64 100644 --- a/src/kleinanzeigen_bot/utils/dicts.py +++ b/src/kleinanzeigen_bot/utils/dicts.py @@ -58,9 +58,9 @@ def apply_defaults( return target -def defaultdict_to_dict(d: defaultdict[K, V]) -> dict[K, V]: +def defaultdict_to_dict(d:defaultdict[K, V]) -> dict[K, V]: """Recursively convert defaultdict to dict.""" - result: dict[K, V] = {} + result:dict[K, V] = {} for key, value in d.items(): if isinstance(value, defaultdict): result[key] = defaultdict_to_dict(value) # type: ignore[assignment] diff --git a/src/kleinanzeigen_bot/utils/error_handlers.py b/src/kleinanzeigen_bot/utils/error_handlers.py index 3f93e6e..2f5ac16 100644 --- a/src/kleinanzeigen_bot/utils/error_handlers.py +++ b/src/kleinanzeigen_bot/utils/error_handlers.py @@ -5,24 +5,29 @@ import sys, traceback # isort: skip from types import FrameType, TracebackType from typing import Final +from pydantic import ValidationError + from . 
def on_exception(ex_type:type[BaseException] | None, ex:BaseException | None, ex_traceback:TracebackType | None) -> None:
    """
    Log an uncaught exception and terminate the process with exit code 1.

    The three parameters mirror the (type, value, traceback) triple of an exception hook.
    If the type or the value is missing, only a diagnostic message is logged and the
    function returns without exiting.

    Reporting depends on the exception:
    - KeyboardInterrupt is passed to the interpreter's original excepthook
    - with debug logging enabled, or for AttributeError, ImportError, NameError and TypeError,
      the full traceback is logged
    - pydantic ValidationErrors are logged via format_validation_error
    - AssertionErrors are logged as-is
    - anything else is logged as "<ExceptionType>: <message>"
    """
    if ex_type is None or ex is None:
        LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex=%s", ex_type, ex)
        return

    # exception types for which the traceback is always logged, regardless of the log level
    always_traced = (AttributeError, ImportError, NameError, TypeError)

    if issubclass(ex_type, KeyboardInterrupt):
        sys.__excepthook__(ex_type, ex, ex_traceback)
    elif loggers.is_debug(LOG) or isinstance(ex, always_traced):
        trace_parts = traceback.format_exception(ex_type, ex, ex_traceback)
        LOG.error("".join(trace_parts))
    elif isinstance(ex, ValidationError):
        report = format_validation_error(ex)
        LOG.error(report)
    elif isinstance(ex, AssertionError):
        LOG.error(ex)
    else:
        LOG.error("%s: %s", ex_type.__name__, ex)

    sys.exit(1)
100644 index 0000000..f023cc0 --- /dev/null +++ b/src/kleinanzeigen_bot/utils/pydantics.py @@ -0,0 +1,195 @@ +# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +from gettext import gettext as _ +from typing import Any, cast + +from pydantic import BaseModel, ValidationError +from pydantic_core import InitErrorDetails +from typing_extensions import Self + +from kleinanzeigen_bot.utils.i18n import pluralize + + +class ContextualValidationError(ValidationError): + context:Any + + +class ContextualModel(BaseModel): + + @classmethod + def model_validate( + cls, + obj:Any, + *, + strict:bool | None = None, + from_attributes:bool | None = None, + context:Any | None = None, + by_alias:bool | None = None, + by_name:bool | None = None, + ) -> Self: + """ + Proxy to BaseModel.model_validate, but on error re‐raise as + ContextualValidationError including the passed context. + """ + try: + return super().model_validate( + obj, + strict = strict, + from_attributes = from_attributes, + context = context, + by_alias = by_alias, + by_name = by_name, + ) + except ValidationError as ex: + new_ex = ContextualValidationError.from_exception_data( + title = ex.title, + line_errors = cast(list[InitErrorDetails], ex.errors()), + ) + new_ex.context = context + raise new_ex from ex + + +def format_validation_error(ex:ValidationError) -> str: + """ + Turn a Pydantic ValidationError into the classic: + N validation errors for ModelName + field + message [type=code] + + >>> from pydantic import BaseModel, ValidationError + >>> class M(BaseModel): x: int + >>> try: + ... M(x="no-int") + ... except ValidationError as e: + ... 
print(format_validation_error(e)) + 1 validation error for [M]: + - x: Input should be a valid integer, unable to parse string as an integer + """ + errors = ex.errors(include_url = False, include_input = False, include_context = True) + ctx = ex.context if isinstance(ex, ContextualValidationError) and ex.context else ex.title + header = _("%s for [%s]:") % (pluralize("validation error", ex.error_count()), ctx) + lines = [header] + for err in errors: + loc = ".".join(str(p) for p in err["loc"]) + msg_ctx = err.get("ctx") + code = err["type"] + msg_template = __get_message_template(code) + if msg_template: + msg = _(msg_template).format(**msg_ctx) if msg_ctx else msg_template + msg = msg.replace("' or '", _("' or '")) + lines.append(f"- {loc}: {msg}") + else: + lines.append(f"- {loc}: {err['msg']} [type={code}]") + return "\n".join(lines) + + +def __get_message_template(error_code:str) -> str | None: + # https://github.com/pydantic/pydantic-core/blob/d03bf4a01ca3b378cc8590bd481f307e82115bc6/src/errors/types.rs#L477 + # ruff: noqa: PLR0911 Too many return statements + # ruff: noqa: PLR0912 Too many branches + # ruff: noqa: E701 Multiple statements on one line (colon) + match error_code: + case "no_such_attribute": return _("Object has no attribute '{attribute}'") + case "json_invalid": return _("Invalid JSON: {error}") + case "json_type": return _("JSON input should be string, bytes or bytearray") + case "needs_python_object": return _("Cannot check `{method_name}` when validating from json, use a JsonOrPython validator instead") + case "recursion_loop": return _("Recursion error - cyclic reference detected") + case "missing": return _("Field required") + case "frozen_field": return _("Field is frozen") + case "frozen_instance": return _("Instance is frozen") + case "extra_forbidden": return _("Extra inputs are not permitted") + case "invalid_key": return _("Keys should be strings") + case "get_attribute_error": return _("Error extracting attribute: {error}") + case 
"model_type": return _("Input should be a valid dictionary or instance of {class_name}") + case "model_attributes_type": return _("Input should be a valid dictionary or object to extract fields from") + case "dataclass_type": return _("Input should be a dictionary or an instance of {class_name}") + case "dataclass_exact_type": return _("Input should be an instance of {class_name}") + case "none_required": return _("Input should be None") + case "greater_than": return _("Input should be greater than {gt}") + case "greater_than_equal": return _("Input should be greater than or equal to {ge}") + case "less_than": return _("Input should be less than {lt}") + case "less_than_equal": return _("Input should be less than or equal to {le}") + case "multiple_of": return _("Input should be a multiple of {multiple_of}") + case "finite_number": return _("Input should be a finite number") + case "too_short": return _("{field_type} should have at least {min_length} item{expected_plural} after validation, not {actual_length}") + case "too_long": return _("{field_type} should have at most {max_length} item{expected_plural} after validation, not {actual_length}") + case "iterable_type": return _("Input should be iterable") + case "iteration_error": return _("Error iterating over object, error: {error}") + case "string_type": return _("Input should be a valid string") + case "string_sub_type": return _("Input should be a string, not an instance of a subclass of str") + case "string_unicode": return _("Input should be a valid string, unable to parse raw data as a unicode string") + case "string_too_short": return _("String should have at least {min_length} character{expected_plural}") + case "string_too_long": return _("String should have at most {max_length} character{expected_plural}") + case "string_pattern_mismatch": return _("String should match pattern '{pattern}'") + case "enum": return _("Input should be {expected}") + case "dict_type": return _("Input should be a valid 
dictionary") + case "mapping_type": return _("Input should be a valid mapping, error: {error}") + case "list_type": return _("Input should be a valid list") + case "tuple_type": return _("Input should be a valid tuple") + case "set_type": return _("Input should be a valid set") + case "set_item_not_hashable": return _("Set items should be hashable") + case "bool_type": return _("Input should be a valid boolean") + case "bool_parsing": return _("Input should be a valid boolean, unable to interpret input") + case "int_type": return _("Input should be a valid integer") + case "int_parsing": return _("Input should be a valid integer, unable to parse string as an integer") + case "int_from_float": return _("Input should be a valid integer, got a number with a fractional part") + case "int_parsing_size": return _("Unable to parse input string as an integer, exceeded maximum size") + case "float_type": return _("Input should be a valid number") + case "float_parsing": return _("Input should be a valid number, unable to parse string as a number") + case "bytes_type": return _("Input should be a valid bytes") + case "bytes_too_short": return _("Data should have at least {min_length} byte{expected_plural}") + case "bytes_too_long": return _("Data should have at most {max_length} byte{expected_plural}") + case "bytes_invalid_encoding": return _("Data should be valid {encoding}: {encoding_error}") + case "value_error": return _("Value error, {error}") + case "assertion_error": return _("Assertion failed, {error}") + case "custom_error": return None # handled separately + case "literal_error": return _("Input should be {expected}") + case "date_type": return _("Input should be a valid date") + case "date_parsing": return _("Input should be a valid date in the format YYYY-MM-DD, {error}") + case "date_from_datetime_parsing": return _("Input should be a valid date or datetime, {error}") + case "date_from_datetime_inexact": return _("Datetimes provided to dates should have zero 
time - e.g. be exact dates") + case "date_past": return _("Date should be in the past") + case "date_future": return _("Date should be in the future") + case "time_type": return _("Input should be a valid time") + case "time_parsing": return _("Input should be in a valid time format, {error}") + case "datetime_type": return _("Input should be a valid datetime") + case "datetime_parsing": return _("Input should be a valid datetime, {error}") + case "datetime_object_invalid": return _("Invalid datetime object, got {error}") + case "datetime_from_date_parsing": return _("Input should be a valid datetime or date, {error}") + case "datetime_past": return _("Input should be in the past") + case "datetime_future": return _("Input should be in the future") + case "timezone_naive": return _("Input should not have timezone info") + case "timezone_aware": return _("Input should have timezone info") + case "timezone_offset": return _("Timezone offset of {tz_expected} required, got {tz_actual}") + case "time_delta_type": return _("Input should be a valid timedelta") + case "time_delta_parsing": return _("Input should be a valid timedelta, {error}") + case "frozen_set_type": return _("Input should be a valid frozenset") + case "is_instance_of": return _("Input should be an instance of {class}") + case "is_subclass_of": return _("Input should be a subclass of {class}") + case "callable_type": return _("Input should be callable") + case "union_tag_invalid": return _("Input tag '{tag}' found using {discriminator} does not match any of the expected tags: {expected_tags}") + case "union_tag_not_found": return _("Unable to extract tag using discriminator {discriminator}") + case "arguments_type": return _("Arguments must be a tuple, list or a dictionary") + case "missing_argument": return _("Missing required argument") + case "unexpected_keyword_argument": return _("Unexpected keyword argument") + case "missing_keyword_only_argument": return _("Missing required keyword only argument") 
+ case "unexpected_positional_argument": return _("Unexpected positional argument") + case "missing_positional_only_argument": return _("Missing required positional only argument") + case "multiple_argument_values": return _("Got multiple values for argument") + case "url_type": return _("URL input should be a string or URL") + case "url_parsing": return _("Input should be a valid URL, {error}") + case "url_syntax_violation": return _("Input violated strict URL syntax rules, {error}") + case "url_too_long": return _("URL should have at most {max_length} character{expected_plural}") + case "url_scheme": return _("URL scheme should be {expected_schemes}") + case "uuid_type": return _("UUID input should be a string, bytes or UUID object") + case "uuid_parsing": return _("Input should be a valid UUID, {error}") + case "uuid_version": return _("UUID version {expected_version} expected") + case "decimal_type": return _("Decimal input should be an integer, float, string or Decimal object") + case "decimal_parsing": return _("Input should be a valid decimal") + case "decimal_max_digits": return _("Decimal input should have no more than {max_digits} digit{expected_plural} in total") + case "decimal_max_places": return _("Decimal input should have no more than {decimal_places} decimal place{expected_plural}") + case "decimal_whole_digits": return _("Decimal input should have no more than {whole_digits} digit{expected_plural} before the decimal point") + case "complex_type": return _("Input should be a valid python complex object, a number, or a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex") + case "complex_str_parsing": return _("Input should be a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex") + case _: return None diff --git a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py index b4cc117..3c2577d 100644 --- 
@pytest.fixture
def test_bot_config() -> Config:
    """Provides a minimal, validated bot configuration for tests.

    Only the following sections are set explicitly; everything else is left
    to the defaults of the Config model:
    - Ad defaults (contact name)
    - Login credentials (username/password)
    - Publishing settings
    """
    raw_config:dict[str, Any] = {
        "ad_defaults": {
            "contact": {
                "name": "dummy_name"
            },
        },
        "login": {
            "username": "dummy_user",
            "password": "dummy_password"
        },
        "publishing": {
            "delete_old_ads": "BEFORE_PUBLISH",
            "delete_old_ads_by_title": False
        }
    }
    return Config.model_validate(raw_config)


@pytest.fixture
def test_bot(test_bot_config:Config) -> KleinanzeigenBot:
    """Provides a fresh KleinanzeigenBot instance for each test.

    Dependencies:
        - test_bot_config: assigned to the bot as its configuration
    """
    bot = KleinanzeigenBot()
    bot.config = test_bot_config
    return bot
kleinanzeigen_bot.model.config_model import Config def test_calculate_content_hash_with_none_values() -> None: @@ -90,12 +91,6 @@ def test_calculate_content_hash_with_none_values() -> None: True, "" ), - # Test non-string values - ( - {"ad_defaults": {"description_prefix": 123, "description_suffix": True}}, - True, - "" - ), # Add test for malformed config ( {}, # Empty config @@ -108,69 +103,13 @@ def test_calculate_content_hash_with_none_values() -> None: True, "" ), - # Test for non-dict ad_defaults - ( - {"ad_defaults": "invalid"}, - True, - "" - ), - # Test for invalid type in description field - ( - {"ad_defaults": {"description": 123}}, - True, - "" - ) ]) def test_get_description_affixes( config:dict[str, Any], prefix:bool, - expected:str + expected:str, + test_bot_config:Config ) -> None: """Test get_description_affixes function with various inputs.""" - result = ads.get_description_affixes(config, prefix = prefix) - assert result == expected - - -@pytest.mark.parametrize(("config", "prefix", "expected"), [ - # Add test for malformed config - ( - {}, # Empty config - True, - "" - ), - # Test for missing ad_defaults - ( - {"some_other_key": {}}, - True, - "" - ), - # Test for non-dict ad_defaults - ( - {"ad_defaults": "invalid"}, - True, - "" - ), - # Test for invalid type in description field - ( - {"ad_defaults": {"description": 123}}, - True, - "" - ) -]) -def test_get_description_affixes_edge_cases(config:dict[str, Any], prefix:bool, expected:str) -> None: - """Test edge cases for description affix handling.""" - assert ads.get_description_affixes(config, prefix = prefix) == expected - - -@pytest.mark.parametrize(("config", "expected"), [ - (None, ""), # Test with None - ([], ""), # Test with an empty list - ("string", ""), # Test with a string - (123, ""), # Test with an integer - (3.14, ""), # Test with a float - (set(), ""), # Test with an empty set -]) -def test_get_description_affixes_edge_cases_non_dict(config:Any, expected:str) -> None: - """Test 
get_description_affixes function with non-dict inputs.""" - result = ads.get_description_affixes(config, prefix = True) + result = ads.get_description_affixes(test_bot_config.with_values(config), prefix = prefix) assert result == expected diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py index c9fbc0b..270f626 100644 --- a/tests/unit/test_extract.py +++ b/tests/unit/test_extract.py @@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, call, patch import pytest from kleinanzeigen_bot.extract import AdExtractor +from kleinanzeigen_bot.model.config_model import Config, DownloadConfig from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element @@ -36,11 +37,11 @@ class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never class TestAdExtractorBasics: """Basic synchronous tests for AdExtractor.""" - def test_constructor(self, browser_mock:MagicMock, sample_config:dict[str, Any]) -> None: + def test_constructor(self, browser_mock:MagicMock, test_bot_config:Config) -> None: """Test the constructor of AdExtractor""" - extractor = AdExtractor(browser_mock, sample_config) + extractor = AdExtractor(browser_mock, test_bot_config) assert extractor.browser == browser_mock - assert extractor.config == sample_config + assert extractor.config == test_bot_config @pytest.mark.parametrize(("url", "expected_id"), [ ("https://www.kleinanzeigen.de/s-anzeige/test-title/12345678", 12345678), @@ -168,7 +169,7 @@ class TestAdExtractorShipping: } # Enable all matching options in config - test_extractor.config["download"] = {"include_all_matching_shipping_options": True} + test_extractor.config.download = DownloadConfig.model_validate({"include_all_matching_shipping_options": True}) with patch.object(test_extractor, "page", MagicMock()), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ @@ -202,10 +203,10 @@ class TestAdExtractorShipping: } # Enable all matching options 
and exclude DHL in config - test_extractor.config["download"] = { + test_extractor.config.download = DownloadConfig.model_validate({ "include_all_matching_shipping_options": True, "excluded_shipping_options": ["DHL_2"] - } + }) with patch.object(test_extractor, "page", MagicMock()), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ @@ -238,9 +239,9 @@ class TestAdExtractorShipping: } # Exclude the matching option - test_extractor.config["download"] = { + test_extractor.config.download = DownloadConfig.model_validate({ "excluded_shipping_options": ["Hermes_Päckchen"] - } + }) with patch.object(test_extractor, "page", MagicMock()), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \ @@ -407,13 +408,14 @@ class TestAdExtractorContent: def extractor_with_config(self) -> AdExtractor: """Create extractor with specific config for testing prefix/suffix handling.""" browser_mock = MagicMock(spec = Browser) - return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests + return AdExtractor(browser_mock, Config()) # Empty config, will be overridden in tests @pytest.mark.asyncio async def test_extract_description_with_affixes( self, test_extractor:AdExtractor, - description_test_cases:list[tuple[dict[str, Any], str, str]] + description_test_cases:list[tuple[dict[str, Any], str, str]], + test_bot_config:Config ) -> None: """Test extraction of description with various prefix/suffix configurations.""" # Mock the page @@ -422,7 +424,7 @@ class TestAdExtractorContent: test_extractor.page = page_mock for config, raw_description, _ in description_test_cases: # Changed to _ since we don't use expected_description - test_extractor.config = config + test_extractor.config = test_bot_config.with_values(config) with patch.multiple(test_extractor, web_text = AsyncMock(side_effect = [ @@ -483,7 +485,6 @@ class TestAdExtractorContent: page_mock = 
MagicMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" test_extractor.page = page_mock - test_extractor.config = {"ad_defaults": {}} # Empty config raw_description = "Original Description" with patch.multiple(test_extractor, @@ -525,17 +526,17 @@ class TestAdExtractorCategory: """Tests for category extraction functionality.""" @pytest.fixture - def extractor(self) -> AdExtractor: + def extractor(self, test_bot_config:Config) -> AdExtractor: browser_mock = MagicMock(spec = Browser) - config_mock = { + config = test_bot_config.with_values({ "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } - } - return AdExtractor(browser_mock, config_mock) + }) + return AdExtractor(browser_mock, config) @pytest.mark.asyncio # pylint: disable=protected-access @@ -581,17 +582,17 @@ class TestAdExtractorContact: """Tests for contact information extraction.""" @pytest.fixture - def extractor(self) -> AdExtractor: + def extractor(self, test_bot_config:Config) -> AdExtractor: browser_mock = MagicMock(spec = Browser) - config_mock = { + config = test_bot_config.with_values({ "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } - } - return AdExtractor(browser_mock, config_mock) + }) + return AdExtractor(browser_mock, config) @pytest.mark.asyncio # pylint: disable=protected-access @@ -663,17 +664,17 @@ class TestAdExtractorDownload: """Tests for download functionality.""" @pytest.fixture - def extractor(self) -> AdExtractor: + def extractor(self, test_bot_config:Config) -> AdExtractor: browser_mock = MagicMock(spec = Browser) - config_mock = { + config = test_bot_config.with_values({ "ad_defaults": { "description": { "prefix": "Test Prefix", "suffix": "Test Suffix" } } - } - return AdExtractor(browser_mock, config_mock) + }) + return AdExtractor(browser_mock, config) @pytest.mark.asyncio async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None: diff --git 
a/tests/unit/test_init.py b/tests/unit/test_init.py index 6443384..33cbc05 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -1,19 +1,22 @@ # SPDX-FileCopyrightText: © Jens Bergmann and contributors # SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ -import copy, os, tempfile # isort: skip +import copy, io, logging, os, tempfile # isort: skip from collections.abc import Generator +from contextlib import redirect_stdout from datetime import timedelta from pathlib import Path from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest +from pydantic import ValidationError from ruamel.yaml import YAML from kleinanzeigen_bot import LOG, KleinanzeigenBot, misc from kleinanzeigen_bot._version import __version__ from kleinanzeigen_bot.ads import calculate_content_hash +from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig from kleinanzeigen_bot.utils import loggers @@ -150,7 +153,7 @@ class TestKleinanzeigenBotInitialization: def test_constructor_initializes_default_values(self, test_bot:KleinanzeigenBot) -> None: """Verify that constructor sets all default values correctly.""" assert test_bot.root_url == "https://www.kleinanzeigen.de" - assert isinstance(test_bot.config, dict) + assert isinstance(test_bot.config, Config) assert test_bot.command == "help" assert test_bot.ads_selector == "due" assert test_bot.keep_old_ads is False @@ -218,15 +221,37 @@ class TestKleinanzeigenBotCommandLine: def test_parse_args_handles_help_command(self, test_bot:KleinanzeigenBot) -> None: """Verify that help command is handled correctly.""" - with pytest.raises(SystemExit) as exc_info: + buf = io.StringIO() + with pytest.raises(SystemExit) as exc_info, redirect_stdout(buf): test_bot.parse_args(["dummy", "--help"]) assert exc_info.value.code == 0 + stdout = buf.getvalue() + assert "publish" in stdout + assert "verify" in 
stdout + assert "help" in stdout + assert "version" in stdout + assert "--verbose" in stdout - def test_parse_args_handles_invalid_arguments(self, test_bot:KleinanzeigenBot) -> None: + def test_parse_args_handles_invalid_arguments(self, test_bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None: """Verify that invalid arguments are handled correctly.""" + caplog.set_level(logging.ERROR) with pytest.raises(SystemExit) as exc_info: test_bot.parse_args(["dummy", "--invalid-option"]) assert exc_info.value.code == 2 + assert any( + record.levelno == logging.ERROR + and ( + "--invalid-option not recognized" in record.getMessage() + or "Option --invalid-option unbekannt" in record.getMessage() + ) + for record in caplog.records + ) + + assert any( + ("--invalid-option not recognized" in m) + or ("Option --invalid-option unbekannt" in m) + for m in caplog.messages + ) def test_parse_args_handles_verbose_flag(self, test_bot:KleinanzeigenBot) -> None: """Verify that verbose flag sets correct log level.""" @@ -246,109 +271,88 @@ class TestKleinanzeigenBotConfiguration: def test_load_config_handles_missing_file( self, test_bot:KleinanzeigenBot, - test_data_dir:str, - sample_config:dict[str, Any] + test_data_dir:str ) -> None: """Verify that loading a missing config file creates default config.""" config_path = Path(test_data_dir) / "missing_config.yaml" + config_path.unlink(missing_ok = True) test_bot.config_file_path = str(config_path) - # Add categories to sample config - sample_config_with_categories = sample_config.copy() - sample_config_with_categories["categories"] = {} + with patch.object(LOG, "warning") as mock_warning: + with pytest.raises(ValidationError) as exc_info: + test_bot.load_config() - with patch("kleinanzeigen_bot.utils.dicts.load_dict_if_exists", return_value = None), \ - patch.object(LOG, "warning") as mock_warning, \ - patch("kleinanzeigen_bot.utils.dicts.save_dict") as mock_save, \ - patch("kleinanzeigen_bot.utils.dicts.load_dict_from_module") 
as mock_load_module: - - mock_load_module.side_effect = [ - sample_config_with_categories, # config_defaults.yaml - {"cat1": "id1"}, # categories.yaml - {"cat2": "id2"} # categories_old.yaml - ] - - test_bot.load_config() mock_warning.assert_called_once() - mock_save.assert_called_once_with(str(config_path), sample_config_with_categories) - - # Verify categories were loaded - assert test_bot.categories == {"cat1": "id1", "cat2": "id2"} - assert test_bot.config == sample_config_with_categories + assert config_path.exists() + assert "login.username" in str(exc_info.value) + assert "login.password" in str(exc_info.value) def test_load_config_validates_required_fields(self, test_bot:KleinanzeigenBot, test_data_dir:str) -> None: """Verify that config validation checks required fields.""" config_path = Path(test_data_dir) / "config.yaml" config_content = """ login: - username: testuser + username: dummy_user # Missing password -browser: - arguments: [] """ with open(config_path, "w", encoding = "utf-8") as f: f.write(config_content) test_bot.config_file_path = str(config_path) - with pytest.raises(AssertionError) as exc_info: + with pytest.raises(ValidationError) as exc_info: test_bot.load_config() - assert "[login.password] not specified" in str(exc_info.value) + assert "login.username" not in str(exc_info.value) + assert "login.password" in str(exc_info.value) class TestKleinanzeigenBotAuthentication: """Tests for login and authentication functionality.""" - @pytest.fixture - def configured_bot(self, test_bot:KleinanzeigenBot, sample_config:dict[str, Any]) -> KleinanzeigenBot: - """Provides a bot instance with basic configuration.""" - test_bot.config = sample_config - return test_bot - @pytest.mark.asyncio - async def test_assert_free_ad_limit_not_reached_success(self, configured_bot:KleinanzeigenBot) -> None: + async def test_assert_free_ad_limit_not_reached_success(self, test_bot:KleinanzeigenBot) -> None: """Verify that free ad limit check succeeds when limit not 
reached.""" - with patch.object(configured_bot, "web_find", side_effect = TimeoutError): - await configured_bot.assert_free_ad_limit_not_reached() + with patch.object(test_bot, "web_find", side_effect = TimeoutError): + await test_bot.assert_free_ad_limit_not_reached() @pytest.mark.asyncio - async def test_assert_free_ad_limit_not_reached_limit_reached(self, configured_bot:KleinanzeigenBot) -> None: + async def test_assert_free_ad_limit_not_reached_limit_reached(self, test_bot:KleinanzeigenBot) -> None: """Verify that free ad limit check fails when limit is reached.""" - with patch.object(configured_bot, "web_find", return_value = AsyncMock()): + with patch.object(test_bot, "web_find", return_value = AsyncMock()): with pytest.raises(AssertionError) as exc_info: - await configured_bot.assert_free_ad_limit_not_reached() + await test_bot.assert_free_ad_limit_not_reached() assert "Cannot publish more ads" in str(exc_info.value) @pytest.mark.asyncio - async def test_is_logged_in_returns_true_when_logged_in(self, configured_bot:KleinanzeigenBot) -> None: + async def test_is_logged_in_returns_true_when_logged_in(self, test_bot:KleinanzeigenBot) -> None: """Verify that login check returns true when logged in.""" - with patch.object(configured_bot, "web_text", return_value = "Welcome testuser"): - assert await configured_bot.is_logged_in() is True + with patch.object(test_bot, "web_text", return_value = "Welcome dummy_user"): + assert await test_bot.is_logged_in() is True @pytest.mark.asyncio - async def test_is_logged_in_returns_true_with_alternative_element(self, configured_bot:KleinanzeigenBot) -> None: + async def test_is_logged_in_returns_true_with_alternative_element(self, test_bot:KleinanzeigenBot) -> None: """Verify that login check returns true when logged in with alternative element.""" - with patch.object(configured_bot, "web_text", side_effect = [ + with patch.object(test_bot, "web_text", side_effect = [ TimeoutError(), # First try with mr-medium fails - 
"angemeldet als: testuser" # Second try with user-email succeeds + "angemeldet als: dummy_user" # Second try with user-email succeeds ]): - assert await configured_bot.is_logged_in() is True + assert await test_bot.is_logged_in() is True @pytest.mark.asyncio - async def test_is_logged_in_returns_false_when_not_logged_in(self, configured_bot:KleinanzeigenBot) -> None: + async def test_is_logged_in_returns_false_when_not_logged_in(self, test_bot:KleinanzeigenBot) -> None: """Verify that login check returns false when not logged in.""" - with patch.object(configured_bot, "web_text", side_effect = TimeoutError): - assert await configured_bot.is_logged_in() is False + with patch.object(test_bot, "web_text", side_effect = TimeoutError): + assert await test_bot.is_logged_in() is False @pytest.mark.asyncio - async def test_login_flow_completes_successfully(self, configured_bot:KleinanzeigenBot) -> None: + async def test_login_flow_completes_successfully(self, test_bot:KleinanzeigenBot) -> None: """Verify that normal login flow completes successfully.""" - with patch.object(configured_bot, "web_open") as mock_open, \ - patch.object(configured_bot, "is_logged_in", side_effect = [False, True]) as mock_logged_in, \ - patch.object(configured_bot, "web_find", side_effect = TimeoutError), \ - patch.object(configured_bot, "web_input") as mock_input, \ - patch.object(configured_bot, "web_click") as mock_click: + with patch.object(test_bot, "web_open") as mock_open, \ + patch.object(test_bot, "is_logged_in", side_effect = [False, True]) as mock_logged_in, \ + patch.object(test_bot, "web_find", side_effect = TimeoutError), \ + patch.object(test_bot, "web_input") as mock_input, \ + patch.object(test_bot, "web_click") as mock_click: - await configured_bot.login() + await test_bot.login() mock_open.assert_called() mock_logged_in.assert_called() @@ -356,14 +360,14 @@ class TestKleinanzeigenBotAuthentication: mock_click.assert_called() @pytest.mark.asyncio - async def 
test_login_flow_handles_captcha(self, configured_bot:KleinanzeigenBot) -> None: + async def test_login_flow_handles_captcha(self, test_bot:KleinanzeigenBot) -> None: """Verify that login flow handles captcha correctly.""" - with patch.object(configured_bot, "web_open"), \ - patch.object(configured_bot, "is_logged_in", return_value = False), \ - patch.object(configured_bot, "web_find") as mock_find, \ - patch.object(configured_bot, "web_await") as mock_await, \ - patch.object(configured_bot, "web_input"), \ - patch.object(configured_bot, "web_click"), \ + with patch.object(test_bot, "web_open"), \ + patch.object(test_bot, "is_logged_in", return_value = False), \ + patch.object(test_bot, "web_find") as mock_find, \ + patch.object(test_bot, "web_await") as mock_await, \ + patch.object(test_bot, "web_input"), \ + patch.object(test_bot, "web_click"), \ patch("kleinanzeigen_bot.ainput") as mock_ainput: mock_find.side_effect = [ @@ -376,7 +380,7 @@ class TestKleinanzeigenBotAuthentication: mock_await.return_value = True mock_ainput.return_value = "" - await configured_bot.login() + await test_bot.login() assert mock_find.call_count >= 2 mock_await.assert_called_once() @@ -440,7 +444,7 @@ class TestKleinanzeigenBotBasics: def test_get_config_defaults(self, test_bot:KleinanzeigenBot) -> None: """Test default configuration values.""" - assert isinstance(test_bot.config, dict) + assert isinstance(test_bot.config, Config) assert test_bot.command == "help" assert test_bot.ads_selector == "due" assert test_bot.keep_old_ads is False @@ -578,7 +582,7 @@ login: """) test_bot.config_file_path = str(config_path) await test_bot.run(["script.py", "verify"]) - assert test_bot.config["login"]["username"] == "test" + assert test_bot.config.login.username == "test" class TestKleinanzeigenBotAdOperations: @@ -607,7 +611,7 @@ class TestKleinanzeigenBotAdOperations: def test_load_ads_no_files(self, test_bot:KleinanzeigenBot) -> None: """Test loading ads with no files.""" - 
test_bot.config["ad_files"] = ["nonexistent/*.yaml"] + test_bot.config.ad_files = ["nonexistent/*.yaml"] ads = test_bot.load_ads() assert len(ads) == 0 @@ -676,7 +680,7 @@ categories: # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] with pytest.raises(AssertionError) as exc_info: test_bot.load_ads() assert "must be at least 10 characters long" in str(exc_info.value) @@ -700,7 +704,7 @@ categories: # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] with pytest.raises(AssertionError) as exc_info: test_bot.load_ads() assert "property [price_type] must be one of:" in str(exc_info.value) @@ -724,7 +728,7 @@ categories: # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] with pytest.raises(AssertionError) as exc_info: test_bot.load_ads() assert "property [shipping_type] must be one of:" in str(exc_info.value) @@ -749,7 +753,7 @@ categories: # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] with pytest.raises(AssertionError) as exc_info: test_bot.load_ads() assert "must not be specified for GIVE_AWAY ad" in str(exc_info.value) @@ -774,7 +778,7 @@ categories: # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] with 
pytest.raises(AssertionError) as exc_info: test_bot.load_ads() assert "not specified" in str(exc_info.value) @@ -794,12 +798,7 @@ categories: ) # Mock the config to prevent auto-detection - test_bot.config["ad_defaults"] = { - "description": { - "prefix": "", - "suffix": "" - } - } + test_bot.config.ad_defaults = AdDefaults() yaml = YAML() with open(ad_file, "w", encoding = "utf-8") as f: @@ -807,7 +806,7 @@ categories: # Set config file path to tmp_path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] with pytest.raises(AssertionError) as exc_info: test_bot.load_ads() assert "property [description] not specified" in str(exc_info.value) @@ -876,12 +875,12 @@ class TestKleinanzeigenBotAdRepublication: def test_check_ad_republication_with_changes(self, test_bot:KleinanzeigenBot, base_ad_config:dict[str, Any]) -> None: """Test that ads with changes are marked for republication.""" # Mock the description config to prevent modification of the description - test_bot.config["ad_defaults"] = { + test_bot.config.ad_defaults = AdDefaults.model_validate({ "description": { "prefix": "", "suffix": "" } - } + }) # Create ad config with all necessary fields for republication ad_cfg = create_ad_config( @@ -905,7 +904,7 @@ class TestKleinanzeigenBotAdRepublication: # Set config file path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] # Mock the loading of the original ad configuration with patch("kleinanzeigen_bot.utils.dicts.load_dict", side_effect = [ @@ -934,7 +933,7 @@ class TestKleinanzeigenBotAdRepublication: ad_cfg_orig["content_hash"] = current_hash # Mock the config to prevent actual file operations - test_bot.config["ad_files"] = ["test.yaml"] + test_bot.config.ad_files = ["test.yaml"] with 
patch("kleinanzeigen_bot.utils.dicts.load_dict_if_exists", return_value = ad_cfg_orig), \ patch("kleinanzeigen_bot.utils.dicts.load_dict", return_value = {}): # Mock ad_fields.yaml ads_to_publish = test_bot.load_ads() @@ -966,10 +965,10 @@ class TestKleinanzeigenBotShippingOptions: published_ads:list[dict[str, Any]] = [] # Set up default config values needed for the test - test_bot.config["publishing"] = { + test_bot.config.publishing = PublishingConfig.model_validate({ "delete_old_ads": "BEFORE_PUBLISH", "delete_old_ads_by_title": False - } + }) # Create temporary file path ad_file = Path(tmp_path) / "test_ad.yaml" @@ -1046,25 +1045,27 @@ class TestKleinanzeigenBotPrefixSuffix: def test_description_prefix_suffix_handling( self, - test_bot:KleinanzeigenBot, + test_bot_config:Config, description_test_cases:list[tuple[dict[str, Any], str, str]] ) -> None: """Test handling of description prefix/suffix in various configurations.""" for config, raw_description, expected_description in description_test_cases: - test_bot.config = config + test_bot = KleinanzeigenBot() + test_bot.config = test_bot_config.with_values(config) ad_cfg = {"description": raw_description, "active": True} # Access private method using the correct name mangling description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == expected_description - def test_description_length_validation(self, test_bot:KleinanzeigenBot) -> None: + def test_description_length_validation(self, test_bot_config:Config) -> None: """Test that long descriptions with affixes raise appropriate error.""" - test_bot.config = { + test_bot = KleinanzeigenBot() + test_bot.config = test_bot_config.with_values({ "ad_defaults": { "description_prefix": "P" * 1000, "description_suffix": "S" * 1000 } - } + }) ad_cfg = { "description": "D" * 2001, # This plus affixes will exceed 4000 chars "active": True @@ -1080,14 +1081,10 @@ class TestKleinanzeigenBotPrefixSuffix: class 
TestKleinanzeigenBotDescriptionHandling: """Tests for description handling functionality.""" - def test_description_without_main_config_description(self, test_bot:KleinanzeigenBot) -> None: + def test_description_without_main_config_description(self, test_bot_config:Config) -> None: """Test that description works correctly when description is missing from main config.""" - # Set up config without any description fields - test_bot.config = { - "ad_defaults": { - # No description field at all - } - } + test_bot = KleinanzeigenBot() + test_bot.config = test_bot_config # Test with a simple ad config ad_cfg = { @@ -1099,14 +1096,15 @@ class TestKleinanzeigenBotDescriptionHandling: description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "Test Description" - def test_description_with_only_new_format_affixes(self, test_bot:KleinanzeigenBot) -> None: + def test_description_with_only_new_format_affixes(self, test_bot_config:Config) -> None: """Test that description works with only new format affixes in config.""" - test_bot.config = { + test_bot = KleinanzeigenBot() + test_bot.config = test_bot_config.with_values({ "ad_defaults": { "description_prefix": "Prefix: ", "description_suffix": " :Suffix" } - } + }) ad_cfg = { "description": "Test Description", @@ -1116,9 +1114,10 @@ class TestKleinanzeigenBotDescriptionHandling: description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "Prefix: Test Description :Suffix" - def test_description_with_mixed_config_formats(self, test_bot:KleinanzeigenBot) -> None: + def test_description_with_mixed_config_formats(self, test_bot_config:Config) -> None: """Test that description works with both old and new format affixes in config.""" - test_bot.config = { + test_bot = KleinanzeigenBot() + test_bot.config = test_bot_config.with_values({ "ad_defaults": { "description_prefix": "New Prefix: ", "description_suffix": " 
:New Suffix", @@ -1127,7 +1126,7 @@ class TestKleinanzeigenBotDescriptionHandling: "suffix": " :Old Suffix" } } - } + }) ad_cfg = { "description": "Test Description", @@ -1137,14 +1136,15 @@ class TestKleinanzeigenBotDescriptionHandling: description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "New Prefix: Test Description :New Suffix" - def test_description_with_ad_level_affixes(self, test_bot:KleinanzeigenBot) -> None: + def test_description_with_ad_level_affixes(self, test_bot_config:Config) -> None: """Test that ad-level affixes take precedence over config affixes.""" - test_bot.config = { + test_bot = KleinanzeigenBot() + test_bot.config = test_bot_config.with_values({ "ad_defaults": { "description_prefix": "Config Prefix: ", "description_suffix": " :Config Suffix" } - } + }) ad_cfg = { "description": "Test Description", @@ -1156,9 +1156,10 @@ class TestKleinanzeigenBotDescriptionHandling: description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "Ad Prefix: Test Description :Ad Suffix" - def test_description_with_none_values(self, test_bot:KleinanzeigenBot) -> None: + def test_description_with_none_values(self, test_bot_config:Config) -> None: """Test that None values in affixes are handled correctly.""" - test_bot.config = { + test_bot = KleinanzeigenBot() + test_bot.config = test_bot_config.with_values({ "ad_defaults": { "description_prefix": None, "description_suffix": None, @@ -1167,7 +1168,7 @@ class TestKleinanzeigenBotDescriptionHandling: "suffix": None } } - } + }) ad_cfg = { "description": "Test Description", @@ -1177,11 +1178,10 @@ class TestKleinanzeigenBotDescriptionHandling: description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True) assert description == "Test Description" - def test_description_with_email_replacement(self, test_bot:KleinanzeigenBot) -> None: + def 
test_description_with_email_replacement(self, test_bot_config:Config) -> None: """Test that @ symbols in description are replaced with (at).""" - test_bot.config = { - "ad_defaults": {} - } + test_bot = KleinanzeigenBot() + test_bot.config = test_bot_config ad_cfg = { "description": "Contact: test@example.com", @@ -1195,16 +1195,19 @@ class TestKleinanzeigenBotDescriptionHandling: class TestKleinanzeigenBotChangedAds: """Tests for the 'changed' ads selector functionality.""" - def test_load_ads_with_changed_selector(self, test_bot:KleinanzeigenBot, base_ad_config:dict[str, Any]) -> None: + def test_load_ads_with_changed_selector(self, test_bot_config:Config, base_ad_config:dict[str, Any]) -> None: """Test that only changed ads are loaded when using the 'changed' selector.""" # Set up the bot with the 'changed' selector + test_bot = KleinanzeigenBot() test_bot.ads_selector = "changed" - test_bot.config["ad_defaults"] = { - "description": { - "prefix": "", - "suffix": "" + test_bot.config = test_bot_config.with_values({ + "ad_defaults": { + "description": { + "prefix": "", + "suffix": "" + } } - } + }) # Create a changed ad changed_ad = create_ad_config( @@ -1237,7 +1240,7 @@ class TestKleinanzeigenBotChangedAds: # Set config file path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] # Mock the loading of the ad configuration with patch("kleinanzeigen_bot.utils.dicts.load_dict", side_effect = [ @@ -1254,12 +1257,6 @@ class TestKleinanzeigenBotChangedAds: """Test that 'due' selector includes all ads that are due for republication, regardless of changes.""" # Set up the bot with the 'due' selector test_bot.ads_selector = "due" - test_bot.config["ad_defaults"] = { - "description": { - "prefix": "", - "suffix": "" - } - } # Create a changed ad that is also due for republication current_time = misc.now() @@ -1289,7 +1286,7 @@ class 
TestKleinanzeigenBotChangedAds: # Set config file path and use relative path for ad_files test_bot.config_file_path = str(temp_path / "config.yaml") - test_bot.config["ad_files"] = ["ads/*.yaml"] + test_bot.config.ad_files = ["ads/*.yaml"] # Mock the loading of the ad configuration with patch("kleinanzeigen_bot.utils.dicts.load_dict", side_effect = [