feat: add type safe Config model

This commit is contained in:
sebthom
2025-05-14 00:30:59 +02:00
committed by Sebastian Thomschke
parent e7a3d46d25
commit 1369da1c34
21 changed files with 1132 additions and 389 deletions

View File

@@ -229,6 +229,8 @@ Valid file extensions are `.json`, `.yaml` and `.yml`
The following parameters can be configured:
```yaml
# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/config.schema.json
# glob (wildcard) patterns to select ad configuration files
# if relative paths are specified, then they are relative to this configuration file
ad_files:
@@ -425,6 +427,7 @@ By default a new browser process will be launched. To reuse a manually launched
- all tests: `pdm run test` - with coverage: `pdm run test:cov`
- Run syntax checks: `pdm run lint`
- Linting issues found by ruff can be auto-fixed using `pdm run lint:fix`
- Derive JSON schema files from Pydantic data model: `pdm run generate-schemas`
- Create platform-specific executable: `pdm run compile`
- Application bootstrap works like this:
```python

12
pdm.lock generated
View File

@@ -5,7 +5,7 @@
groups = ["default", "dev"]
strategy = ["inherit_metadata"]
lock_version = "4.5.0"
content_hash = "sha256:2ce8b5b77bbdaa380fbb3b50bc888b1f7c7c03fc4072cb7db379a787b2532d20"
content_hash = "sha256:4c861bebeac9e92661923a7e8d04a695c2185a5d0f85179fb858febd2503fdaf"
[[metadata.targets]]
requires_python = ">=3.10,<3.14"
@@ -25,7 +25,7 @@ name = "annotated-types"
version = "0.7.0"
requires_python = ">=3.8"
summary = "Reusable constraint types to use with typing.Annotated"
groups = ["dev"]
groups = ["default", "dev"]
dependencies = [
"typing-extensions>=4.0.0; python_version < \"3.9\"",
]
@@ -910,7 +910,7 @@ name = "pydantic"
version = "2.11.4"
requires_python = ">=3.9"
summary = "Data validation using Python type hints"
groups = ["dev"]
groups = ["default", "dev"]
dependencies = [
"annotated-types>=0.6.0",
"pydantic-core==2.33.2",
@@ -927,7 +927,7 @@ name = "pydantic-core"
version = "2.33.2"
requires_python = ">=3.9"
summary = "Core functionality for Pydantic validation and serialization"
groups = ["dev"]
groups = ["default", "dev"]
dependencies = [
"typing-extensions!=4.7.0,>=4.6.0",
]
@@ -1371,7 +1371,7 @@ name = "typing-extensions"
version = "4.13.2"
requires_python = ">=3.8"
summary = "Backported and Experimental Type Hints for Python 3.8+"
groups = ["dev"]
groups = ["default", "dev"]
files = [
{file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"},
{file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"},
@@ -1382,7 +1382,7 @@ name = "typing-inspection"
version = "0.4.0"
requires_python = ">=3.9"
summary = "Runtime typing introspection tools"
groups = ["dev"]
groups = ["default", "dev"]
dependencies = [
"typing-extensions>=4.12.0",
]

View File

@@ -39,6 +39,7 @@ dependencies = [
"colorama",
"jaraco.text", # required by pkg_resources during runtime
"nodriver==0.39.0", # 0.40-0.44 have issues starting browsers and evaluating self.web_execute("window.BelenConf") fails
"pydantic>=2.0.0",
"ruamel.yaml",
"psutil",
"wcmatch",
@@ -256,7 +257,7 @@ min-file-size = 256
[tool.ruff.lint.pylint]
# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker
# https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages
max-args = 5 # max. number of args for function / method (R0913)
max-args = 6 # max. number of args for function / method (R0913)
# max-attributes = 15 # max. number of instance attrs for a class (R0902)
max-branches = 40 # max. number of branch for function / method body (R0912)
max-locals = 30 # max. number of local vars for function / method body (R0914)

377
schemas/config.schema.json Normal file
View File

@@ -0,0 +1,377 @@
{
"$defs": {
"AdDefaults": {
"properties": {
"active": {
"default": true,
"title": "Active",
"type": "boolean"
},
"type": {
"default": "OFFER",
"enum": [
"OFFER",
"WANTED"
],
"title": "Type",
"type": "string"
},
"description": {
"anyOf": [
{
"$ref": "#/$defs/DescriptionAffixes"
},
{
"type": "null"
}
],
"default": null
},
"description_prefix": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "prefix for the ad description",
"title": "Description Prefix"
},
"description_suffix": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": " suffix for the ad description",
"title": "Description Suffix"
},
"price_type": {
"default": "NEGOTIABLE",
"enum": [
"FIXED",
"NEGOTIABLE",
"GIVE_AWAY",
"NOT_APPLICABLE"
],
"title": "Price Type",
"type": "string"
},
"shipping_type": {
"default": "SHIPPING",
"enum": [
"PICKUP",
"SHIPPING",
"NOT_APPLICABLE"
],
"title": "Shipping Type",
"type": "string"
},
"sell_directly": {
"default": false,
"description": "requires shipping_type SHIPPING to take effect",
"title": "Sell Directly",
"type": "boolean"
},
"contact": {
"$ref": "#/$defs/ContactDefaults"
},
"republication_interval": {
"default": 7,
"title": "Republication Interval",
"type": "integer"
}
},
"title": "AdDefaults",
"type": "object"
},
"BrowserConfig": {
"properties": {
"arguments": {
"description": "See https://peter.sh/experiments/chromium-command-line-switches/",
"items": {
"type": "string"
},
"title": "Arguments",
"type": "array"
},
"binary_location": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "path to custom browser executable, if not specified will be looked up on PATH",
"title": "Binary Location"
},
"extensions": {
"description": "a list of .crx extension files to be loaded",
"items": {
"type": "string"
},
"title": "Extensions",
"type": "array"
},
"use_private_window": {
"default": true,
"title": "Use Private Window",
"type": "boolean"
},
"user_data_dir": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md",
"title": "User Data Dir"
},
"profile_name": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Profile Name"
}
},
"title": "BrowserConfig",
"type": "object"
},
"CaptchaConfig": {
"properties": {
"auto_restart": {
"default": false,
"title": "Auto Restart",
"type": "boolean"
},
"restart_delay": {
"default": "6h",
"title": "Restart Delay",
"type": "string"
}
},
"title": "CaptchaConfig",
"type": "object"
},
"ContactDefaults": {
"properties": {
"name": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Name"
},
"street": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Street"
},
"zipcode": {
"anyOf": [
{
"type": "integer"
},
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Zipcode"
},
"phone": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Phone"
}
},
"title": "ContactDefaults",
"type": "object"
},
"DescriptionAffixes": {
"deprecated": true,
"properties": {
"prefix": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Prefix"
},
"suffix": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Suffix"
}
},
"title": "DescriptionAffixes",
"type": "object"
},
"DownloadConfig": {
"properties": {
"include_all_matching_shipping_options": {
"default": false,
"description": "if true, all shipping options matching the package size will be included",
"title": "Include All Matching Shipping Options",
"type": "boolean"
},
"excluded_shipping_options": {
"description": "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']",
"items": {
"type": "string"
},
"title": "Excluded Shipping Options",
"type": "array"
}
},
"title": "DownloadConfig",
"type": "object"
},
"LoginConfig": {
"properties": {
"username": {
"minLength": 1,
"title": "Username",
"type": "string"
},
"password": {
"minLength": 1,
"title": "Password",
"type": "string"
}
},
"required": [
"username",
"password"
],
"title": "LoginConfig",
"type": "object"
},
"PublishingConfig": {
"properties": {
"delete_old_ads": {
"anyOf": [
{
"enum": [
"BEFORE_PUBLISH",
"AFTER_PUBLISH",
"NEVER"
],
"type": "string"
},
{
"type": "null"
}
],
"default": "AFTER_PUBLISH",
"title": "Delete Old Ads"
},
"delete_old_ads_by_title": {
"default": true,
"description": "only works if delete_old_ads is set to BEFORE_PUBLISH",
"title": "Delete Old Ads By Title",
"type": "boolean"
}
},
"title": "PublishingConfig",
"type": "object"
}
},
"properties": {
"ad_files": {
"description": "\nglob (wildcard) patterns to select ad configuration files\nif relative paths are specified, then they are relative to this configuration file\n",
"items": {
"type": "string"
},
"minItems": 1,
"title": "Ad Files",
"type": "array"
},
"ad_defaults": {
"$ref": "#/$defs/AdDefaults",
"description": "Default values for ads, can be overwritten in each ad configuration file"
},
"categories": {
"additionalProperties": {
"type": "string"
},
"description": "\nadditional name to category ID mappings, see default list at\nhttps://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml\n\nExample:\n categories:\n Elektronik > Notebooks: 161/278\n Jobs > Praktika: 102/125\n ",
"title": "Categories",
"type": "object"
},
"download": {
"$ref": "#/$defs/DownloadConfig"
},
"publishing": {
"$ref": "#/$defs/PublishingConfig"
},
"browser": {
"$ref": "#/$defs/BrowserConfig",
"description": "Browser configuration"
},
"login": {
"$ref": "#/$defs/LoginConfig",
"description": "Login credentials"
},
"captcha": {
"$ref": "#/$defs/CaptchaConfig"
}
},
"title": "Config",
"type": "object",
"description": "Auto-generated JSON Schema for Config"
}

View File

@@ -0,0 +1,39 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import json
from pathlib import Path
from typing import Type
from pydantic import BaseModel
from kleinanzeigen_bot.model.config_model import Config
def generate_schema(model:Type[BaseModel], out_dir:Path) -> None:
    """
    Derive the JSON schema of the given Pydantic model and write it to
    `<lower-cased model name>.schema.json` inside `out_dir`.

    :param model: model class the schema is generated from
    :param out_dir: target directory; it is not created by this function
    """
    model_name = model.__name__
    print(f"[+] Generating schema for model [{model_name}]...")

    # Build the schema dict and fill in title/description only where the model did not provide them
    schema = model.model_json_schema(mode = "validation")
    fallbacks = (
        ("title", f"{model_name} Schema"),
        ("description", f"Auto-generated JSON Schema for {model_name}"),
    )
    for key, fallback in fallbacks:
        if key not in schema:
            schema[key] = fallback

    # Serialize with a trailing newline
    target_file = out_dir / f"{model_name.lower()}.schema.json"
    with target_file.open("w", encoding = "utf-8") as fh:
        json.dump(schema, fh, indent = 2)
        fh.write("\n")
    print(f"[✓] {target_file}")
# The schemas directory is resolved relative to this script: the parent of the
# script's own directory is treated as the project root.
script_dir = Path(__file__).parent
project_root = script_dir.parent
out_dir = project_root.joinpath("schemas")
out_dir.mkdir(parents = True, exist_ok = True)

print(f"Generating schemas in: {out_dir.resolve()}")
generate_schema(Config, out_dir)
print("All schemas generated successfully.")

View File

@@ -15,6 +15,7 @@ from wcmatch import glob
from . import extract, resources
from ._version import __version__
from .ads import MAX_DESCRIPTION_LENGTH, calculate_content_hash, get_description_affixes
from .model.config_model import Config
from .utils import dicts, error_handlers, loggers, misc
from .utils.exceptions import CaptchaEncountered
from .utils.files import abspath
@@ -42,7 +43,7 @@ class KleinanzeigenBot(WebScrapingMixin):
self.root_url = "https://www.kleinanzeigen.de"
self.config:dict[str, Any] = {}
self.config:Config
self.config_file_path = abspath("config.yaml")
self.categories:dict[str, str] = {}
@@ -325,7 +326,7 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_files:dict[str, str] = {}
data_root_dir = os.path.dirname(self.config_file_path)
for file_pattern in self.config["ad_files"]:
for file_pattern in self.config.ad_files:
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
if not str(ad_file).endswith("ad_fields.yaml"):
ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file
@@ -349,7 +350,7 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_cfg_orig = dicts.load_dict(ad_file, "ad")
ad_cfg = copy.deepcopy(ad_cfg_orig)
dicts.apply_defaults(ad_cfg,
self.config["ad_defaults"],
self.config.ad_defaults.model_dump(),
ignore = lambda k, _: k == "description",
override = lambda _, v: v == "" # noqa: PLC1901 can be simplified to `not v` as an empty string is falsey
)
@@ -462,40 +463,44 @@ class KleinanzeigenBot(WebScrapingMixin):
return ads
def load_config(self) -> None:
config_defaults = dicts.load_dict_from_module(resources, "config_defaults.yaml")
config = dicts.load_dict_if_exists(self.config_file_path, _("config"))
if config is None:
# write default config.yaml if config file does not exist
if not os.path.exists(self.config_file_path):
LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
dicts.save_dict(self.config_file_path, config_defaults)
config = {}
default_config = Config.model_construct()
default_config.login.username = ""
default_config.login.password = ""
dicts.save_dict(self.config_file_path, default_config.model_dump(exclude_none = True, exclude = {
"ad_defaults": {
"description" # deprecated
}
}), header = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/config.schema.json")
self.config = dicts.apply_defaults(config, config_defaults)
config_yaml = dicts.load_dict_if_exists(self.config_file_path, _("config"))
self.config = Config.model_validate(config_yaml, strict = True, context = self.config_file_path)
# load built-in category mappings
self.categories = dicts.load_dict_from_module(resources, "categories.yaml", "categories")
deprecated_categories = dicts.load_dict_from_module(resources, "categories_old.yaml", "categories")
self.categories.update(deprecated_categories)
if self.config["categories"]:
self.categories.update(self.config["categories"])
if self.config.categories:
self.categories.update(self.config.categories)
LOG.info(" -> found %s", pluralize("category", self.categories))
ensure(self.config["login"]["username"], f"[login.username] not specified @ [{self.config_file_path}]")
ensure(self.config["login"]["password"], f"[login.password] not specified @ [{self.config_file_path}]")
self.browser_config.arguments = self.config["browser"]["arguments"]
self.browser_config.binary_location = self.config["browser"]["binary_location"]
self.browser_config.extensions = [abspath(item, relative_to = self.config_file_path) for item in self.config["browser"]["extensions"]]
self.browser_config.use_private_window = self.config["browser"]["use_private_window"]
if self.config["browser"]["user_data_dir"]:
self.browser_config.user_data_dir = abspath(self.config["browser"]["user_data_dir"], relative_to = self.config_file_path)
self.browser_config.profile_name = self.config["browser"]["profile_name"]
# populate browser_config object used by WebScrapingMixin
self.browser_config.arguments = self.config.browser.arguments
self.browser_config.binary_location = self.config.browser.binary_location
self.browser_config.extensions = [abspath(item, relative_to = self.config_file_path) for item in self.config.browser.extensions]
self.browser_config.use_private_window = self.config.browser.use_private_window
if self.config.browser.user_data_dir:
self.browser_config.user_data_dir = abspath(self.config.browser.user_data_dir, relative_to = self.config_file_path)
self.browser_config.profile_name = self.config.browser.profile_name
async def login(self) -> None:
LOG.info("Checking if already logged in...")
await self.web_open(f"{self.root_url}")
if await self.is_logged_in():
LOG.info("Already logged in as [%s]. Skipping login.", self.config["login"]["username"])
LOG.info("Already logged in as [%s]. Skipping login.", self.config.login.username)
return
LOG.info("Opening login page...")
@@ -519,9 +524,9 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.handle_after_login_logic()
async def fill_login_data_and_send(self) -> None:
LOG.info("Logging in as [%s]...", self.config["login"]["username"])
await self.web_input(By.ID, "email", self.config["login"]["username"])
await self.web_input(By.ID, "password", self.config["login"]["password"])
LOG.info("Logging in as [%s]...", self.config.login.username)
await self.web_input(By.ID, "email", self.config.login.username)
await self.web_input(By.ID, "password", self.config.login.password)
await self.web_click(By.CSS_SELECTOR, "form#login-form button[type='submit']")
async def handle_after_login_logic(self) -> None:
@@ -546,13 +551,13 @@ class KleinanzeigenBot(WebScrapingMixin):
try:
# Try to find the standard element first
user_info = await self.web_text(By.CLASS_NAME, "mr-medium")
if self.config["login"]["username"].lower() in user_info.lower():
if self.config.login.username.lower() in user_info.lower():
return True
except TimeoutError:
try:
# If standard element not found, try the alternative
user_info = await self.web_text(By.ID, "user-email")
if self.config["login"]["username"].lower() in user_info.lower():
if self.config.login.username.lower() in user_info.lower():
return True
except TimeoutError:
return False
@@ -567,7 +572,7 @@ class KleinanzeigenBot(WebScrapingMixin):
for (ad_file, ad_cfg, _ad_cfg_orig) in ad_cfgs:
count += 1
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config["publishing"]["delete_old_ads_by_title"])
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title)
await self.web_sleep()
LOG.info("############################################")
@@ -624,7 +629,7 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads)
await self.web_await(lambda: self.web_check(By.ID, "checking-done", Is.DISPLAYED), timeout = 5 * 60)
if self.config["publishing"]["delete_old_ads"] == "AFTER_PUBLISH" and not self.keep_old_ads:
if self.config.publishing.delete_old_ads == "AFTER_PUBLISH" and not self.keep_old_ads:
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = False)
LOG.info("############################################")
@@ -639,8 +644,8 @@ class KleinanzeigenBot(WebScrapingMixin):
"""
await self.assert_free_ad_limit_not_reached()
if self.config["publishing"]["delete_old_ads"] == "BEFORE_PUBLISH" and not self.keep_old_ads:
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config["publishing"]["delete_old_ads_by_title"])
if self.config.publishing.delete_old_ads == "BEFORE_PUBLISH" and not self.keep_old_ads:
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title)
LOG.info("Publishing ad '%s'...", ad_cfg["title"])
@@ -779,9 +784,9 @@ class KleinanzeigenBot(WebScrapingMixin):
"iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']",
timeout = 2)
if self.config.get("captcha", {}).get("auto_restart", False):
if self.config.captcha.auto_restart:
LOG.warning("Captcha recognized - auto-restart enabled, abort run...")
raise CaptchaEncountered(misc.parse_duration(self.config.get("captcha", {}).get("restart_delay", "6h")))
raise CaptchaEncountered(misc.parse_duration(self.config.captcha.restart_delay))
# Fallback: manuell
LOG.warning("############################################")
@@ -1036,7 +1041,7 @@ class KleinanzeigenBot(WebScrapingMixin):
async def assert_free_ad_limit_not_reached(self) -> None:
try:
await self.web_find(By.XPATH, "/html/body/div[1]/form/fieldset[6]/div[1]/header", timeout = 2)
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config.login.username} is reached.")
except TimeoutError:
pass

View File

@@ -4,7 +4,7 @@
import hashlib, json, os # isort: skip
from typing import Any, Final
from .utils import dicts
from .model.config_model import Config
MAX_DESCRIPTION_LENGTH:Final[int] = 4000
@@ -40,7 +40,7 @@ def calculate_content_hash(ad_cfg:dict[str, Any]) -> str:
return hashlib.sha256(content_str.encode()).hexdigest()
def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str:
def get_description_affixes(config:Config, *, prefix:bool = True) -> str:
"""Get prefix or suffix for description with proper precedence.
This function handles both the new flattened format and legacy nested format:
@@ -65,24 +65,21 @@ def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str
Example:
>>> config = {"ad_defaults": {"description_prefix": "Hello", "description": {"prefix": "Hi"}}}
>>> get_description_affixes(config, prefix=True)
>>> get_description_affixes(Config.model_validate(config), prefix=True)
'Hello'
"""
# Handle edge cases
if not isinstance(config, dict):
return ""
affix_type = "prefix" if prefix else "suffix"
# First try new flattened format (description_prefix/description_suffix)
flattened_key = f"description_{affix_type}"
flattened_value = dicts.safe_get(config, "ad_defaults", flattened_key)
flattened_value = getattr(config.ad_defaults, flattened_key)
if isinstance(flattened_value, str):
return flattened_value
# Then try legacy nested format (description.prefix/description.suffix)
nested_value = dicts.safe_get(config, "ad_defaults", "description", affix_type)
if isinstance(nested_value, str):
return nested_value
if config.ad_defaults.description:
nested_value = getattr(config.ad_defaults.description, affix_type)
if isinstance(nested_value, str):
return nested_value
return ""

View File

@@ -7,6 +7,7 @@ from datetime import datetime
from typing import Any, Final
from .ads import calculate_content_hash, get_description_affixes
from .model.config_model import Config
from .utils import dicts, i18n, loggers, misc, reflect
from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin
@@ -22,7 +23,7 @@ class AdExtractor(WebScrapingMixin):
Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page.
"""
def __init__(self, browser:Browser, config:dict[str, Any]) -> None:
def __init__(self, browser:Browser, config:Config) -> None:
super().__init__()
self.browser = browser
self.config = config
@@ -432,11 +433,8 @@ class AdExtractor(WebScrapingMixin):
# Convert Euro to cents and round to nearest integer
price_in_cent = round(ship_costs * 100)
# Get excluded shipping options from config
excluded_options = self.config.get("download", {}).get("excluded_shipping_options", [])
# If include_all_matching_shipping_options is enabled, get all options for the same package size
if self.config.get("download", {}).get("include_all_matching_shipping_options", False):
if self.config.download.include_all_matching_shipping_options:
# Find all options with the same price to determine the package size
matching_options = [opt for opt in shipping_costs if opt["priceInEuroCent"] == price_in_cent]
if not matching_options:
@@ -451,7 +449,7 @@ class AdExtractor(WebScrapingMixin):
for opt in shipping_costs
if opt["packageSize"] == matching_size
and opt["id"] in shipping_option_mapping
and shipping_option_mapping[opt["id"]] not in excluded_options
and shipping_option_mapping[opt["id"]] not in self.config.download.excluded_shipping_options
]
else:
# Only use the matching option if it's not excluded
@@ -460,7 +458,7 @@ class AdExtractor(WebScrapingMixin):
return "NOT_APPLICABLE", ship_costs, shipping_options
shipping_option = shipping_option_mapping.get(matching_option["id"])
if not shipping_option or shipping_option in excluded_options:
if not shipping_option or shipping_option in self.config.download.excluded_shipping_options:
return "NOT_APPLICABLE", ship_costs, shipping_options
shipping_options = [shipping_option]

View File

View File

@@ -0,0 +1,144 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from __future__ import annotations

import copy
from typing import Any, Dict, List, Literal

from pydantic import Field, field_validator, model_validator, validator
from typing_extensions import deprecated

from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils.pydantics import ContextualModel
# Model of the `contact` section of AdDefaults; every field is optional and defaults to None.
class ContactDefaults(ContextualModel):
    name:str | None = None
    street:str | None = None
    zipcode:int | str | None = None  # accepted as integer or as string
    phone:str | None = None
# Legacy nested form of the description affixes (`description: {prefix, suffix}`).
# Marked as deprecated in favour of the flat description_prefix/description_suffix fields.
@deprecated("Use description_prefix/description_suffix instead")
class DescriptionAffixes(ContextualModel):
    prefix:str | None = None
    suffix:str | None = None
# Default values applied to ads (the `ad_defaults` section of the configuration).
class AdDefaults(ContextualModel):
    active:bool = True
    type:Literal["OFFER", "WANTED"] = "OFFER"
    # deprecated nested format, folded into the flat affix fields by unify_description()
    description:DescriptionAffixes | None = None
    description_prefix:str | None = Field(default = None, description = "prefix for the ad description")
    description_suffix:str | None = Field(default = None, description = " suffix for the ad description")
    price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE"
    shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING"
    sell_directly:bool = Field(default = False, description = "requires shipping_type SHIPPING to take effect")
    contact:ContactDefaults = Field(default_factory = ContactDefaults)
    republication_interval:int = 7

    @model_validator(mode = "before")
    @classmethod
    def unify_description(cls, values:Any) -> Any:
        """
        Copy prefix/suffix of the deprecated nested "description" entry into the flat
        description_prefix/description_suffix keys, unless the flat key already holds
        a non-empty value (flat values take precedence).

        :param values: raw, not yet validated input of the model
        :return: the input to validate; a shallow copy if a nested description was present
        """
        # A "before" validator sees the raw input, which is not guaranteed to be a dict
        # (e.g. a scalar from a malformed config file). Pass such input through untouched
        # so the regular validation reports it instead of failing here with AttributeError.
        if not isinstance(values, dict):
            return values

        desc = values.get("description")
        if not isinstance(desc, dict):
            return values

        # Work on a shallow copy so the caller's dict is not modified as a side effect.
        unified = dict(values)
        for affix in ("prefix", "suffix"):
            flat_key = f"description_{affix}"
            nested_value = desc.get(affix)
            if not unified.get(flat_key) and nested_value is not None:
                unified[flat_key] = nested_value
        return unified
# Model of the `download` section of the configuration; both options concern shipping options.
class DownloadConfig(ContextualModel):
    include_all_matching_shipping_options:bool = Field(
        default = False,
        description = "if true, all shipping options matching the package size will be included"
    )
    # default_factory gives every instance its own empty list
    excluded_shipping_options:List[str] = Field(
        default_factory = list,
        description = "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']"
    )
# Model of the `browser` section of the configuration.
# List fields default to empty lists, optional string fields to None.
class BrowserConfig(ContextualModel):
    arguments:List[str] = Field(
        default_factory = list,
        description = "See https://peter.sh/experiments/chromium-command-line-switches/"
    )
    binary_location:str | None = Field(
        default = None,
        description = "path to custom browser executable, if not specified will be looked up on PATH"
    )
    extensions:List[str] = Field(
        default_factory = list,
        description = "a list of .crx extension files to be loaded"
    )
    use_private_window:bool = True
    user_data_dir:str | None = Field(
        default = None,
        description = "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md"
    )
    profile_name:str | None = None
# Model of the `login` section; both fields are required (no default) and must not be empty.
class LoginConfig(ContextualModel):
    username:str = Field(..., min_length = 1)
    password:str = Field(..., min_length = 1)
# Model of the `publishing` section controlling the deletion of old ads.
class PublishingConfig(ContextualModel):
    # None is accepted in addition to the three literal values
    delete_old_ads:Literal["BEFORE_PUBLISH", "AFTER_PUBLISH", "NEVER"] | None = "AFTER_PUBLISH"
    delete_old_ads_by_title:bool = Field(default = True, description = "only works if delete_old_ads is set to BEFORE_PUBLISH")
# Model of the `captcha` section of the configuration.
class CaptchaConfig(ContextualModel):
    auto_restart:bool = False
    restart_delay:str = "6h"  # duration given as string
# Root model of the bot configuration. Every section has a default, so an instance can be
# created without arguments; `login` is then built without validation (see below).
class Config(ContextualModel):
    ad_files:List[str] = Field(
        default_factory = lambda: ["./**/ad_*.{json,yml,yaml}"],
        min_length = 1,  # Pydantic v2 name of the former `min_items` constraint
        description = """
glob (wildcard) patterns to select ad configuration files
if relative paths are specified, then they are relative to this configuration file
"""
    )
    ad_defaults:AdDefaults = Field(
        default_factory = AdDefaults,
        description = "Default values for ads, can be overwritten in each ad configuration file"
    )
    categories:Dict[str, str] = Field(default_factory = dict, description = """
additional name to category ID mappings, see default list at
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
Example:
categories:
Elektronik > Notebooks: 161/278
Jobs > Praktika: 102/125
""")
    download:DownloadConfig = Field(default_factory = DownloadConfig)
    publishing:PublishingConfig = Field(default_factory = PublishingConfig)
    browser:BrowserConfig = Field(default_factory = BrowserConfig, description = "Browser configuration")
    # LoginConfig has required fields without defaults, hence the default instance is
    # created via model_construct, i.e. without running validation
    login:LoginConfig = Field(default_factory = LoginConfig.model_construct, description = "Login credentials")
    captcha:CaptchaConfig = Field(default_factory = CaptchaConfig)

    def with_values(self, values:dict[str, Any]) -> Config:
        """
        Create a new, validated Config from `values`, using the content of this instance as defaults.

        :param values: configuration values; deep-copied first, so the argument is not modified
        :return: a new Config instance
        """
        # NOTE(review): the merge semantics are those of dicts.apply_defaults, which is defined elsewhere
        return Config.model_validate(
            dicts.apply_defaults(copy.deepcopy(values), defaults = self.model_dump())
        )

    @field_validator("ad_files")
    @classmethod
    def _non_empty_glob_pattern(cls, v:List[str]) -> List[str]:
        """Reject ad_files lists containing empty or whitespace-only patterns."""
        for pattern in v:
            if not pattern.strip():
                raise ValueError("ad_files entries must be non-empty glob patterns")
        return v

View File

@@ -1,52 +0,0 @@
ad_files:
- "./**/ad_*.{json,yml,yaml}"
# default values for ads, can be overwritten in each ad configuration file
ad_defaults:
active: true
type: OFFER # one of: OFFER, WANTED
description_prefix: "" # prefix for the ad description
description_suffix: "" # suffix for the ad description
price_type: NEGOTIABLE # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE
shipping_type: SHIPPING # one of: PICKUP, SHIPPING, NOT_APPLICABLE
sell_directly: false # requires shipping_options to take effect
contact:
name: ""
street: ""
zipcode:
phone: "" # IMPORTANT: surround phone number with quotes to prevent removal of leading zeros
republication_interval: 7 # every X days ads should be re-published
# additional name to category ID mappings, see default list at
# https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
#
# categories:
# Elektronik > Notebooks: 161/278
# Jobs > Praktika: 102/125
categories: {}
download:
# if true, all shipping options matching the package size will be included
include_all_matching_shipping_options: false
# list of shipping options to exclude, e.g. ["DHL_2", "DHL_5"]
excluded_shipping_options: []
publishing:
delete_old_ads: "AFTER_PUBLISH" # one of: AFTER_PUBLISH, BEFORE_PUBLISH, NEVER
delete_old_ads_by_title: true # only works if delete_old_ads is set to BEFORE_PUBLISH
# browser configuration
browser:
# https://peter.sh/experiments/chromium-command-line-switches/
arguments: []
binary_location: # path to custom browser executable, if not specified will be looked up on PATH
extensions: [] # a list of .crx extension files to be loaded
use_private_window: true
user_data_dir: "" # see https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md
profile_name: ""
# login credentials
login:
username: ""
password: ""

View File

@@ -207,7 +207,7 @@ kleinanzeigen_bot/utils/error_handlers.py:
"Aborted on user request.": "Auf Benutzeranfrage abgebrochen."
on_exception:
"%s: %s": "%s: %s"
"Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s": "Unbekannter Fehler aufgetreten (fehlende Fehlerinformation): ex_type=%s, ex_value=%s"
"Unknown exception occurred (missing exception info): ex_type=%s, ex=%s": "Unbekannter Fehler aufgetreten (fehlende Fehlerinformation): ex_type=%s, ex_value=%s"
#################################################
kleinanzeigen_bot/utils/loggers.py:
@@ -229,6 +229,117 @@ kleinanzeigen_bot/utils/dicts.py:
load_dict_from_module:
"Loading %s[%s.%s]...": "Lade %s[%s.%s]..."
#################################################
kleinanzeigen_bot/utils/pydantics.py:
#################################################
__get_message_template:
"Object has no attribute '{attribute}'": "Objekt hat kein Attribut '{attribute}'"
"Invalid JSON: {error}": "Ungültiges JSON: {error}"
"JSON input should be string, bytes or bytearray": "JSON-Eingabe sollte eine Zeichenkette, Bytes oder Bytearray sein"
"Cannot check `{method_name}` when validating from json, use a JsonOrPython validator instead": "Kann `{method_name}` beim Validieren von JSON nicht prüfen, verwende stattdessen einen JsonOrPython-Validator"
"Recursion error - cyclic reference detected": "Rekursionsfehler - zirkuläre Referenz erkannt"
"Field required": "Feld erforderlich"
"Field is frozen": "Feld ist gesperrt"
"Instance is frozen": "Instanz ist gesperrt"
"Extra inputs are not permitted": "Zusätzliche Eingaben sind nicht erlaubt"
"Keys should be strings": "Schlüssel sollten Zeichenketten sein"
"Error extracting attribute: {error}": "Fehler beim Extrahieren des Attributs: {error}"
"Input should be a valid dictionary or instance of {class_name}": "Eingabe sollte ein gültiges Wörterbuch oder eine Instanz von {class_name} sein"
"Input should be a valid dictionary or object to extract fields from": "Eingabe sollte ein gültiges Wörterbuch oder Objekt sein, um Felder daraus zu extrahieren"
"Input should be a dictionary or an instance of {class_name}": "Eingabe sollte ein Wörterbuch oder eine Instanz von {class_name} sein"
"Input should be an instance of {class_name}": "Eingabe sollte eine Instanz von {class_name} sein"
"Input should be None": "Eingabe sollte None sein"
"Input should be greater than {gt}": "Eingabe sollte größer als {gt} sein"
"Input should be greater than or equal to {ge}": "Eingabe sollte größer oder gleich {ge} sein"
"Input should be less than {lt}": "Eingabe sollte kleiner als {lt} sein"
"Input should be less than or equal to {le}": "Eingabe sollte kleiner oder gleich {le} sein"
"Input should be a multiple of {multiple_of}": "Eingabe sollte ein Vielfaches von {multiple_of} sein"
"Input should be a finite number": "Eingabe sollte eine endliche Zahl sein"
"{field_type} should have at least {min_length} item{expected_plural} after validation, not {actual_length}": "{field_type} sollte nach der Validierung mindestens {min_length} Element{expected_plural} haben, nicht {actual_length}"
"{field_type} should have at most {max_length} item{expected_plural} after validation, not {actual_length}": "{field_type} sollte nach der Validierung höchstens {max_length} Element{expected_plural} haben, nicht {actual_length}"
"Input should be iterable": "Eingabe sollte iterierbar sein"
"Error iterating over object, error: {error}": "Fehler beim Iterieren des Objekts: {error}"
"Input should be a valid string": "Eingabe sollte eine gültige Zeichenkette sein"
"Input should be a string, not an instance of a subclass of str": "Eingabe sollte ein String sein, keine Instanz einer Unterklasse von str"
"Input should be a valid string, unable to parse raw data as a unicode string": "Eingabe sollte eine gültige Zeichenkette sein, Rohdaten können nicht als Unicode-String geparst werden"
"String should have at least {min_length} character{expected_plural}": "String sollte mindestens {min_length} Zeichen haben"
"String should have at most {max_length} character{expected_plural}": "String sollte höchstens {max_length} Zeichen haben"
"String should match pattern '{pattern}'": "String sollte dem Muster '{pattern}' entsprechen"
"Input should be {expected}": "Eingabe sollte {expected} sein"
"Input should be a valid dictionary": "Eingabe sollte ein gültiges Wörterbuch sein"
"Input should be a valid mapping, error: {error}": "Eingabe sollte eine gültige Zuordnung sein, Fehler: {error}"
"Input should be a valid list": "Eingabe sollte eine gültige Liste sein"
"Input should be a valid tuple": "Eingabe sollte ein gültiges Tupel sein"
"Input should be a valid set": "Eingabe sollte eine gültige Menge sein"
"Set items should be hashable": "Elemente einer Menge sollten hashbar sein"
"Input should be a valid boolean": "Eingabe sollte ein gültiger Boolescher Wert sein"
"Input should be a valid boolean, unable to interpret input": "Eingabe sollte ein gültiger Boolescher Wert sein, Eingabe kann nicht interpretiert werden"
"Input should be a valid integer": "Eingabe sollte eine gültige Ganzzahl sein"
"Input should be a valid integer, unable to parse string as an integer": "Eingabe sollte eine gültige Ganzzahl sein, Zeichenkette konnte nicht als Ganzzahl geparst werden"
"Input should be a valid integer, got a number with a fractional part": "Eingabe sollte eine gültige Ganzzahl sein, Zahl hat einen Dezimalteil"
"Unable to parse input string as an integer, exceeded maximum size": "Zeichenkette konnte nicht als Ganzzahl geparst werden, maximale Größe überschritten"
"Input should be a valid number": "Eingabe sollte eine gültige Zahl sein"
"Input should be a valid number, unable to parse string as a number": "Eingabe sollte eine gültige Zahl sein, Zeichenkette kann nicht als Zahl geparst werden"
"Input should be a valid bytes": "Eingabe sollte gültige Bytes sein"
"Data should have at least {min_length} byte{expected_plural}": "Daten sollten mindestens {min_length} Byte{expected_plural} enthalten"
"Data should have at most {max_length} byte{expected_plural}": "Daten sollten höchstens {max_length} Byte{expected_plural} enthalten"
"Data should be valid {encoding}: {encoding_error}": "Daten sollten gültiges {encoding} sein: {encoding_error}"
"Value error, {error}": "Wertfehler: {error}"
"Assertion failed, {error}": "Assertion fehlgeschlagen: {error}"
"Input should be a valid date": "Eingabe sollte ein gültiges Datum sein"
"Input should be a valid date in the format YYYY-MM-DD, {error}": "Eingabe sollte ein gültiges Datum im Format YYYY-MM-DD sein: {error}"
"Input should be a valid date or datetime, {error}": "Eingabe sollte ein gültiges Datum oder eine gültige Datums-Uhrzeit sein: {error}"
"Datetimes provided to dates should have zero time - e.g. be exact dates": "Datetime-Werte für Datum sollten keine Uhrzeit enthalten - also exakte Daten sein"
"Date should be in the past": "Datum sollte in der Vergangenheit liegen"
"Date should be in the future": "Datum sollte in der Zukunft liegen"
"Input should be a valid time": "Eingabe sollte eine gültige Uhrzeit sein"
"Input should be in a valid time format, {error}": "Eingabe sollte in einem gültigen Zeitformat sein: {error}"
"Input should be a valid datetime": "Eingabe sollte ein gültiges Datum mit Uhrzeit sein"
"Input should be a valid datetime, {error}": "Eingabe sollte ein gültiges Datum mit Uhrzeit sein: {error}"
"Invalid datetime object, got {error}": "Ungültiges Datetime-Objekt: {error}"
"Input should be a valid datetime or date, {error}": "Eingabe sollte ein gültiges Datum oder Datum mit Uhrzeit sein: {error}"
"Input should be in the past": "Eingabe sollte in der Vergangenheit liegen"
"Input should be in the future": "Eingabe sollte in der Zukunft liegen"
"Input should not have timezone info": "Eingabe sollte keine Zeitzonen-Information enthalten"
"Input should have timezone info": "Eingabe sollte Zeitzonen-Information enthalten"
"Timezone offset of {tz_expected} required, got {tz_actual}": "Zeitzonen-Offset von {tz_expected} erforderlich, erhalten: {tz_actual}"
"Input should be a valid timedelta": "Eingabe sollte ein gültiges Zeitdelta sein"
"Input should be a valid timedelta, {error}": "Eingabe sollte ein gültiges Zeitdelta sein: {error}"
"Input should be a valid frozenset": "Eingabe sollte ein gültiges Frozenset sein"
"Input should be an instance of {class}": "Eingabe sollte eine Instanz von {class} sein"
"Input should be a subclass of {class}": "Eingabe sollte eine Unterklasse von {class} sein"
"Input should be callable": "Eingabe sollte aufrufbar sein"
"Input tag '{tag}' found using {discriminator} does not match any of the expected tags: {expected_tags}": "Eingabe-Tag '{tag}', ermittelt durch {discriminator}, stimmt mit keinem der erwarteten Tags überein: {expected_tags}"
"Unable to extract tag using discriminator {discriminator}": "Tag kann mit {discriminator} nicht extrahiert werden"
"Arguments must be a tuple, list or a dictionary": "Argumente müssen ein Tupel, eine Liste oder ein Wörterbuch sein"
"Missing required argument": "Erforderliches Argument fehlt"
"Unexpected keyword argument": "Unerwartetes Schlüsselwort-Argument"
"Missing required keyword only argument": "Erforderliches keyword-only-Argument fehlt"
"Unexpected positional argument": "Unerwartetes Positionsargument"
"Missing required positional only argument": "Erforderliches positional-only-Argument fehlt"
"Got multiple values for argument": "Mehrere Werte für Argument erhalten"
"URL input should be a string or URL": "URL-Eingabe sollte eine Zeichenkette oder URL sein"
"Input should be a valid URL, {error}": "Eingabe sollte eine gültige URL sein: {error}"
"Input violated strict URL syntax rules, {error}": "Eingabe hat strikte URL-Syntaxregeln verletzt: {error}"
"URL should have at most {max_length} character{expected_plural}": "URL sollte höchstens {max_length} Zeichen haben"
"URL scheme should be {expected_schemes}": "URL-Schema sollte {expected_schemes} sein"
"UUID input should be a string, bytes or UUID object": "UUID-Eingabe sollte eine Zeichenkette, Bytes oder ein UUID-Objekt sein"
"Input should be a valid UUID, {error}": "Eingabe sollte eine gültige UUID sein: {error}"
"UUID version {expected_version} expected": "UUID-Version {expected_version} erwartet"
"Decimal input should be an integer, float, string or Decimal object": "Decimal-Eingabe sollte eine Ganzzahl, Gleitkommazahl, Zeichenkette oder ein Decimal-Objekt sein"
"Input should be a valid decimal": "Eingabe sollte ein gültiges Decimal sein"
"Decimal input should have no more than {max_digits} digit{expected_plural} in total": "Decimal-Eingabe sollte insgesamt nicht mehr als {max_digits} Ziffer{expected_plural} haben"
"Decimal input should have no more than {decimal_places} decimal place{expected_plural}": "Decimal-Eingabe sollte nicht mehr als {decimal_places} Dezimalstelle{expected_plural} haben"
"Decimal input should have no more than {whole_digits} digit{expected_plural} before the decimal point": "Decimal-Eingabe sollte vor dem Dezimalpunkt nicht mehr als {whole_digits} Ziffer{expected_plural} haben"
? "Input should be a valid python complex object, a number, or a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex"
: "Eingabe sollte ein gültiges Python-komplexes Objekt, eine Zahl oder eine gültige komplexe Zeichenkette sein, gemäß https://docs.python.org/3/library/functions.html#complex"
"Input should be a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex": "Eingabe sollte eine gültige komplexe Zeichenkette sein, gemäß https://docs.python.org/3/library/functions.html#complex"
format_validation_error:
"validation error": "Validierungsfehler"
"%s for [%s]:": "%s für [%s]:"
"' or '": "' oder '"
#################################################
kleinanzeigen_bot/utils/web_scraping_mixin.py:
#################################################

View File

@@ -58,9 +58,9 @@ def apply_defaults(
return target
def defaultdict_to_dict(d: defaultdict[K, V]) -> dict[K, V]:
def defaultdict_to_dict(d:defaultdict[K, V]) -> dict[K, V]:
"""Recursively convert defaultdict to dict."""
result: dict[K, V] = {}
result:dict[K, V] = {}
for key, value in d.items():
if isinstance(value, defaultdict):
result[key] = defaultdict_to_dict(value) # type: ignore[assignment]

View File

@@ -5,24 +5,29 @@ import sys, traceback # isort: skip
from types import FrameType, TracebackType
from typing import Final
from pydantic import ValidationError
from . import loggers
from .pydantics import format_validation_error
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
def on_exception(ex_type:type[BaseException] | None, ex_value:BaseException | None, ex_traceback:TracebackType | None) -> None:
if ex_type is None or ex_value is None:
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s", ex_type, ex_value)
def on_exception(ex_type:type[BaseException] | None, ex:BaseException | None, ex_traceback:TracebackType | None) -> None:
if ex_type is None or ex is None:
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex=%s", ex_type, ex)
return
if issubclass(ex_type, KeyboardInterrupt):
sys.__excepthook__(ex_type, ex_value, ex_traceback)
elif loggers.is_debug(LOG) or isinstance(ex_value, (AttributeError, ImportError, NameError, TypeError)):
LOG.error("".join(traceback.format_exception(ex_type, ex_value, ex_traceback)))
elif isinstance(ex_value, AssertionError):
LOG.error(ex_value)
sys.__excepthook__(ex_type, ex, ex_traceback)
elif loggers.is_debug(LOG) or isinstance(ex, (AttributeError, ImportError, NameError, TypeError)):
LOG.error("".join(traceback.format_exception(ex_type, ex, ex_traceback)))
elif isinstance(ex, ValidationError):
LOG.error(format_validation_error(ex))
elif isinstance(ex, AssertionError):
LOG.error(ex)
else:
LOG.error("%s: %s", ex_type.__name__, ex_value)
LOG.error("%s: %s", ex_type.__name__, ex)
sys.exit(1)

View File

@@ -16,6 +16,9 @@ __all__ = [
"LogFileHandle",
"DEBUG",
"INFO",
"WARNING",
"ERROR",
"CRITICAL",
"configure_console_logging",
"configure_file_logging",
"flush_all_handlers",

View File

@@ -0,0 +1,195 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from gettext import gettext as _
from typing import Any, cast
from pydantic import BaseModel, ValidationError
from pydantic_core import InitErrorDetails
from typing_extensions import Self
from kleinanzeigen_bot.utils.i18n import pluralize
# ValidationError subclass that additionally carries the validation context.
# ContextualModel.model_validate() assigns `context` on the instance it raises, and
# format_validation_error() prints it in the header instead of the error title when it is set.
class ContextualValidationError(ValidationError):
context:Any  # the `context` argument given to ContextualModel.model_validate(); may be None
class ContextualModel(BaseModel):
    """
    Base model whose validation failures keep track of the validation context.
    """

    @classmethod
    def model_validate(
        cls,
        obj:Any,
        *,
        strict:bool | None = None,
        from_attributes:bool | None = None,
        context:Any | None = None,
        by_alias:bool | None = None,
        by_name:bool | None = None,
    ) -> Self:
        """
        Validate `obj` by delegating to BaseModel.model_validate with unchanged arguments.

        If the delegate raises a ValidationError, it is re-raised as a
        ContextualValidationError that has the same title and line errors,
        exposes the given `context` via its `context` attribute and is
        chained to the original error.
        """
        try:
            validated = super().model_validate(
                obj,
                strict = strict,
                from_attributes = from_attributes,
                context = context,
                by_alias = by_alias,
                by_name = by_name,
            )
        except ValidationError as cause:
            # rebuild the error from the original's details so that title and line errors are kept
            line_errors = cast(list[InitErrorDetails], cause.errors())
            contextual_error = ContextualValidationError.from_exception_data(
                title = cause.title,
                line_errors = line_errors,
            )
            contextual_error.context = context
            raise contextual_error from cause
        return validated
def format_validation_error(ex:ValidationError) -> str:
    """
    Render a Pydantic ValidationError as a translated, multi-line message:

        N validation error(s) for [<context or error title>]:
        - <field.path>: <message>

    The header shows the context of a ContextualValidationError if one is set,
    otherwise the error title. Errors whose type has no message template, or
    whose template cannot be filled from the error's `ctx`, are rendered with
    Pydantic's own message followed by `[type=<code>]`.

    >>> from pydantic import BaseModel, ValidationError
    >>> class M(BaseModel): x: int
    >>> try:
    ...     M(x="no-int")
    ... except ValidationError as e:
    ...     print(format_validation_error(e))
    1 validation error for [M]:
    - x: Input should be a valid integer, unable to parse string as an integer

    :param ex: the validation error to render
    :return: the header line plus one "- location: message" line per error, joined by newlines
    """
    errors = ex.errors(include_url = False, include_input = False, include_context = True)
    ctx = ex.context if isinstance(ex, ContextualValidationError) and ex.context else ex.title
    header = _("%s for [%s]:") % (pluralize("validation error", ex.error_count()), ctx)
    lines = [header]
    for err in errors:
        loc = ".".join(str(p) for p in err["loc"])
        msg_ctx = err.get("ctx")
        code = err["type"]

        msg:str | None = None
        # the template is already translated by __get_message_template, no second _() lookup needed
        msg_template = __get_message_template(code)
        if msg_template:
            try:
                msg = msg_template.format(**msg_ctx) if msg_ctx else msg_template
            except (KeyError, IndexError):
                # The template uses a placeholder that is not part of the error context.
                # This function runs while reporting an error, so it must not fail itself:
                # fall back to the message rendered by Pydantic.
                msg = None

        if msg is None:
            lines.append(f"- {loc}: {err['msg']} [type={code}]")
        else:
            msg = msg.replace("' or '", _("' or '"))
            lines.append(f"- {loc}: {msg}")
    return "\n".join(lines)
def __get_message_template(error_code:str) -> str | None:
"""
Map a Pydantic error type code to its message template.

Each template is passed through `_()` inside this function. A template may contain
`{name}` placeholders; format_validation_error() fills them via str.format() from the
`ctx` dict of the error.

:param error_code: the `type` value of a Pydantic error detail
:return: the message template, or None for "custom_error" and for codes not listed here,
in which case format_validation_error() prints Pydantic's own message instead
"""
# upstream list of error types and their English message templates:
# https://github.com/pydantic/pydantic-core/blob/d03bf4a01ca3b378cc8590bd481f307e82115bc6/src/errors/types.rs#L477
# ruff: noqa: PLR0911 Too many return statements
# ruff: noqa: PLR0912 Too many branches
# ruff: noqa: E701 Multiple statements on one line (colon)
match error_code:
case "no_such_attribute": return _("Object has no attribute '{attribute}'")
case "json_invalid": return _("Invalid JSON: {error}")
case "json_type": return _("JSON input should be string, bytes or bytearray")
case "needs_python_object": return _("Cannot check `{method_name}` when validating from json, use a JsonOrPython validator instead")
case "recursion_loop": return _("Recursion error - cyclic reference detected")
case "missing": return _("Field required")
case "frozen_field": return _("Field is frozen")
case "frozen_instance": return _("Instance is frozen")
case "extra_forbidden": return _("Extra inputs are not permitted")
case "invalid_key": return _("Keys should be strings")
case "get_attribute_error": return _("Error extracting attribute: {error}")
case "model_type": return _("Input should be a valid dictionary or instance of {class_name}")
case "model_attributes_type": return _("Input should be a valid dictionary or object to extract fields from")
case "dataclass_type": return _("Input should be a dictionary or an instance of {class_name}")
case "dataclass_exact_type": return _("Input should be an instance of {class_name}")
case "none_required": return _("Input should be None")
case "greater_than": return _("Input should be greater than {gt}")
case "greater_than_equal": return _("Input should be greater than or equal to {ge}")
case "less_than": return _("Input should be less than {lt}")
case "less_than_equal": return _("Input should be less than or equal to {le}")
case "multiple_of": return _("Input should be a multiple of {multiple_of}")
case "finite_number": return _("Input should be a finite number")
case "too_short": return _("{field_type} should have at least {min_length} item{expected_plural} after validation, not {actual_length}")
case "too_long": return _("{field_type} should have at most {max_length} item{expected_plural} after validation, not {actual_length}")
case "iterable_type": return _("Input should be iterable")
case "iteration_error": return _("Error iterating over object, error: {error}")
case "string_type": return _("Input should be a valid string")
case "string_sub_type": return _("Input should be a string, not an instance of a subclass of str")
case "string_unicode": return _("Input should be a valid string, unable to parse raw data as a unicode string")
case "string_too_short": return _("String should have at least {min_length} character{expected_plural}")
case "string_too_long": return _("String should have at most {max_length} character{expected_plural}")
case "string_pattern_mismatch": return _("String should match pattern '{pattern}'")
case "enum": return _("Input should be {expected}")
case "dict_type": return _("Input should be a valid dictionary")
case "mapping_type": return _("Input should be a valid mapping, error: {error}")
case "list_type": return _("Input should be a valid list")
case "tuple_type": return _("Input should be a valid tuple")
case "set_type": return _("Input should be a valid set")
case "set_item_not_hashable": return _("Set items should be hashable")
case "bool_type": return _("Input should be a valid boolean")
case "bool_parsing": return _("Input should be a valid boolean, unable to interpret input")
case "int_type": return _("Input should be a valid integer")
case "int_parsing": return _("Input should be a valid integer, unable to parse string as an integer")
case "int_from_float": return _("Input should be a valid integer, got a number with a fractional part")
case "int_parsing_size": return _("Unable to parse input string as an integer, exceeded maximum size")
case "float_type": return _("Input should be a valid number")
case "float_parsing": return _("Input should be a valid number, unable to parse string as a number")
case "bytes_type": return _("Input should be a valid bytes")
case "bytes_too_short": return _("Data should have at least {min_length} byte{expected_plural}")
case "bytes_too_long": return _("Data should have at most {max_length} byte{expected_plural}")
case "bytes_invalid_encoding": return _("Data should be valid {encoding}: {encoding_error}")
case "value_error": return _("Value error, {error}")
case "assertion_error": return _("Assertion failed, {error}")
case "custom_error": return None # handled separately
case "literal_error": return _("Input should be {expected}")
case "date_type": return _("Input should be a valid date")
case "date_parsing": return _("Input should be a valid date in the format YYYY-MM-DD, {error}")
case "date_from_datetime_parsing": return _("Input should be a valid date or datetime, {error}")
case "date_from_datetime_inexact": return _("Datetimes provided to dates should have zero time - e.g. be exact dates")
case "date_past": return _("Date should be in the past")
case "date_future": return _("Date should be in the future")
case "time_type": return _("Input should be a valid time")
case "time_parsing": return _("Input should be in a valid time format, {error}")
case "datetime_type": return _("Input should be a valid datetime")
case "datetime_parsing": return _("Input should be a valid datetime, {error}")
case "datetime_object_invalid": return _("Invalid datetime object, got {error}")
case "datetime_from_date_parsing": return _("Input should be a valid datetime or date, {error}")
case "datetime_past": return _("Input should be in the past")
case "datetime_future": return _("Input should be in the future")
case "timezone_naive": return _("Input should not have timezone info")
case "timezone_aware": return _("Input should have timezone info")
case "timezone_offset": return _("Timezone offset of {tz_expected} required, got {tz_actual}")
case "time_delta_type": return _("Input should be a valid timedelta")
case "time_delta_parsing": return _("Input should be a valid timedelta, {error}")
case "frozen_set_type": return _("Input should be a valid frozenset")
case "is_instance_of": return _("Input should be an instance of {class}")
case "is_subclass_of": return _("Input should be a subclass of {class}")
case "callable_type": return _("Input should be callable")
case "union_tag_invalid": return _("Input tag '{tag}' found using {discriminator} does not match any of the expected tags: {expected_tags}")
case "union_tag_not_found": return _("Unable to extract tag using discriminator {discriminator}")
case "arguments_type": return _("Arguments must be a tuple, list or a dictionary")
case "missing_argument": return _("Missing required argument")
case "unexpected_keyword_argument": return _("Unexpected keyword argument")
case "missing_keyword_only_argument": return _("Missing required keyword only argument")
case "unexpected_positional_argument": return _("Unexpected positional argument")
case "missing_positional_only_argument": return _("Missing required positional only argument")
case "multiple_argument_values": return _("Got multiple values for argument")
case "url_type": return _("URL input should be a string or URL")
case "url_parsing": return _("Input should be a valid URL, {error}")
case "url_syntax_violation": return _("Input violated strict URL syntax rules, {error}")
case "url_too_long": return _("URL should have at most {max_length} character{expected_plural}")
case "url_scheme": return _("URL scheme should be {expected_schemes}")
case "uuid_type": return _("UUID input should be a string, bytes or UUID object")
case "uuid_parsing": return _("Input should be a valid UUID, {error}")
case "uuid_version": return _("UUID version {expected_version} expected")
case "decimal_type": return _("Decimal input should be an integer, float, string or Decimal object")
case "decimal_parsing": return _("Input should be a valid decimal")
case "decimal_max_digits": return _("Decimal input should have no more than {max_digits} digit{expected_plural} in total")
case "decimal_max_places": return _("Decimal input should have no more than {decimal_places} decimal place{expected_plural}")
case "decimal_whole_digits": return _("Decimal input should have no more than {whole_digits} digit{expected_plural} before the decimal point")
case "complex_type": return _("Input should be a valid python complex object, a number, or a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex")
case "complex_str_parsing": return _("Input should be a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex")
# unknown error type: no template available
case _: return None

View File

@@ -60,8 +60,8 @@ class BrowserConfig:
self.binary_location:str | None = None
self.extensions:Iterable[str] = []
self.use_private_window:bool = True
self.user_data_dir:str = ""
self.profile_name:str = ""
self.user_data_dir:str | None = None
self.profile_name:str | None = None
class WebScrapingMixin:

View File

@@ -9,6 +9,7 @@ import pytest
from kleinanzeigen_bot import KleinanzeigenBot
from kleinanzeigen_bot.extract import AdExtractor
from kleinanzeigen_bot.model.config_model import Config
from kleinanzeigen_bot.utils import loggers
from kleinanzeigen_bot.utils.web_scraping_mixin import Browser
@@ -29,50 +30,39 @@ def test_data_dir(tmp_path:str) -> str:
@pytest.fixture
def sample_config() -> dict[str, Any]:
def test_bot_config() -> Config:
"""Provides a basic sample configuration for testing.
This configuration includes all required fields for the bot to function:
- Login credentials (username/password)
- Browser settings
- Ad defaults (description prefix/suffix)
- Publishing settings
"""
return {
"login": {
"username": "testuser",
"password": "testpass"
},
"browser": {
"arguments": [],
"binary_location": None,
"extensions": [],
"use_private_window": True,
"user_data_dir": None,
"profile_name": None
},
return Config.model_validate({
"ad_defaults": {
"description": {
"prefix": "Test Prefix",
"suffix": "Test Suffix"
}
"contact": {
"name": "dummy_name"
},
},
"login": {
"username": "dummy_user",
"password": "dummy_password"
},
"publishing": {
"delete_old_ads": "BEFORE_PUBLISH",
"delete_old_ads_by_title": False
}
}
})
@pytest.fixture
def test_bot(sample_config:dict[str, Any]) -> KleinanzeigenBot:
"""Provides a fresh KleinanzeigenBot instance for all test classes.
def test_bot(test_bot_config:Config) -> KleinanzeigenBot:
"""Provides a fresh KleinanzeigenBot instance for all test methods.
Dependencies:
- sample_config: Used to initialize the bot with a valid configuration
- test_bot_config: Used to initialize the bot with a valid configuration
"""
bot_instance = KleinanzeigenBot()
bot_instance.config = sample_config
bot_instance.config = test_bot_config
return bot_instance
@@ -97,14 +87,14 @@ def log_file_path(test_data_dir:str) -> str:
@pytest.fixture
def test_extractor(browser_mock:MagicMock, sample_config:dict[str, Any]) -> AdExtractor:
def test_extractor(browser_mock:MagicMock, test_bot_config:Config) -> AdExtractor:
"""Provides a fresh AdExtractor instance for testing.
Dependencies:
- browser_mock: Used to mock browser interactions
- sample_config: Used to initialize the extractor with a valid configuration
- test_bot_config: Used to initialize the extractor with a valid configuration
"""
return AdExtractor(browser_mock, sample_config)
return AdExtractor(browser_mock, test_bot_config)
@pytest.fixture
@@ -174,21 +164,6 @@ def description_test_cases() -> list[tuple[dict[str, Any], str, str]]:
"Original Description",
"Original Description"
),
# Test case 6: Non-string values in config
(
{
"ad_defaults": {
"description_prefix": 123,
"description_suffix": True,
"description": {
"prefix": [],
"suffix": {}
}
}
},
"Original Description",
"Original Description"
)
]
@@ -200,3 +175,8 @@ def mock_web_text_responses() -> list[str]:
"Test Description", # Description
"03.02.2025" # Creation date
]
@pytest.fixture(autouse = True)
def silence_nodriver_logs() -> None:
loggers.get_logger("nodriver").setLevel(loggers.WARNING)

View File

@@ -6,6 +6,7 @@ from typing import Any
import pytest
from kleinanzeigen_bot import ads
from kleinanzeigen_bot.model.config_model import Config
def test_calculate_content_hash_with_none_values() -> None:
@@ -90,12 +91,6 @@ def test_calculate_content_hash_with_none_values() -> None:
True,
""
),
# Test non-string values
(
{"ad_defaults": {"description_prefix": 123, "description_suffix": True}},
True,
""
),
# Add test for malformed config
(
{}, # Empty config
@@ -108,69 +103,13 @@ def test_calculate_content_hash_with_none_values() -> None:
True,
""
),
# Test for non-dict ad_defaults
(
{"ad_defaults": "invalid"},
True,
""
),
# Test for invalid type in description field
(
{"ad_defaults": {"description": 123}},
True,
""
)
])
def test_get_description_affixes(
config:dict[str, Any],
prefix:bool,
expected:str
expected:str,
test_bot_config:Config
) -> None:
"""Test get_description_affixes function with various inputs."""
result = ads.get_description_affixes(config, prefix = prefix)
assert result == expected
@pytest.mark.parametrize(("config", "prefix", "expected"), [
# Add test for malformed config
(
{}, # Empty config
True,
""
),
# Test for missing ad_defaults
(
{"some_other_key": {}},
True,
""
),
# Test for non-dict ad_defaults
(
{"ad_defaults": "invalid"},
True,
""
),
# Test for invalid type in description field
(
{"ad_defaults": {"description": 123}},
True,
""
)
])
def test_get_description_affixes_edge_cases(config:dict[str, Any], prefix:bool, expected:str) -> None:
"""Test edge cases for description affix handling."""
assert ads.get_description_affixes(config, prefix = prefix) == expected
@pytest.mark.parametrize(("config", "expected"), [
(None, ""), # Test with None
([], ""), # Test with an empty list
("string", ""), # Test with a string
(123, ""), # Test with an integer
(3.14, ""), # Test with a float
(set(), ""), # Test with an empty set
])
def test_get_description_affixes_edge_cases_non_dict(config:Any, expected:str) -> None:
"""Test get_description_affixes function with non-dict inputs."""
result = ads.get_description_affixes(config, prefix = True)
result = ads.get_description_affixes(test_bot_config.with_values(config), prefix = prefix)
assert result == expected

View File

@@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, call, patch
import pytest
from kleinanzeigen_bot.extract import AdExtractor
from kleinanzeigen_bot.model.config_model import Config, DownloadConfig
from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
@@ -36,11 +37,11 @@ class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never
class TestAdExtractorBasics:
"""Basic synchronous tests for AdExtractor."""
def test_constructor(self, browser_mock:MagicMock, sample_config:dict[str, Any]) -> None:
def test_constructor(self, browser_mock:MagicMock, test_bot_config:Config) -> None:
"""Test the constructor of AdExtractor"""
extractor = AdExtractor(browser_mock, sample_config)
extractor = AdExtractor(browser_mock, test_bot_config)
assert extractor.browser == browser_mock
assert extractor.config == sample_config
assert extractor.config == test_bot_config
@pytest.mark.parametrize(("url", "expected_id"), [
("https://www.kleinanzeigen.de/s-anzeige/test-title/12345678", 12345678),
@@ -168,7 +169,7 @@ class TestAdExtractorShipping:
}
# Enable all matching options in config
test_extractor.config["download"] = {"include_all_matching_shipping_options": True}
test_extractor.config.download = DownloadConfig.model_validate({"include_all_matching_shipping_options": True})
with patch.object(test_extractor, "page", MagicMock()), \
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \
@@ -202,10 +203,10 @@ class TestAdExtractorShipping:
}
# Enable all matching options and exclude DHL in config
test_extractor.config["download"] = {
test_extractor.config.download = DownloadConfig.model_validate({
"include_all_matching_shipping_options": True,
"excluded_shipping_options": ["DHL_2"]
}
})
with patch.object(test_extractor, "page", MagicMock()), \
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \
@@ -238,9 +239,9 @@ class TestAdExtractorShipping:
}
# Exclude the matching option
test_extractor.config["download"] = {
test_extractor.config.download = DownloadConfig.model_validate({
"excluded_shipping_options": ["Hermes_Päckchen"]
}
})
with patch.object(test_extractor, "page", MagicMock()), \
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 4,89 €"), \
@@ -407,13 +408,14 @@ class TestAdExtractorContent:
def extractor_with_config(self) -> AdExtractor:
"""Create extractor with specific config for testing prefix/suffix handling."""
browser_mock = MagicMock(spec = Browser)
return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests
return AdExtractor(browser_mock, Config()) # Empty config, will be overridden in tests
@pytest.mark.asyncio
async def test_extract_description_with_affixes(
self,
test_extractor:AdExtractor,
description_test_cases:list[tuple[dict[str, Any], str, str]]
description_test_cases:list[tuple[dict[str, Any], str, str]],
test_bot_config:Config
) -> None:
"""Test extraction of description with various prefix/suffix configurations."""
# Mock the page
@@ -422,7 +424,7 @@ class TestAdExtractorContent:
test_extractor.page = page_mock
for config, raw_description, _ in description_test_cases: # Changed to _ since we don't use expected_description
test_extractor.config = config
test_extractor.config = test_bot_config.with_values(config)
with patch.multiple(test_extractor,
web_text = AsyncMock(side_effect = [
@@ -483,7 +485,6 @@ class TestAdExtractorContent:
page_mock = MagicMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
test_extractor.page = page_mock
test_extractor.config = {"ad_defaults": {}} # Empty config
raw_description = "Original Description"
with patch.multiple(test_extractor,
@@ -525,17 +526,17 @@ class TestAdExtractorCategory:
"""Tests for category extraction functionality."""
@pytest.fixture
def extractor(self) -> AdExtractor:
def extractor(self, test_bot_config:Config) -> AdExtractor:
browser_mock = MagicMock(spec = Browser)
config_mock = {
config = test_bot_config.with_values({
"ad_defaults": {
"description": {
"prefix": "Test Prefix",
"suffix": "Test Suffix"
}
}
}
return AdExtractor(browser_mock, config_mock)
})
return AdExtractor(browser_mock, config)
@pytest.mark.asyncio
# pylint: disable=protected-access
@@ -581,17 +582,17 @@ class TestAdExtractorContact:
"""Tests for contact information extraction."""
@pytest.fixture
def extractor(self) -> AdExtractor:
def extractor(self, test_bot_config:Config) -> AdExtractor:
browser_mock = MagicMock(spec = Browser)
config_mock = {
config = test_bot_config.with_values({
"ad_defaults": {
"description": {
"prefix": "Test Prefix",
"suffix": "Test Suffix"
}
}
}
return AdExtractor(browser_mock, config_mock)
})
return AdExtractor(browser_mock, config)
@pytest.mark.asyncio
# pylint: disable=protected-access
@@ -663,17 +664,17 @@ class TestAdExtractorDownload:
"""Tests for download functionality."""
@pytest.fixture
def extractor(self) -> AdExtractor:
def extractor(self, test_bot_config:Config) -> AdExtractor:
browser_mock = MagicMock(spec = Browser)
config_mock = {
config = test_bot_config.with_values({
"ad_defaults": {
"description": {
"prefix": "Test Prefix",
"suffix": "Test Suffix"
}
}
}
return AdExtractor(browser_mock, config_mock)
})
return AdExtractor(browser_mock, config)
@pytest.mark.asyncio
async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None:

View File

@@ -1,19 +1,22 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import copy, os, tempfile # isort: skip
import copy, io, logging, os, tempfile # isort: skip
from collections.abc import Generator
from contextlib import redirect_stdout
from datetime import timedelta
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from pydantic import ValidationError
from ruamel.yaml import YAML
from kleinanzeigen_bot import LOG, KleinanzeigenBot, misc
from kleinanzeigen_bot._version import __version__
from kleinanzeigen_bot.ads import calculate_content_hash
from kleinanzeigen_bot.model.config_model import AdDefaults, Config, PublishingConfig
from kleinanzeigen_bot.utils import loggers
@@ -150,7 +153,7 @@ class TestKleinanzeigenBotInitialization:
def test_constructor_initializes_default_values(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that constructor sets all default values correctly."""
assert test_bot.root_url == "https://www.kleinanzeigen.de"
assert isinstance(test_bot.config, dict)
assert isinstance(test_bot.config, Config)
assert test_bot.command == "help"
assert test_bot.ads_selector == "due"
assert test_bot.keep_old_ads is False
@@ -218,15 +221,37 @@ class TestKleinanzeigenBotCommandLine:
def test_parse_args_handles_help_command(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that help command is handled correctly."""
with pytest.raises(SystemExit) as exc_info:
buf = io.StringIO()
with pytest.raises(SystemExit) as exc_info, redirect_stdout(buf):
test_bot.parse_args(["dummy", "--help"])
assert exc_info.value.code == 0
stdout = buf.getvalue()
assert "publish" in stdout
assert "verify" in stdout
assert "help" in stdout
assert "version" in stdout
assert "--verbose" in stdout
def test_parse_args_handles_invalid_arguments(self, test_bot:KleinanzeigenBot) -> None:
def test_parse_args_handles_invalid_arguments(self, test_bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None:
"""Verify that invalid arguments are handled correctly."""
caplog.set_level(logging.ERROR)
with pytest.raises(SystemExit) as exc_info:
test_bot.parse_args(["dummy", "--invalid-option"])
assert exc_info.value.code == 2
assert any(
record.levelno == logging.ERROR
and (
"--invalid-option not recognized" in record.getMessage()
or "Option --invalid-option unbekannt" in record.getMessage()
)
for record in caplog.records
)
assert any(
("--invalid-option not recognized" in m)
or ("Option --invalid-option unbekannt" in m)
for m in caplog.messages
)
def test_parse_args_handles_verbose_flag(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that verbose flag sets correct log level."""
@@ -246,109 +271,88 @@ class TestKleinanzeigenBotConfiguration:
def test_load_config_handles_missing_file(
self,
test_bot:KleinanzeigenBot,
test_data_dir:str,
sample_config:dict[str, Any]
test_data_dir:str
) -> None:
"""Verify that loading a missing config file creates default config."""
config_path = Path(test_data_dir) / "missing_config.yaml"
config_path.unlink(missing_ok = True)
test_bot.config_file_path = str(config_path)
# Add categories to sample config
sample_config_with_categories = sample_config.copy()
sample_config_with_categories["categories"] = {}
with patch.object(LOG, "warning") as mock_warning:
with pytest.raises(ValidationError) as exc_info:
test_bot.load_config()
with patch("kleinanzeigen_bot.utils.dicts.load_dict_if_exists", return_value = None), \
patch.object(LOG, "warning") as mock_warning, \
patch("kleinanzeigen_bot.utils.dicts.save_dict") as mock_save, \
patch("kleinanzeigen_bot.utils.dicts.load_dict_from_module") as mock_load_module:
mock_load_module.side_effect = [
sample_config_with_categories, # config_defaults.yaml
{"cat1": "id1"}, # categories.yaml
{"cat2": "id2"} # categories_old.yaml
]
test_bot.load_config()
mock_warning.assert_called_once()
mock_save.assert_called_once_with(str(config_path), sample_config_with_categories)
# Verify categories were loaded
assert test_bot.categories == {"cat1": "id1", "cat2": "id2"}
assert test_bot.config == sample_config_with_categories
assert config_path.exists()
assert "login.username" in str(exc_info.value)
assert "login.password" in str(exc_info.value)
def test_load_config_validates_required_fields(self, test_bot:KleinanzeigenBot, test_data_dir:str) -> None:
"""Verify that config validation checks required fields."""
config_path = Path(test_data_dir) / "config.yaml"
config_content = """
login:
username: testuser
username: dummy_user
# Missing password
browser:
arguments: []
"""
with open(config_path, "w", encoding = "utf-8") as f:
f.write(config_content)
test_bot.config_file_path = str(config_path)
with pytest.raises(AssertionError) as exc_info:
with pytest.raises(ValidationError) as exc_info:
test_bot.load_config()
assert "[login.password] not specified" in str(exc_info.value)
assert "login.username" not in str(exc_info.value)
assert "login.password" in str(exc_info.value)
class TestKleinanzeigenBotAuthentication:
"""Tests for login and authentication functionality."""
@pytest.fixture
def configured_bot(self, test_bot:KleinanzeigenBot, sample_config:dict[str, Any]) -> KleinanzeigenBot:
"""Provides a bot instance with basic configuration."""
test_bot.config = sample_config
return test_bot
@pytest.mark.asyncio
async def test_assert_free_ad_limit_not_reached_success(self, configured_bot:KleinanzeigenBot) -> None:
async def test_assert_free_ad_limit_not_reached_success(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that free ad limit check succeeds when limit not reached."""
with patch.object(configured_bot, "web_find", side_effect = TimeoutError):
await configured_bot.assert_free_ad_limit_not_reached()
with patch.object(test_bot, "web_find", side_effect = TimeoutError):
await test_bot.assert_free_ad_limit_not_reached()
@pytest.mark.asyncio
async def test_assert_free_ad_limit_not_reached_limit_reached(self, configured_bot:KleinanzeigenBot) -> None:
async def test_assert_free_ad_limit_not_reached_limit_reached(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that free ad limit check fails when limit is reached."""
with patch.object(configured_bot, "web_find", return_value = AsyncMock()):
with patch.object(test_bot, "web_find", return_value = AsyncMock()):
with pytest.raises(AssertionError) as exc_info:
await configured_bot.assert_free_ad_limit_not_reached()
await test_bot.assert_free_ad_limit_not_reached()
assert "Cannot publish more ads" in str(exc_info.value)
@pytest.mark.asyncio
async def test_is_logged_in_returns_true_when_logged_in(self, configured_bot:KleinanzeigenBot) -> None:
async def test_is_logged_in_returns_true_when_logged_in(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that login check returns true when logged in."""
with patch.object(configured_bot, "web_text", return_value = "Welcome testuser"):
assert await configured_bot.is_logged_in() is True
with patch.object(test_bot, "web_text", return_value = "Welcome dummy_user"):
assert await test_bot.is_logged_in() is True
@pytest.mark.asyncio
async def test_is_logged_in_returns_true_with_alternative_element(self, configured_bot:KleinanzeigenBot) -> None:
async def test_is_logged_in_returns_true_with_alternative_element(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that login check returns true when logged in with alternative element."""
with patch.object(configured_bot, "web_text", side_effect = [
with patch.object(test_bot, "web_text", side_effect = [
TimeoutError(), # First try with mr-medium fails
"angemeldet als: testuser" # Second try with user-email succeeds
"angemeldet als: dummy_user" # Second try with user-email succeeds
]):
assert await configured_bot.is_logged_in() is True
assert await test_bot.is_logged_in() is True
@pytest.mark.asyncio
async def test_is_logged_in_returns_false_when_not_logged_in(self, configured_bot:KleinanzeigenBot) -> None:
async def test_is_logged_in_returns_false_when_not_logged_in(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that login check returns false when not logged in."""
with patch.object(configured_bot, "web_text", side_effect = TimeoutError):
assert await configured_bot.is_logged_in() is False
with patch.object(test_bot, "web_text", side_effect = TimeoutError):
assert await test_bot.is_logged_in() is False
@pytest.mark.asyncio
async def test_login_flow_completes_successfully(self, configured_bot:KleinanzeigenBot) -> None:
async def test_login_flow_completes_successfully(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that normal login flow completes successfully."""
with patch.object(configured_bot, "web_open") as mock_open, \
patch.object(configured_bot, "is_logged_in", side_effect = [False, True]) as mock_logged_in, \
patch.object(configured_bot, "web_find", side_effect = TimeoutError), \
patch.object(configured_bot, "web_input") as mock_input, \
patch.object(configured_bot, "web_click") as mock_click:
with patch.object(test_bot, "web_open") as mock_open, \
patch.object(test_bot, "is_logged_in", side_effect = [False, True]) as mock_logged_in, \
patch.object(test_bot, "web_find", side_effect = TimeoutError), \
patch.object(test_bot, "web_input") as mock_input, \
patch.object(test_bot, "web_click") as mock_click:
await configured_bot.login()
await test_bot.login()
mock_open.assert_called()
mock_logged_in.assert_called()
@@ -356,14 +360,14 @@ class TestKleinanzeigenBotAuthentication:
mock_click.assert_called()
@pytest.mark.asyncio
async def test_login_flow_handles_captcha(self, configured_bot:KleinanzeigenBot) -> None:
async def test_login_flow_handles_captcha(self, test_bot:KleinanzeigenBot) -> None:
"""Verify that login flow handles captcha correctly."""
with patch.object(configured_bot, "web_open"), \
patch.object(configured_bot, "is_logged_in", return_value = False), \
patch.object(configured_bot, "web_find") as mock_find, \
patch.object(configured_bot, "web_await") as mock_await, \
patch.object(configured_bot, "web_input"), \
patch.object(configured_bot, "web_click"), \
with patch.object(test_bot, "web_open"), \
patch.object(test_bot, "is_logged_in", return_value = False), \
patch.object(test_bot, "web_find") as mock_find, \
patch.object(test_bot, "web_await") as mock_await, \
patch.object(test_bot, "web_input"), \
patch.object(test_bot, "web_click"), \
patch("kleinanzeigen_bot.ainput") as mock_ainput:
mock_find.side_effect = [
@@ -376,7 +380,7 @@ class TestKleinanzeigenBotAuthentication:
mock_await.return_value = True
mock_ainput.return_value = ""
await configured_bot.login()
await test_bot.login()
assert mock_find.call_count >= 2
mock_await.assert_called_once()
@@ -440,7 +444,7 @@ class TestKleinanzeigenBotBasics:
def test_get_config_defaults(self, test_bot:KleinanzeigenBot) -> None:
"""Test default configuration values."""
assert isinstance(test_bot.config, dict)
assert isinstance(test_bot.config, Config)
assert test_bot.command == "help"
assert test_bot.ads_selector == "due"
assert test_bot.keep_old_ads is False
@@ -578,7 +582,7 @@ login:
""")
test_bot.config_file_path = str(config_path)
await test_bot.run(["script.py", "verify"])
assert test_bot.config["login"]["username"] == "test"
assert test_bot.config.login.username == "test"
class TestKleinanzeigenBotAdOperations:
@@ -607,7 +611,7 @@ class TestKleinanzeigenBotAdOperations:
def test_load_ads_no_files(self, test_bot:KleinanzeigenBot) -> None:
"""Test loading ads with no files."""
test_bot.config["ad_files"] = ["nonexistent/*.yaml"]
test_bot.config.ad_files = ["nonexistent/*.yaml"]
ads = test_bot.load_ads()
assert len(ads) == 0
@@ -676,7 +680,7 @@ categories:
# Set config file path to tmp_path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
with pytest.raises(AssertionError) as exc_info:
test_bot.load_ads()
assert "must be at least 10 characters long" in str(exc_info.value)
@@ -700,7 +704,7 @@ categories:
# Set config file path to tmp_path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
with pytest.raises(AssertionError) as exc_info:
test_bot.load_ads()
assert "property [price_type] must be one of:" in str(exc_info.value)
@@ -724,7 +728,7 @@ categories:
# Set config file path to tmp_path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
with pytest.raises(AssertionError) as exc_info:
test_bot.load_ads()
assert "property [shipping_type] must be one of:" in str(exc_info.value)
@@ -749,7 +753,7 @@ categories:
# Set config file path to tmp_path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
with pytest.raises(AssertionError) as exc_info:
test_bot.load_ads()
assert "must not be specified for GIVE_AWAY ad" in str(exc_info.value)
@@ -774,7 +778,7 @@ categories:
# Set config file path to tmp_path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
with pytest.raises(AssertionError) as exc_info:
test_bot.load_ads()
assert "not specified" in str(exc_info.value)
@@ -794,12 +798,7 @@ categories:
)
# Mock the config to prevent auto-detection
test_bot.config["ad_defaults"] = {
"description": {
"prefix": "",
"suffix": ""
}
}
test_bot.config.ad_defaults = AdDefaults()
yaml = YAML()
with open(ad_file, "w", encoding = "utf-8") as f:
@@ -807,7 +806,7 @@ categories:
# Set config file path to tmp_path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
with pytest.raises(AssertionError) as exc_info:
test_bot.load_ads()
assert "property [description] not specified" in str(exc_info.value)
@@ -876,12 +875,12 @@ class TestKleinanzeigenBotAdRepublication:
def test_check_ad_republication_with_changes(self, test_bot:KleinanzeigenBot, base_ad_config:dict[str, Any]) -> None:
"""Test that ads with changes are marked for republication."""
# Mock the description config to prevent modification of the description
test_bot.config["ad_defaults"] = {
test_bot.config.ad_defaults = AdDefaults.model_validate({
"description": {
"prefix": "",
"suffix": ""
}
}
})
# Create ad config with all necessary fields for republication
ad_cfg = create_ad_config(
@@ -905,7 +904,7 @@ class TestKleinanzeigenBotAdRepublication:
# Set config file path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
# Mock the loading of the original ad configuration
with patch("kleinanzeigen_bot.utils.dicts.load_dict", side_effect = [
@@ -934,7 +933,7 @@ class TestKleinanzeigenBotAdRepublication:
ad_cfg_orig["content_hash"] = current_hash
# Mock the config to prevent actual file operations
test_bot.config["ad_files"] = ["test.yaml"]
test_bot.config.ad_files = ["test.yaml"]
with patch("kleinanzeigen_bot.utils.dicts.load_dict_if_exists", return_value = ad_cfg_orig), \
patch("kleinanzeigen_bot.utils.dicts.load_dict", return_value = {}): # Mock ad_fields.yaml
ads_to_publish = test_bot.load_ads()
@@ -966,10 +965,10 @@ class TestKleinanzeigenBotShippingOptions:
published_ads:list[dict[str, Any]] = []
# Set up default config values needed for the test
test_bot.config["publishing"] = {
test_bot.config.publishing = PublishingConfig.model_validate({
"delete_old_ads": "BEFORE_PUBLISH",
"delete_old_ads_by_title": False
}
})
# Create temporary file path
ad_file = Path(tmp_path) / "test_ad.yaml"
@@ -1046,25 +1045,27 @@ class TestKleinanzeigenBotPrefixSuffix:
def test_description_prefix_suffix_handling(
self,
test_bot:KleinanzeigenBot,
test_bot_config:Config,
description_test_cases:list[tuple[dict[str, Any], str, str]]
) -> None:
"""Test handling of description prefix/suffix in various configurations."""
for config, raw_description, expected_description in description_test_cases:
test_bot.config = config
test_bot = KleinanzeigenBot()
test_bot.config = test_bot_config.with_values(config)
ad_cfg = {"description": raw_description, "active": True}
# Access private method using the correct name mangling
description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == expected_description
def test_description_length_validation(self, test_bot:KleinanzeigenBot) -> None:
def test_description_length_validation(self, test_bot_config:Config) -> None:
"""Test that long descriptions with affixes raise appropriate error."""
test_bot.config = {
test_bot = KleinanzeigenBot()
test_bot.config = test_bot_config.with_values({
"ad_defaults": {
"description_prefix": "P" * 1000,
"description_suffix": "S" * 1000
}
}
})
ad_cfg = {
"description": "D" * 2001, # This plus affixes will exceed 4000 chars
"active": True
@@ -1080,14 +1081,10 @@ class TestKleinanzeigenBotPrefixSuffix:
class TestKleinanzeigenBotDescriptionHandling:
"""Tests for description handling functionality."""
def test_description_without_main_config_description(self, test_bot:KleinanzeigenBot) -> None:
def test_description_without_main_config_description(self, test_bot_config:Config) -> None:
"""Test that description works correctly when description is missing from main config."""
# Set up config without any description fields
test_bot.config = {
"ad_defaults": {
# No description field at all
}
}
test_bot = KleinanzeigenBot()
test_bot.config = test_bot_config
# Test with a simple ad config
ad_cfg = {
@@ -1099,14 +1096,15 @@ class TestKleinanzeigenBotDescriptionHandling:
description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Test Description"
def test_description_with_only_new_format_affixes(self, test_bot:KleinanzeigenBot) -> None:
def test_description_with_only_new_format_affixes(self, test_bot_config:Config) -> None:
"""Test that description works with only new format affixes in config."""
test_bot.config = {
test_bot = KleinanzeigenBot()
test_bot.config = test_bot_config.with_values({
"ad_defaults": {
"description_prefix": "Prefix: ",
"description_suffix": " :Suffix"
}
}
})
ad_cfg = {
"description": "Test Description",
@@ -1116,9 +1114,10 @@ class TestKleinanzeigenBotDescriptionHandling:
description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Prefix: Test Description :Suffix"
def test_description_with_mixed_config_formats(self, test_bot:KleinanzeigenBot) -> None:
def test_description_with_mixed_config_formats(self, test_bot_config:Config) -> None:
"""Test that description works with both old and new format affixes in config."""
test_bot.config = {
test_bot = KleinanzeigenBot()
test_bot.config = test_bot_config.with_values({
"ad_defaults": {
"description_prefix": "New Prefix: ",
"description_suffix": " :New Suffix",
@@ -1127,7 +1126,7 @@ class TestKleinanzeigenBotDescriptionHandling:
"suffix": " :Old Suffix"
}
}
}
})
ad_cfg = {
"description": "Test Description",
@@ -1137,14 +1136,15 @@ class TestKleinanzeigenBotDescriptionHandling:
description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "New Prefix: Test Description :New Suffix"
def test_description_with_ad_level_affixes(self, test_bot:KleinanzeigenBot) -> None:
def test_description_with_ad_level_affixes(self, test_bot_config:Config) -> None:
"""Test that ad-level affixes take precedence over config affixes."""
test_bot.config = {
test_bot = KleinanzeigenBot()
test_bot.config = test_bot_config.with_values({
"ad_defaults": {
"description_prefix": "Config Prefix: ",
"description_suffix": " :Config Suffix"
}
}
})
ad_cfg = {
"description": "Test Description",
@@ -1156,9 +1156,10 @@ class TestKleinanzeigenBotDescriptionHandling:
description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Ad Prefix: Test Description :Ad Suffix"
def test_description_with_none_values(self, test_bot:KleinanzeigenBot) -> None:
def test_description_with_none_values(self, test_bot_config:Config) -> None:
"""Test that None values in affixes are handled correctly."""
test_bot.config = {
test_bot = KleinanzeigenBot()
test_bot.config = test_bot_config.with_values({
"ad_defaults": {
"description_prefix": None,
"description_suffix": None,
@@ -1167,7 +1168,7 @@ class TestKleinanzeigenBotDescriptionHandling:
"suffix": None
}
}
}
})
ad_cfg = {
"description": "Test Description",
@@ -1177,11 +1178,10 @@ class TestKleinanzeigenBotDescriptionHandling:
description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Test Description"
def test_description_with_email_replacement(self, test_bot:KleinanzeigenBot) -> None:
def test_description_with_email_replacement(self, test_bot_config:Config) -> None:
"""Test that @ symbols in description are replaced with (at)."""
test_bot.config = {
"ad_defaults": {}
}
test_bot = KleinanzeigenBot()
test_bot.config = test_bot_config
ad_cfg = {
"description": "Contact: test@example.com",
@@ -1195,16 +1195,19 @@ class TestKleinanzeigenBotDescriptionHandling:
class TestKleinanzeigenBotChangedAds:
"""Tests for the 'changed' ads selector functionality."""
def test_load_ads_with_changed_selector(self, test_bot:KleinanzeigenBot, base_ad_config:dict[str, Any]) -> None:
def test_load_ads_with_changed_selector(self, test_bot_config:Config, base_ad_config:dict[str, Any]) -> None:
"""Test that only changed ads are loaded when using the 'changed' selector."""
# Set up the bot with the 'changed' selector
test_bot = KleinanzeigenBot()
test_bot.ads_selector = "changed"
test_bot.config["ad_defaults"] = {
"description": {
"prefix": "",
"suffix": ""
test_bot.config = test_bot_config.with_values({
"ad_defaults": {
"description": {
"prefix": "",
"suffix": ""
}
}
}
})
# Create a changed ad
changed_ad = create_ad_config(
@@ -1237,7 +1240,7 @@ class TestKleinanzeigenBotChangedAds:
# Set config file path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
# Mock the loading of the ad configuration
with patch("kleinanzeigen_bot.utils.dicts.load_dict", side_effect = [
@@ -1254,12 +1257,6 @@ class TestKleinanzeigenBotChangedAds:
"""Test that 'due' selector includes all ads that are due for republication, regardless of changes."""
# Set up the bot with the 'due' selector
test_bot.ads_selector = "due"
test_bot.config["ad_defaults"] = {
"description": {
"prefix": "",
"suffix": ""
}
}
# Create a changed ad that is also due for republication
current_time = misc.now()
@@ -1289,7 +1286,7 @@ class TestKleinanzeigenBotChangedAds:
# Set config file path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config["ad_files"] = ["ads/*.yaml"]
test_bot.config.ad_files = ["ads/*.yaml"]
# Mock the loading of the ad configuration
with patch("kleinanzeigen_bot.utils.dicts.load_dict", side_effect = [