feat: add type safe Config model

This commit is contained in:
sebthom
2025-05-14 00:30:59 +02:00
committed by Sebastian Thomschke
parent e7a3d46d25
commit 1369da1c34
21 changed files with 1132 additions and 389 deletions

View File

@@ -15,6 +15,7 @@ from wcmatch import glob
from . import extract, resources
from ._version import __version__
from .ads import MAX_DESCRIPTION_LENGTH, calculate_content_hash, get_description_affixes
from .model.config_model import Config
from .utils import dicts, error_handlers, loggers, misc
from .utils.exceptions import CaptchaEncountered
from .utils.files import abspath
@@ -42,7 +43,7 @@ class KleinanzeigenBot(WebScrapingMixin):
self.root_url = "https://www.kleinanzeigen.de"
self.config:dict[str, Any] = {}
self.config:Config
self.config_file_path = abspath("config.yaml")
self.categories:dict[str, str] = {}
@@ -325,7 +326,7 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_files:dict[str, str] = {}
data_root_dir = os.path.dirname(self.config_file_path)
for file_pattern in self.config["ad_files"]:
for file_pattern in self.config.ad_files:
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
if not str(ad_file).endswith("ad_fields.yaml"):
ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file
@@ -349,7 +350,7 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_cfg_orig = dicts.load_dict(ad_file, "ad")
ad_cfg = copy.deepcopy(ad_cfg_orig)
dicts.apply_defaults(ad_cfg,
self.config["ad_defaults"],
self.config.ad_defaults.model_dump(),
ignore = lambda k, _: k == "description",
override = lambda _, v: v == "" # noqa: PLC1901 can be simplified to `not v` as an empty string is falsey
)
@@ -462,40 +463,44 @@ class KleinanzeigenBot(WebScrapingMixin):
return ads
def load_config(self) -> None:
config_defaults = dicts.load_dict_from_module(resources, "config_defaults.yaml")
config = dicts.load_dict_if_exists(self.config_file_path, _("config"))
if config is None:
# write default config.yaml if config file does not exist
if not os.path.exists(self.config_file_path):
LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
dicts.save_dict(self.config_file_path, config_defaults)
config = {}
default_config = Config.model_construct()
default_config.login.username = ""
default_config.login.password = ""
dicts.save_dict(self.config_file_path, default_config.model_dump(exclude_none = True, exclude = {
"ad_defaults": {
"description" # deprecated
}
}), header = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/config.schema.json")
self.config = dicts.apply_defaults(config, config_defaults)
config_yaml = dicts.load_dict_if_exists(self.config_file_path, _("config"))
self.config = Config.model_validate(config_yaml, strict = True, context = self.config_file_path)
# load built-in category mappings
self.categories = dicts.load_dict_from_module(resources, "categories.yaml", "categories")
deprecated_categories = dicts.load_dict_from_module(resources, "categories_old.yaml", "categories")
self.categories.update(deprecated_categories)
if self.config["categories"]:
self.categories.update(self.config["categories"])
if self.config.categories:
self.categories.update(self.config.categories)
LOG.info(" -> found %s", pluralize("category", self.categories))
ensure(self.config["login"]["username"], f"[login.username] not specified @ [{self.config_file_path}]")
ensure(self.config["login"]["password"], f"[login.password] not specified @ [{self.config_file_path}]")
self.browser_config.arguments = self.config["browser"]["arguments"]
self.browser_config.binary_location = self.config["browser"]["binary_location"]
self.browser_config.extensions = [abspath(item, relative_to = self.config_file_path) for item in self.config["browser"]["extensions"]]
self.browser_config.use_private_window = self.config["browser"]["use_private_window"]
if self.config["browser"]["user_data_dir"]:
self.browser_config.user_data_dir = abspath(self.config["browser"]["user_data_dir"], relative_to = self.config_file_path)
self.browser_config.profile_name = self.config["browser"]["profile_name"]
# populate browser_config object used by WebScrapingMixin
self.browser_config.arguments = self.config.browser.arguments
self.browser_config.binary_location = self.config.browser.binary_location
self.browser_config.extensions = [abspath(item, relative_to = self.config_file_path) for item in self.config.browser.extensions]
self.browser_config.use_private_window = self.config.browser.use_private_window
if self.config.browser.user_data_dir:
self.browser_config.user_data_dir = abspath(self.config.browser.user_data_dir, relative_to = self.config_file_path)
self.browser_config.profile_name = self.config.browser.profile_name
async def login(self) -> None:
LOG.info("Checking if already logged in...")
await self.web_open(f"{self.root_url}")
if await self.is_logged_in():
LOG.info("Already logged in as [%s]. Skipping login.", self.config["login"]["username"])
LOG.info("Already logged in as [%s]. Skipping login.", self.config.login.username)
return
LOG.info("Opening login page...")
@@ -519,9 +524,9 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.handle_after_login_logic()
async def fill_login_data_and_send(self) -> None:
LOG.info("Logging in as [%s]...", self.config["login"]["username"])
await self.web_input(By.ID, "email", self.config["login"]["username"])
await self.web_input(By.ID, "password", self.config["login"]["password"])
LOG.info("Logging in as [%s]...", self.config.login.username)
await self.web_input(By.ID, "email", self.config.login.username)
await self.web_input(By.ID, "password", self.config.login.password)
await self.web_click(By.CSS_SELECTOR, "form#login-form button[type='submit']")
async def handle_after_login_logic(self) -> None:
@@ -546,13 +551,13 @@ class KleinanzeigenBot(WebScrapingMixin):
try:
# Try to find the standard element first
user_info = await self.web_text(By.CLASS_NAME, "mr-medium")
if self.config["login"]["username"].lower() in user_info.lower():
if self.config.login.username.lower() in user_info.lower():
return True
except TimeoutError:
try:
# If standard element not found, try the alternative
user_info = await self.web_text(By.ID, "user-email")
if self.config["login"]["username"].lower() in user_info.lower():
if self.config.login.username.lower() in user_info.lower():
return True
except TimeoutError:
return False
@@ -567,7 +572,7 @@ class KleinanzeigenBot(WebScrapingMixin):
for (ad_file, ad_cfg, _ad_cfg_orig) in ad_cfgs:
count += 1
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config["publishing"]["delete_old_ads_by_title"])
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title)
await self.web_sleep()
LOG.info("############################################")
@@ -624,7 +629,7 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads)
await self.web_await(lambda: self.web_check(By.ID, "checking-done", Is.DISPLAYED), timeout = 5 * 60)
if self.config["publishing"]["delete_old_ads"] == "AFTER_PUBLISH" and not self.keep_old_ads:
if self.config.publishing.delete_old_ads == "AFTER_PUBLISH" and not self.keep_old_ads:
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = False)
LOG.info("############################################")
@@ -639,8 +644,8 @@ class KleinanzeigenBot(WebScrapingMixin):
"""
await self.assert_free_ad_limit_not_reached()
if self.config["publishing"]["delete_old_ads"] == "BEFORE_PUBLISH" and not self.keep_old_ads:
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config["publishing"]["delete_old_ads_by_title"])
if self.config.publishing.delete_old_ads == "BEFORE_PUBLISH" and not self.keep_old_ads:
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title)
LOG.info("Publishing ad '%s'...", ad_cfg["title"])
@@ -779,9 +784,9 @@ class KleinanzeigenBot(WebScrapingMixin):
"iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']",
timeout = 2)
if self.config.get("captcha", {}).get("auto_restart", False):
if self.config.captcha.auto_restart:
LOG.warning("Captcha recognized - auto-restart enabled, abort run...")
raise CaptchaEncountered(misc.parse_duration(self.config.get("captcha", {}).get("restart_delay", "6h")))
raise CaptchaEncountered(misc.parse_duration(self.config.captcha.restart_delay))
# Fallback: manuell
LOG.warning("############################################")
@@ -1036,7 +1041,7 @@ class KleinanzeigenBot(WebScrapingMixin):
async def assert_free_ad_limit_not_reached(self) -> None:
try:
await self.web_find(By.XPATH, "/html/body/div[1]/form/fieldset[6]/div[1]/header", timeout = 2)
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config.login.username} is reached.")
except TimeoutError:
pass

View File

@@ -4,7 +4,7 @@
import hashlib, json, os # isort: skip
from typing import Any, Final
from .utils import dicts
from .model.config_model import Config
MAX_DESCRIPTION_LENGTH:Final[int] = 4000
@@ -40,7 +40,7 @@ def calculate_content_hash(ad_cfg:dict[str, Any]) -> str:
return hashlib.sha256(content_str.encode()).hexdigest()
def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str:
def get_description_affixes(config:Config, *, prefix:bool = True) -> str:
"""Get prefix or suffix for description with proper precedence.
This function handles both the new flattened format and legacy nested format:
@@ -65,24 +65,21 @@ def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str
Example:
>>> config = {"ad_defaults": {"description_prefix": "Hello", "description": {"prefix": "Hi"}}}
>>> get_description_affixes(config, prefix=True)
>>> get_description_affixes(Config.model_validate(config), prefix=True)
'Hello'
"""
# Handle edge cases
if not isinstance(config, dict):
return ""
affix_type = "prefix" if prefix else "suffix"
# First try new flattened format (description_prefix/description_suffix)
flattened_key = f"description_{affix_type}"
flattened_value = dicts.safe_get(config, "ad_defaults", flattened_key)
flattened_value = getattr(config.ad_defaults, flattened_key)
if isinstance(flattened_value, str):
return flattened_value
# Then try legacy nested format (description.prefix/description.suffix)
nested_value = dicts.safe_get(config, "ad_defaults", "description", affix_type)
if isinstance(nested_value, str):
return nested_value
if config.ad_defaults.description:
nested_value = getattr(config.ad_defaults.description, affix_type)
if isinstance(nested_value, str):
return nested_value
return ""

View File

@@ -7,6 +7,7 @@ from datetime import datetime
from typing import Any, Final
from .ads import calculate_content_hash, get_description_affixes
from .model.config_model import Config
from .utils import dicts, i18n, loggers, misc, reflect
from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin
@@ -22,7 +23,7 @@ class AdExtractor(WebScrapingMixin):
Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page.
"""
def __init__(self, browser:Browser, config:dict[str, Any]) -> None:
def __init__(self, browser:Browser, config:Config) -> None:
super().__init__()
self.browser = browser
self.config = config
@@ -432,11 +433,8 @@ class AdExtractor(WebScrapingMixin):
# Convert Euro to cents and round to nearest integer
price_in_cent = round(ship_costs * 100)
# Get excluded shipping options from config
excluded_options = self.config.get("download", {}).get("excluded_shipping_options", [])
# If include_all_matching_shipping_options is enabled, get all options for the same package size
if self.config.get("download", {}).get("include_all_matching_shipping_options", False):
if self.config.download.include_all_matching_shipping_options:
# Find all options with the same price to determine the package size
matching_options = [opt for opt in shipping_costs if opt["priceInEuroCent"] == price_in_cent]
if not matching_options:
@@ -451,7 +449,7 @@ class AdExtractor(WebScrapingMixin):
for opt in shipping_costs
if opt["packageSize"] == matching_size
and opt["id"] in shipping_option_mapping
and shipping_option_mapping[opt["id"]] not in excluded_options
and shipping_option_mapping[opt["id"]] not in self.config.download.excluded_shipping_options
]
else:
# Only use the matching option if it's not excluded
@@ -460,7 +458,7 @@ class AdExtractor(WebScrapingMixin):
return "NOT_APPLICABLE", ship_costs, shipping_options
shipping_option = shipping_option_mapping.get(matching_option["id"])
if not shipping_option or shipping_option in excluded_options:
if not shipping_option or shipping_option in self.config.download.excluded_shipping_options:
return "NOT_APPLICABLE", ship_costs, shipping_options
shipping_options = [shipping_option]

View File

View File

@@ -0,0 +1,144 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from __future__ import annotations
import copy
from typing import Any, Dict, List, Literal
from pydantic import Field, model_validator, validator
from typing_extensions import deprecated
from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils.pydantics import ContextualModel
class ContactDefaults(ContextualModel):
name:str | None = None
street:str | None = None
zipcode:int | str | None = None
phone:str | None = None
@deprecated("Use description_prefix/description_suffix instead")
class DescriptionAffixes(ContextualModel):
prefix:str | None = None
suffix:str | None = None
class AdDefaults(ContextualModel):
active:bool = True
type:Literal["OFFER", "WANTED"] = "OFFER"
description:DescriptionAffixes | None = None
description_prefix:str | None = Field(default = None, description = "prefix for the ad description")
description_suffix:str | None = Field(default = None, description = " suffix for the ad description")
price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE"
shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING"
sell_directly:bool = Field(default = False, description = "requires shipping_type SHIPPING to take effect")
contact:ContactDefaults = Field(default_factory = ContactDefaults)
republication_interval:int = 7
@model_validator(mode = "before")
@classmethod
def unify_description(cls, values:Dict[str, Any]) -> Dict[str, Any]:
# Ensure flat prefix/suffix take precedence over deprecated nested "description"
desc = values.get("description")
flat_prefix = values.get("description_prefix")
flat_suffix = values.get("description_suffix")
if not flat_prefix and isinstance(desc, dict) and desc.get("prefix") is not None:
values["description_prefix"] = desc.get("prefix", "")
if not flat_suffix and isinstance(desc, dict) and desc.get("suffix") is not None:
values["description_suffix"] = desc.get("suffix", "")
return values
class DownloadConfig(ContextualModel):
include_all_matching_shipping_options:bool = Field(
default = False,
description = "if true, all shipping options matching the package size will be included"
)
excluded_shipping_options:List[str] = Field(
default_factory = list,
description = "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']"
)
class BrowserConfig(ContextualModel):
arguments:List[str] = Field(
default_factory = list,
description = "See https://peter.sh/experiments/chromium-command-line-switches/"
)
binary_location:str | None = Field(
default = None,
description = "path to custom browser executable, if not specified will be looked up on PATH"
)
extensions:List[str] = Field(
default_factory = list,
description = "a list of .crx extension files to be loaded"
)
use_private_window:bool = True
user_data_dir:str | None = Field(
default = None,
description = "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md"
)
profile_name:str | None = None
class LoginConfig(ContextualModel):
username:str = Field(..., min_length = 1)
password:str = Field(..., min_length = 1)
class PublishingConfig(ContextualModel):
delete_old_ads:Literal["BEFORE_PUBLISH", "AFTER_PUBLISH", "NEVER"] | None = "AFTER_PUBLISH"
delete_old_ads_by_title:bool = Field(default = True, description = "only works if delete_old_ads is set to BEFORE_PUBLISH")
class CaptchaConfig(ContextualModel):
auto_restart:bool = False
restart_delay:str = "6h"
class Config(ContextualModel):
ad_files:List[str] = Field(
default_factory = lambda: ["./**/ad_*.{json,yml,yaml}"],
min_items = 1,
description = """
glob (wildcard) patterns to select ad configuration files
if relative paths are specified, then they are relative to this configuration file
"""
) # type: ignore[call-overload]
ad_defaults:AdDefaults = Field(
default_factory = AdDefaults,
description = "Default values for ads, can be overwritten in each ad configuration file"
)
categories:Dict[str, str] = Field(default_factory = dict, description = """
additional name to category ID mappings, see default list at
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
Example:
categories:
Elektronik > Notebooks: 161/278
Jobs > Praktika: 102/125
""")
download:DownloadConfig = Field(default_factory = DownloadConfig)
publishing:PublishingConfig = Field(default_factory = PublishingConfig)
browser:BrowserConfig = Field(default_factory = BrowserConfig, description = "Browser configuration")
login:LoginConfig = Field(default_factory = LoginConfig.model_construct, description = "Login credentials")
captcha:CaptchaConfig = Field(default_factory = CaptchaConfig)
def with_values(self, values:dict[str, Any]) -> Config:
return Config.model_validate(
dicts.apply_defaults(copy.deepcopy(values), defaults = self.model_dump())
)
@validator("ad_files", each_item = True)
@classmethod
def _non_empty_glob_pattern(cls, v:str) -> str:
if not v.strip():
raise ValueError("ad_files entries must be non-empty glob patterns")
return v

View File

@@ -1,52 +0,0 @@
ad_files:
- "./**/ad_*.{json,yml,yaml}"
# default values for ads, can be overwritten in each ad configuration file
ad_defaults:
active: true
type: OFFER # one of: OFFER, WANTED
description_prefix: "" # prefix for the ad description
description_suffix: "" # suffix for the ad description
price_type: NEGOTIABLE # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE
shipping_type: SHIPPING # one of: PICKUP, SHIPPING, NOT_APPLICABLE
sell_directly: false # requires shipping_options to take effect
contact:
name: ""
street: ""
zipcode:
phone: "" # IMPORTANT: surround phone number with quotes to prevent removal of leading zeros
republication_interval: 7 # every X days ads should be re-published
# additional name to category ID mappings, see default list at
# https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
#
# categories:
# Elektronik > Notebooks: 161/278
# Jobs > Praktika: 102/125
categories: {}
download:
# if true, all shipping options matching the package size will be included
include_all_matching_shipping_options: false
# list of shipping options to exclude, e.g. ["DHL_2", "DHL_5"]
excluded_shipping_options: []
publishing:
delete_old_ads: "AFTER_PUBLISH" # one of: AFTER_PUBLISH, BEFORE_PUBLISH, NEVER
delete_old_ads_by_title: true # only works if delete_old_ads is set to BEFORE_PUBLISH
# browser configuration
browser:
# https://peter.sh/experiments/chromium-command-line-switches/
arguments: []
binary_location: # path to custom browser executable, if not specified will be looked up on PATH
extensions: [] # a list of .crx extension files to be loaded
use_private_window: true
user_data_dir: "" # see https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md
profile_name: ""
# login credentials
login:
username: ""
password: ""

View File

@@ -207,7 +207,7 @@ kleinanzeigen_bot/utils/error_handlers.py:
"Aborted on user request.": "Auf Benutzeranfrage abgebrochen."
on_exception:
"%s: %s": "%s: %s"
"Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s": "Unbekannter Fehler aufgetreten (fehlende Fehlerinformation): ex_type=%s, ex_value=%s"
"Unknown exception occurred (missing exception info): ex_type=%s, ex=%s": "Unbekannter Fehler aufgetreten (fehlende Fehlerinformation): ex_type=%s, ex_value=%s"
#################################################
kleinanzeigen_bot/utils/loggers.py:
@@ -229,6 +229,117 @@ kleinanzeigen_bot/utils/dicts.py:
load_dict_from_module:
"Loading %s[%s.%s]...": "Lade %s[%s.%s]..."
#################################################
kleinanzeigen_bot/utils/pydantics.py:
#################################################
__get_message_template:
"Object has no attribute '{attribute}'": "Objekt hat kein Attribut '{attribute}'"
"Invalid JSON: {error}": "Ungültiges JSON: {error}"
"JSON input should be string, bytes or bytearray": "JSON-Eingabe sollte eine Zeichenkette, Bytes oder Bytearray sein"
"Cannot check `{method_name}` when validating from json, use a JsonOrPython validator instead": "Kann `{method_name}` beim Validieren von JSON nicht prüfen, verwende stattdessen einen JsonOrPython-Validator"
"Recursion error - cyclic reference detected": "Rekursionsfehler zirkuläre Referenz erkannt"
"Field required": "Feld erforderlich"
"Field is frozen": "Feld ist gesperrt"
"Instance is frozen": "Instanz ist gesperrt"
"Extra inputs are not permitted": "Zusätzliche Eingaben sind nicht erlaubt"
"Keys should be strings": "Schlüssel sollten Zeichenketten sein"
"Error extracting attribute: {error}": "Fehler beim Extrahieren des Attributs: {error}"
"Input should be a valid dictionary or instance of {class_name}": "Eingabe sollte ein gültiges Wörterbuch oder eine Instanz von {class_name} sein"
"Input should be a valid dictionary or object to extract fields from": "Eingabe sollte ein gültiges Wörterbuch oder Objekt sein, um Felder daraus zu extrahieren"
"Input should be a dictionary or an instance of {class_name}": "Eingabe sollte ein Wörterbuch oder eine Instanz von {class_name} sein"
"Input should be an instance of {class_name}": "Eingabe sollte eine Instanz von {class_name} sein"
"Input should be None": "Eingabe sollte None sein"
"Input should be greater than {gt}": "Eingabe sollte größer als {gt} sein"
"Input should be greater than or equal to {ge}": "Eingabe sollte größer oder gleich {ge} sein"
"Input should be less than {lt}": "Eingabe sollte kleiner als {lt} sein"
"Input should be less than or equal to {le}": "Eingabe sollte kleiner oder gleich {le} sein"
"Input should be a multiple of {multiple_of}": "Eingabe sollte ein Vielfaches von {multiple_of} sein"
"Input should be a finite number": "Eingabe sollte eine endliche Zahl sein"
"{field_type} should have at least {min_length} item{expected_plural} after validation, not {actual_length}": "{field_type} sollte nach der Validierung mindestens {min_length} Element{expected_plural} haben, nicht {actual_length}"
"{field_type} should have at most {max_length} item{expected_plural} after validation, not {actual_length}": "{field_type} sollte nach der Validierung höchstens {max_length} Element{expected_plural} haben, nicht {actual_length}"
"Input should be iterable": "Eingabe sollte iterierbar sein"
"Error iterating over object, error: {error}": "Fehler beim Iterieren des Objekts: {error}"
"Input should be a valid string": "Eingabe sollte eine gültige Zeichenkette sein"
"Input should be a string, not an instance of a subclass of str": "Eingabe sollte ein String sein, keine Instanz einer Unterklasse von str"
"Input should be a valid string, unable to parse raw data as a unicode string": "Eingabe sollte eine gültige Zeichenkette sein, Rohdaten können nicht als Unicode-String geparst werden"
"String should have at least {min_length} character{expected_plural}": "String sollte mindestens {min_length} Zeichen{expected_plural} haben"
"String should have at most {max_length} character{expected_plural}": "String sollte höchstens {max_length} Zeichen{expected_plural} haben"
"String should match pattern '{pattern}'": "String sollte dem Muster '{pattern}' entsprechen"
"Input should be {expected}": "Eingabe sollte {expected} sein"
"Input should be a valid dictionary": "Eingabe sollte ein gültiges Wörterbuch sein"
"Input should be a valid mapping, error: {error}": "Eingabe sollte eine gültige Zuordnung sein, Fehler: {error}"
"Input should be a valid list": "Eingabe sollte eine gültige Liste sein"
"Input should be a valid tuple": "Eingabe sollte ein gültiges Tupel sein"
"Input should be a valid set": "Eingabe sollte eine gültige Menge sein"
"Set items should be hashable": "Elemente einer Menge sollten hashbar sein"
"Input should be a valid boolean": "Eingabe sollte ein gültiger Boolescher Wert sein"
"Input should be a valid boolean, unable to interpret input": "Eingabe sollte ein gültiger Boolescher Wert sein, Eingabe kann nicht interpretiert werden"
"Input should be a valid integer": "Eingabe sollte eine gültige Ganzzahl sein"
"Input should be a valid integer, unable to parse string as an integer": "Eingabe sollte eine gültige Ganzzahl sein, Zeichenkette konnte nicht als Ganzzahl geparst werden"
"Input should be a valid integer, got a number with a fractional part": "Eingabe sollte eine gültige Ganzzahl sein, Zahl hat einen Dezimalteil"
"Unable to parse input string as an integer, exceeded maximum size": "Zeichenkette konnte nicht als Ganzzahl geparst werden, maximale Größe überschritten"
"Input should be a valid number": "Eingabe sollte eine gültige Zahl sein"
"Input should be a valid number, unable to parse string as a number": "Eingabe sollte eine gültige Zahl sein, Zeichenkette kann nicht als Zahl geparst werden"
"Input should be a valid bytes": "Eingabe sollte gültige Bytes sein"
"Data should have at least {min_length} byte{expected_plural}": "Daten sollten mindestens {min_length} Byte{expected_plural} enthalten"
"Data should have at most {max_length} byte{expected_plural}": "Daten sollten höchstens {max_length} Byte{expected_plural} enthalten"
"Data should be valid {encoding}: {encoding_error}": "Daten sollten gültiges {encoding} sein: {encoding_error}"
"Value error, {error}": "Wertfehler: {error}"
"Assertion failed, {error}": "Assertion fehlgeschlagen: {error}"
"Input should be a valid date": "Eingabe sollte ein gültiges Datum sein"
"Input should be a valid date in the format YYYY-MM-DD, {error}": "Eingabe sollte ein gültiges Datum im Format YYYY-MM-DD sein: {error}"
"Input should be a valid date or datetime, {error}": "Eingabe sollte ein gültiges Datum oder eine gültige Datums-Uhrzeit sein: {error}"
"Datetimes provided to dates should have zero time - e.g. be exact dates": "Datetime-Werte für Datum sollten keine Uhrzeit enthalten also exakte Daten sein"
"Date should be in the past": "Datum sollte in der Vergangenheit liegen"
"Date should be in the future": "Datum sollte in der Zukunft liegen"
"Input should be a valid time": "Eingabe sollte eine gültige Uhrzeit sein"
"Input should be in a valid time format, {error}": "Eingabe sollte in einem gültigen Zeitformat sein: {error}"
"Input should be a valid datetime": "Eingabe sollte ein gültiges Datum mit Uhrzeit sein"
"Input should be a valid datetime, {error}": "Eingabe sollte ein gültiges Datum mit Uhrzeit sein: {error}"
"Invalid datetime object, got {error}": "Ungültiges Datetime-Objekt: {error}"
"Input should be a valid datetime or date, {error}": "Eingabe sollte ein gültiges Datum oder Datum mit Uhrzeit sein: {error}"
"Input should be in the past": "Eingabe sollte in der Vergangenheit liegen"
"Input should be in the future": "Eingabe sollte in der Zukunft liegen"
"Input should not have timezone info": "Eingabe sollte keine Zeitzonen-Information enthalten"
"Input should have timezone info": "Eingabe sollte Zeitzonen-Information enthalten"
"Timezone offset of {tz_expected} required, got {tz_actual}": "Zeitzonen-Offset von {tz_expected} erforderlich, erhalten: {tz_actual}"
"Input should be a valid timedelta": "Eingabe sollte ein gültiges Zeitdelta sein"
"Input should be a valid timedelta, {error}": "Eingabe sollte ein gültiges Zeitdelta sein: {error}"
"Input should be a valid frozenset": "Eingabe sollte ein gültiges Frozenset sein"
"Input should be an instance of {class}": "Eingabe sollte eine Instanz von {class} sein"
"Input should be a subclass of {class}": "Eingabe sollte eine Unterklasse von {class} sein"
"Input should be callable": "Eingabe sollte aufrufbar sein"
"Input tag '{tag}' found using {discriminator} does not match any of the expected tags: {expected_tags}": "Eingabe-Tag '{tag}', ermittelt durch {discriminator}, stimmt mit keinem der erwarteten Tags überein: {expected_tags}"
"Unable to extract tag using discriminator {discriminator}": "Tag kann mit {discriminator} nicht extrahiert werden"
"Arguments must be a tuple, list or a dictionary": "Argumente müssen ein Tupel, eine Liste oder ein Wörterbuch sein"
"Missing required argument": "Erforderliches Argument fehlt"
"Unexpected keyword argument": "Unerwartetes Schlüsselwort-Argument"
"Missing required keyword only argument": "Erforderliches keyword-only-Argument fehlt"
"Unexpected positional argument": "Unerwartetes Positionsargument"
"Missing required positional only argument": "Erforderliches positional-only-Argument fehlt"
"Got multiple values for argument": "Mehrere Werte für Argument erhalten"
"URL input should be a string or URL": "URL-Eingabe sollte eine Zeichenkette oder URL sein"
"Input should be a valid URL, {error}": "Eingabe sollte eine gültige URL sein: {error}"
"Input violated strict URL syntax rules, {error}": "Eingabe hat strikte URL-Syntaxregeln verletzt: {error}"
"URL should have at most {max_length} character{expected_plural}": "URL sollte höchstens {max_length} Zeichen{expected_plural} haben"
"URL scheme should be {expected_schemes}": "URL-Schema sollte {expected_schemes} sein"
"UUID input should be a string, bytes or UUID object": "UUID-Eingabe sollte eine Zeichenkette, Bytes oder ein UUID-Objekt sein"
"Input should be a valid UUID, {error}": "Eingabe sollte eine gültige UUID sein: {error}"
"UUID version {expected_version} expected": "UUID-Version {expected_version} erwartet"
"Decimal input should be an integer, float, string or Decimal object": "Decimal-Eingabe sollte eine Ganzzahl, Gleitkommazahl, Zeichenkette oder ein Decimal-Objekt sein"
"Input should be a valid decimal": "Eingabe sollte ein gültiges Decimal sein"
"Decimal input should have no more than {max_digits} digit{expected_plural} in total": "Decimal-Eingabe sollte insgesamt nicht mehr als {max_digits} Ziffer{expected_plural} haben"
"Decimal input should have no more than {decimal_places} decimal place{expected_plural}": "Decimal-Eingabe sollte nicht mehr als {decimal_places} Dezimalstelle{expected_plural} haben"
"Decimal input should have no more than {whole_digits} digit{expected_plural} before the decimal point": "Decimal-Eingabe sollte vor dem Dezimalpunkt nicht mehr als {whole_digits} Ziffer{expected_plural} haben"
? "Input should be a valid python complex object, a number, or a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex"
: "Eingabe sollte ein gültiges Python-komplexes Objekt, eine Zahl oder eine gültige komplexe Zeichenkette sein, gemäß https://docs.python.org/3/library/functions.html#complex"
"Input should be a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex": "Eingabe sollte eine gültige komplexe Zeichenkette sein, gemäß https://docs.python.org/3/library/functions.html#complex"
format_validation_error:
"validation error": "Validationsfehler"
"%s for [%s]:": "%s für %s"
"' or '": "' oder '"
#################################################
kleinanzeigen_bot/utils/web_scraping_mixin.py:
#################################################

View File

@@ -58,9 +58,9 @@ def apply_defaults(
return target
def defaultdict_to_dict(d: defaultdict[K, V]) -> dict[K, V]:
def defaultdict_to_dict(d:defaultdict[K, V]) -> dict[K, V]:
"""Recursively convert defaultdict to dict."""
result: dict[K, V] = {}
result:dict[K, V] = {}
for key, value in d.items():
if isinstance(value, defaultdict):
result[key] = defaultdict_to_dict(value) # type: ignore[assignment]

View File

@@ -5,24 +5,29 @@ import sys, traceback # isort: skip
from types import FrameType, TracebackType
from typing import Final
from pydantic import ValidationError
from . import loggers
from .pydantics import format_validation_error
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
def on_exception(ex_type:type[BaseException] | None, ex_value:BaseException | None, ex_traceback:TracebackType | None) -> None:
if ex_type is None or ex_value is None:
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s", ex_type, ex_value)
def on_exception(ex_type:type[BaseException] | None, ex:BaseException | None, ex_traceback:TracebackType | None) -> None:
if ex_type is None or ex is None:
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex=%s", ex_type, ex)
return
if issubclass(ex_type, KeyboardInterrupt):
sys.__excepthook__(ex_type, ex_value, ex_traceback)
elif loggers.is_debug(LOG) or isinstance(ex_value, (AttributeError, ImportError, NameError, TypeError)):
LOG.error("".join(traceback.format_exception(ex_type, ex_value, ex_traceback)))
elif isinstance(ex_value, AssertionError):
LOG.error(ex_value)
sys.__excepthook__(ex_type, ex, ex_traceback)
elif loggers.is_debug(LOG) or isinstance(ex, (AttributeError, ImportError, NameError, TypeError)):
LOG.error("".join(traceback.format_exception(ex_type, ex, ex_traceback)))
elif isinstance(ex, ValidationError):
LOG.error(format_validation_error(ex))
elif isinstance(ex, AssertionError):
LOG.error(ex)
else:
LOG.error("%s: %s", ex_type.__name__, ex_value)
LOG.error("%s: %s", ex_type.__name__, ex)
sys.exit(1)

View File

@@ -16,6 +16,9 @@ __all__ = [
"LogFileHandle",
"DEBUG",
"INFO",
"WARNING",
"ERROR",
"CRITICAL",
"configure_console_logging",
"configure_file_logging",
"flush_all_handlers",

View File

@@ -0,0 +1,195 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from gettext import gettext as _
from typing import Any, cast
from pydantic import BaseModel, ValidationError
from pydantic_core import InitErrorDetails
from typing_extensions import Self
from kleinanzeigen_bot.utils.i18n import pluralize
class ContextualValidationError(ValidationError):
context:Any
class ContextualModel(BaseModel):
@classmethod
def model_validate(
cls,
obj:Any,
*,
strict:bool | None = None,
from_attributes:bool | None = None,
context:Any | None = None,
by_alias:bool | None = None,
by_name:bool | None = None,
) -> Self:
"""
Proxy to BaseModel.model_validate, but on error reraise as
ContextualValidationError including the passed context.
"""
try:
return super().model_validate(
obj,
strict = strict,
from_attributes = from_attributes,
context = context,
by_alias = by_alias,
by_name = by_name,
)
except ValidationError as ex:
new_ex = ContextualValidationError.from_exception_data(
title = ex.title,
line_errors = cast(list[InitErrorDetails], ex.errors()),
)
new_ex.context = context
raise new_ex from ex
def format_validation_error(ex:ValidationError) -> str:
"""
Turn a Pydantic ValidationError into the classic:
N validation errors for ModelName
field
message [type=code]
>>> from pydantic import BaseModel, ValidationError
>>> class M(BaseModel): x: int
>>> try:
... M(x="no-int")
... except ValidationError as e:
... print(format_validation_error(e))
1 validation error for [M]:
- x: Input should be a valid integer, unable to parse string as an integer
"""
errors = ex.errors(include_url = False, include_input = False, include_context = True)
ctx = ex.context if isinstance(ex, ContextualValidationError) and ex.context else ex.title
header = _("%s for [%s]:") % (pluralize("validation error", ex.error_count()), ctx)
lines = [header]
for err in errors:
loc = ".".join(str(p) for p in err["loc"])
msg_ctx = err.get("ctx")
code = err["type"]
msg_template = __get_message_template(code)
if msg_template:
msg = _(msg_template).format(**msg_ctx) if msg_ctx else msg_template
msg = msg.replace("' or '", _("' or '"))
lines.append(f"- {loc}: {msg}")
else:
lines.append(f"- {loc}: {err['msg']} [type={code}]")
return "\n".join(lines)
def __get_message_template(error_code:str) -> str | None:
# https://github.com/pydantic/pydantic-core/blob/d03bf4a01ca3b378cc8590bd481f307e82115bc6/src/errors/types.rs#L477
# ruff: noqa: PLR0911 Too many return statements
# ruff: noqa: PLR0912 Too many branches
# ruff: noqa: E701 Multiple statements on one line (colon)
match error_code:
case "no_such_attribute": return _("Object has no attribute '{attribute}'")
case "json_invalid": return _("Invalid JSON: {error}")
case "json_type": return _("JSON input should be string, bytes or bytearray")
case "needs_python_object": return _("Cannot check `{method_name}` when validating from json, use a JsonOrPython validator instead")
case "recursion_loop": return _("Recursion error - cyclic reference detected")
case "missing": return _("Field required")
case "frozen_field": return _("Field is frozen")
case "frozen_instance": return _("Instance is frozen")
case "extra_forbidden": return _("Extra inputs are not permitted")
case "invalid_key": return _("Keys should be strings")
case "get_attribute_error": return _("Error extracting attribute: {error}")
case "model_type": return _("Input should be a valid dictionary or instance of {class_name}")
case "model_attributes_type": return _("Input should be a valid dictionary or object to extract fields from")
case "dataclass_type": return _("Input should be a dictionary or an instance of {class_name}")
case "dataclass_exact_type": return _("Input should be an instance of {class_name}")
case "none_required": return _("Input should be None")
case "greater_than": return _("Input should be greater than {gt}")
case "greater_than_equal": return _("Input should be greater than or equal to {ge}")
case "less_than": return _("Input should be less than {lt}")
case "less_than_equal": return _("Input should be less than or equal to {le}")
case "multiple_of": return _("Input should be a multiple of {multiple_of}")
case "finite_number": return _("Input should be a finite number")
case "too_short": return _("{field_type} should have at least {min_length} item{expected_plural} after validation, not {actual_length}")
case "too_long": return _("{field_type} should have at most {max_length} item{expected_plural} after validation, not {actual_length}")
case "iterable_type": return _("Input should be iterable")
case "iteration_error": return _("Error iterating over object, error: {error}")
case "string_type": return _("Input should be a valid string")
case "string_sub_type": return _("Input should be a string, not an instance of a subclass of str")
case "string_unicode": return _("Input should be a valid string, unable to parse raw data as a unicode string")
case "string_too_short": return _("String should have at least {min_length} character{expected_plural}")
case "string_too_long": return _("String should have at most {max_length} character{expected_plural}")
case "string_pattern_mismatch": return _("String should match pattern '{pattern}'")
case "enum": return _("Input should be {expected}")
case "dict_type": return _("Input should be a valid dictionary")
case "mapping_type": return _("Input should be a valid mapping, error: {error}")
case "list_type": return _("Input should be a valid list")
case "tuple_type": return _("Input should be a valid tuple")
case "set_type": return _("Input should be a valid set")
case "set_item_not_hashable": return _("Set items should be hashable")
case "bool_type": return _("Input should be a valid boolean")
case "bool_parsing": return _("Input should be a valid boolean, unable to interpret input")
case "int_type": return _("Input should be a valid integer")
case "int_parsing": return _("Input should be a valid integer, unable to parse string as an integer")
case "int_from_float": return _("Input should be a valid integer, got a number with a fractional part")
case "int_parsing_size": return _("Unable to parse input string as an integer, exceeded maximum size")
case "float_type": return _("Input should be a valid number")
case "float_parsing": return _("Input should be a valid number, unable to parse string as a number")
case "bytes_type": return _("Input should be a valid bytes")
case "bytes_too_short": return _("Data should have at least {min_length} byte{expected_plural}")
case "bytes_too_long": return _("Data should have at most {max_length} byte{expected_plural}")
case "bytes_invalid_encoding": return _("Data should be valid {encoding}: {encoding_error}")
case "value_error": return _("Value error, {error}")
case "assertion_error": return _("Assertion failed, {error}")
case "custom_error": return None # handled separately
case "literal_error": return _("Input should be {expected}")
case "date_type": return _("Input should be a valid date")
case "date_parsing": return _("Input should be a valid date in the format YYYY-MM-DD, {error}")
case "date_from_datetime_parsing": return _("Input should be a valid date or datetime, {error}")
case "date_from_datetime_inexact": return _("Datetimes provided to dates should have zero time - e.g. be exact dates")
case "date_past": return _("Date should be in the past")
case "date_future": return _("Date should be in the future")
case "time_type": return _("Input should be a valid time")
case "time_parsing": return _("Input should be in a valid time format, {error}")
case "datetime_type": return _("Input should be a valid datetime")
case "datetime_parsing": return _("Input should be a valid datetime, {error}")
case "datetime_object_invalid": return _("Invalid datetime object, got {error}")
case "datetime_from_date_parsing": return _("Input should be a valid datetime or date, {error}")
case "datetime_past": return _("Input should be in the past")
case "datetime_future": return _("Input should be in the future")
case "timezone_naive": return _("Input should not have timezone info")
case "timezone_aware": return _("Input should have timezone info")
case "timezone_offset": return _("Timezone offset of {tz_expected} required, got {tz_actual}")
case "time_delta_type": return _("Input should be a valid timedelta")
case "time_delta_parsing": return _("Input should be a valid timedelta, {error}")
case "frozen_set_type": return _("Input should be a valid frozenset")
case "is_instance_of": return _("Input should be an instance of {class}")
case "is_subclass_of": return _("Input should be a subclass of {class}")
case "callable_type": return _("Input should be callable")
case "union_tag_invalid": return _("Input tag '{tag}' found using {discriminator} does not match any of the expected tags: {expected_tags}")
case "union_tag_not_found": return _("Unable to extract tag using discriminator {discriminator}")
case "arguments_type": return _("Arguments must be a tuple, list or a dictionary")
case "missing_argument": return _("Missing required argument")
case "unexpected_keyword_argument": return _("Unexpected keyword argument")
case "missing_keyword_only_argument": return _("Missing required keyword only argument")
case "unexpected_positional_argument": return _("Unexpected positional argument")
case "missing_positional_only_argument": return _("Missing required positional only argument")
case "multiple_argument_values": return _("Got multiple values for argument")
case "url_type": return _("URL input should be a string or URL")
case "url_parsing": return _("Input should be a valid URL, {error}")
case "url_syntax_violation": return _("Input violated strict URL syntax rules, {error}")
case "url_too_long": return _("URL should have at most {max_length} character{expected_plural}")
case "url_scheme": return _("URL scheme should be {expected_schemes}")
case "uuid_type": return _("UUID input should be a string, bytes or UUID object")
case "uuid_parsing": return _("Input should be a valid UUID, {error}")
case "uuid_version": return _("UUID version {expected_version} expected")
case "decimal_type": return _("Decimal input should be an integer, float, string or Decimal object")
case "decimal_parsing": return _("Input should be a valid decimal")
case "decimal_max_digits": return _("Decimal input should have no more than {max_digits} digit{expected_plural} in total")
case "decimal_max_places": return _("Decimal input should have no more than {decimal_places} decimal place{expected_plural}")
case "decimal_whole_digits": return _("Decimal input should have no more than {whole_digits} digit{expected_plural} before the decimal point")
case "complex_type": return _("Input should be a valid python complex object, a number, or a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex")
case "complex_str_parsing": return _("Input should be a valid complex string following the rules at https://docs.python.org/3/library/functions.html#complex")
case _: return None

View File

@@ -60,8 +60,8 @@ class BrowserConfig:
self.binary_location:str | None = None
self.extensions:Iterable[str] = []
self.use_private_window:bool = True
self.user_data_dir:str = ""
self.profile_name:str = ""
self.user_data_dir:str | None = None
self.profile_name:str | None = None
class WebScrapingMixin: