refact: reorganize utility modules

This commit is contained in:
sebthom
2025-02-10 06:23:17 +01:00
parent e8d342dc68
commit 2402ba2572
21 changed files with 734 additions and 638 deletions

View File

@@ -17,16 +17,18 @@ import certifi, colorama, nodriver
from ruamel.yaml import YAML from ruamel.yaml import YAML
from wcmatch import glob from wcmatch import glob
from . import utils, resources, extract from . import extract, resources
from .i18n import Locale, get_current_locale, set_current_locale, get_translating_logger, pluralize from .ads import calculate_content_hash
from .utils import abspath, ainput, apply_defaults, ensure, is_frozen, safe_get, parse_datetime, calculate_content_hash from .utils import dicts, error_handlers, loggers, misc
from .web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin from .utils.files import abspath
from .utils.i18n import Locale, get_current_locale, set_current_locale, pluralize
from .utils.misc import ainput, ensure, is_frozen, parse_datetime, parse_decimal
from .utils.web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin
from ._version import __version__ from ._version import __version__
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933 # W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
LOG_ROOT:Final[logging.Logger] = logging.getLogger() LOG:Final[logging.Logger] = loggers.get_logger(__name__)
LOG:Final[logging.Logger] = get_translating_logger(__name__)
LOG.setLevel(logging.INFO) LOG.setLevel(logging.INFO)
colorama.just_fix_windows_console() colorama.just_fix_windows_console()
@@ -59,7 +61,8 @@ class KleinanzeigenBot(WebScrapingMixin):
def __del__(self) -> None: def __del__(self) -> None:
if self.file_log: if self.file_log:
LOG_ROOT.removeHandler(self.file_log) self.file_log.flush()
loggers.LOG_ROOT.removeHandler(self.file_log)
self.file_log.close() self.file_log.close()
self.close_browser_session() self.close_browser_session()
@@ -258,7 +261,7 @@ class KleinanzeigenBot(WebScrapingMixin):
self.file_log = RotatingFileHandler(filename = self.log_file_path, maxBytes = 10 * 1024 * 1024, backupCount = 10, encoding = "utf-8") self.file_log = RotatingFileHandler(filename = self.log_file_path, maxBytes = 10 * 1024 * 1024, backupCount = 10, encoding = "utf-8")
self.file_log.setLevel(logging.DEBUG) self.file_log.setLevel(logging.DEBUG)
self.file_log.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")) self.file_log.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
LOG_ROOT.addHandler(self.file_log) loggers.LOG_ROOT.addHandler(self.file_log)
LOG.info("App version: %s", self.get_version()) LOG.info("App version: %s", self.get_version())
LOG.info("Python version: %s", sys.version) LOG.info("Python version: %s", sys.version)
@@ -333,13 +336,13 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info('Start fetch task for the ad(s) with id(s):') LOG.info('Start fetch task for the ad(s) with id(s):')
LOG.info(' | '.join([str(id_) for id_ in ids])) LOG.info(' | '.join([str(id_) for id_ in ids]))
ad_fields = utils.load_dict_from_module(resources, "ad_fields.yaml") ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml")
ads = [] ads = []
for ad_file, ad_file_relative in sorted(ad_files.items()): for ad_file, ad_file_relative in sorted(ad_files.items()):
ad_cfg_orig = utils.load_dict(ad_file, "ad") ad_cfg_orig = dicts.load_dict(ad_file, "ad")
ad_cfg = copy.deepcopy(ad_cfg_orig) ad_cfg = copy.deepcopy(ad_cfg_orig)
apply_defaults(ad_cfg, self.config["ad_defaults"], ignore = lambda k, _: k == "description", override = lambda _, v: v == "") dicts.apply_defaults(ad_cfg, self.config["ad_defaults"], ignore = lambda k, _: k == "description", override = lambda _, v: v == "")
apply_defaults(ad_cfg, ad_fields) dicts.apply_defaults(ad_cfg, ad_fields)
if ignore_inactive and not ad_cfg["active"]: if ignore_inactive and not ad_cfg["active"]:
LOG.info(" -> SKIPPED: inactive ad [%s]", ad_file_relative) LOG.info(" -> SKIPPED: inactive ad [%s]", ad_file_relative)
@@ -365,13 +368,13 @@ class KleinanzeigenBot(WebScrapingMixin):
# pylint: disable=cell-var-from-loop # pylint: disable=cell-var-from-loop
def assert_one_of(path:str, allowed:Iterable[str]) -> None: def assert_one_of(path:str, allowed:Iterable[str]) -> None:
ensure(safe_get(ad_cfg, *path.split(".")) in allowed, f"-> property [{path}] must be one of: {allowed} @ [{ad_file}]") ensure(dicts.safe_get(ad_cfg, *path.split(".")) in allowed, f"-> property [{path}] must be one of: {allowed} @ [{ad_file}]")
def assert_min_len(path:str, minlen:int) -> None: def assert_min_len(path:str, minlen:int) -> None:
ensure(len(safe_get(ad_cfg, *path.split("."))) >= minlen, f"-> property [{path}] must be at least {minlen} characters long @ [{ad_file}]") ensure(len(dicts.safe_get(ad_cfg, *path.split("."))) >= minlen, f"-> property [{path}] must be at least {minlen} characters long @ [{ad_file}]")
def assert_has_value(path:str) -> None: def assert_has_value(path:str) -> None:
ensure(safe_get(ad_cfg, *path.split(".")), f"-> property [{path}] not specified @ [{ad_file}]") ensure(dicts.safe_get(ad_cfg, *path.split(".")), f"-> property [{path}] not specified @ [{ad_file}]")
# pylint: enable=cell-var-from-loop # pylint: enable=cell-var-from-loop
assert_one_of("type", {"OFFER", "WANTED"}) assert_one_of("type", {"OFFER", "WANTED"})
@@ -379,7 +382,7 @@ class KleinanzeigenBot(WebScrapingMixin):
assert_has_value("description") assert_has_value("description")
assert_one_of("price_type", {"FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"}) assert_one_of("price_type", {"FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"})
if ad_cfg["price_type"] == "GIVE_AWAY": if ad_cfg["price_type"] == "GIVE_AWAY":
ensure(not safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]") ensure(not dicts.safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]")
elif ad_cfg["price_type"] == "FIXED": elif ad_cfg["price_type"] == "FIXED":
assert_has_value("price") assert_has_value("price")
@@ -405,7 +408,7 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_cfg["category"] = resolved_category_id ad_cfg["category"] = resolved_category_id
if ad_cfg["shipping_costs"]: if ad_cfg["shipping_costs"]:
ad_cfg["shipping_costs"] = str(round(utils.parse_decimal(ad_cfg["shipping_costs"]), 2)) ad_cfg["shipping_costs"] = str(round(misc.parse_decimal(ad_cfg["shipping_costs"]), 2))
if ad_cfg["images"]: if ad_cfg["images"]:
images = [] images = []
@@ -433,18 +436,18 @@ class KleinanzeigenBot(WebScrapingMixin):
return ads return ads
def load_config(self) -> None: def load_config(self) -> None:
config_defaults = utils.load_dict_from_module(resources, "config_defaults.yaml") config_defaults = dicts.load_dict_from_module(resources, "config_defaults.yaml")
config = utils.load_dict_if_exists(self.config_file_path, _("config")) config = dicts.load_dict_if_exists(self.config_file_path, _("config"))
if config is None: if config is None:
LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path) LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
utils.save_dict(self.config_file_path, config_defaults) dicts.save_dict(self.config_file_path, config_defaults)
config = {} config = {}
self.config = apply_defaults(config, config_defaults) self.config = dicts.apply_defaults(config, config_defaults)
self.categories = utils.load_dict_from_module(resources, "categories.yaml", "categories") self.categories = dicts.load_dict_from_module(resources, "categories.yaml", "categories")
deprecated_categories = utils.load_dict_from_module(resources, "categories_old.yaml", "categories") deprecated_categories = dicts.load_dict_from_module(resources, "categories_old.yaml", "categories")
self.categories.update(deprecated_categories) self.categories.update(deprecated_categories)
if self.config["categories"]: if self.config["categories"]:
self.categories.update(self.config["categories"]) self.categories.update(self.config["categories"])
@@ -675,7 +678,7 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.web_select(By.CSS_SELECTOR, "select#price-type-react, select#micro-frontend-price-type, select#priceType", price_type) await self.web_select(By.CSS_SELECTOR, "select#price-type-react, select#micro-frontend-price-type, select#priceType", price_type)
except TimeoutError: except TimeoutError:
pass pass
if safe_get(ad_cfg, "price"): if dicts.safe_get(ad_cfg, "price"):
await self.web_input(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price", ad_cfg["price"]) await self.web_input(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price", ad_cfg["price"])
############################# #############################
@@ -797,7 +800,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info(" -> SUCCESS: ad published with ID %s", ad_id) LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)
utils.save_dict(ad_file, ad_cfg_orig) dicts.save_dict(ad_file, ad_cfg_orig)
async def __set_condition(self, condition_value: str) -> None: async def __set_condition(self, condition_value: str) -> None:
condition_mapping = { condition_mapping = {
@@ -1047,11 +1050,11 @@ def main(args:list[str]) -> None:
https://github.com/Second-Hand-Friends/kleinanzeigen-bot https://github.com/Second-Hand-Friends/kleinanzeigen-bot
""")[1:], flush = True) # [1:] removes the first empty blank line """)[1:], flush = True) # [1:] removes the first empty blank line
utils.configure_console_logging() loggers.configure_console_logging()
signal.signal(signal.SIGINT, utils.on_sigint) # capture CTRL+C signal.signal(signal.SIGINT, error_handlers.on_sigint) # capture CTRL+C
sys.excepthook = utils.on_exception sys.excepthook = error_handlers.on_exception
atexit.register(utils.on_exit) atexit.register(loggers.flush_all_handlers)
bot = KleinanzeigenBot() bot = KleinanzeigenBot()
atexit.register(bot.close_browser_session) atexit.register(bot.close_browser_session)
@@ -1059,6 +1062,6 @@ def main(args:list[str]) -> None:
if __name__ == "__main__": if __name__ == "__main__":
utils.configure_console_logging() loggers.configure_console_logging()
LOG.error("Direct execution not supported. Use 'pdm run app'") LOG.error("Direct execution not supported. Use 'pdm run app'")
sys.exit(1) sys.exit(1)

View File

@@ -0,0 +1,38 @@
"""
SPDX-FileCopyrightText: © Jens Bergman and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import json, os, hashlib
from typing import Any
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
    """
    Calculates a SHA-256 hash over the user-modifiable fields of an ad.

    The relevant fields are normalized (strings via `str()`, flags via `bool()`, lists sorted,
    image paths reduced to their base name) and serialized as JSON with sorted keys, so equal
    content always produces the same digest.

    :param ad_cfg: the ad configuration
    :return: hex digest of the normalized content
    """
    # a `contact` key that is present but None is handled like a missing section
    # (same treatment as `special_attributes`, `shipping_options` and `images` below)
    contact = ad_cfg.get("contact") or {}

    # Relevant fields for the hash
    content = {
        "active": bool(ad_cfg.get("active", True)),  # Explicitly convert to bool
        "type": str(ad_cfg.get("type", "")),  # Explicitly convert to string
        "title": str(ad_cfg.get("title", "")),
        "description": str(ad_cfg.get("description", "")),
        "category": str(ad_cfg.get("category", "")),
        "price": str(ad_cfg.get("price", "")),  # Price always as string
        "price_type": str(ad_cfg.get("price_type", "")),
        "special_attributes": dict(ad_cfg.get("special_attributes") or {}),  # Handle None case
        "shipping_type": str(ad_cfg.get("shipping_type", "")),
        "shipping_costs": str(ad_cfg.get("shipping_costs", "")),
        "shipping_options": sorted([str(x) for x in (ad_cfg.get("shipping_options") or [])]),  # Handle None case
        "sell_directly": bool(ad_cfg.get("sell_directly", False)),  # Explicitly convert to bool
        # Handle None values in images
        "images": sorted([os.path.basename(str(img)) if img is not None else "" for img in (ad_cfg.get("images") or [])]),
        "contact": {
            "name": str(contact.get("name", "")),
            "street": str(contact.get("street", "")),
            "zipcode": str(contact.get("zipcode", "")),
            "phone": str(contact.get("phone", ""))
        }
    }

    # Create sorted JSON string for consistent hashes
    content_str = json.dumps(content, sort_keys = True)
    return hashlib.sha256(content_str.encode()).hexdigest()

View File

@@ -3,22 +3,20 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
""" """
import logging, os, shutil import json, mimetypes, os, shutil
import urllib.request as urllib_request import urllib.request as urllib_request
import mimetypes
from datetime import datetime from datetime import datetime
from typing import Any, Final from typing import Any, Final
import json
from .i18n import get_translating_logger, pluralize from .ads import calculate_content_hash
from .utils import is_integer, parse_decimal, save_dict, calculate_content_hash from .utils import dicts, i18n, loggers, misc, reflect
from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin from .utils.web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
__all__ = [ __all__ = [
"AdExtractor", "AdExtractor",
] ]
LOG:Final[logging.Logger] = get_translating_logger(__name__) LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
class AdExtractor(WebScrapingMixin): class AdExtractor(WebScrapingMixin):
@@ -56,7 +54,7 @@ class AdExtractor(WebScrapingMixin):
# call extraction function # call extraction function
info = await self._extract_ad_page_info(new_base_dir, ad_id) info = await self._extract_ad_page_info(new_base_dir, ad_id)
ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml' ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml'
save_dict(ad_file_path, info) dicts.save_dict(ad_file_path, info)
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]: async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
""" """
@@ -74,7 +72,7 @@ class AdExtractor(WebScrapingMixin):
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large') image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box)) n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
LOG.info('Found %s.', pluralize("image", n_images)) LOG.info('Found %s.', i18n.pluralize("image", n_images))
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box) img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
img_fn_prefix = 'ad_' + str(ad_id) + '__img' img_fn_prefix = 'ad_' + str(ad_id) + '__img'
@@ -106,7 +104,7 @@ class AdExtractor(WebScrapingMixin):
LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.') LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.')
break break
img_nr += 1 img_nr += 1
LOG.info('Downloaded %s.', pluralize("image", dl_counter)) LOG.info('Downloaded %s.', i18n.pluralize("image", dl_counter))
except TimeoutError: # some ads do not require images except TimeoutError: # some ads do not require images
LOG.warning('No image area found. Continuing without downloading images.') LOG.warning('No image area found. Continuing without downloading images.')
@@ -193,7 +191,7 @@ class AdExtractor(WebScrapingMixin):
Navigates to an ad page specified with an ad ID; or alternatively by a given URL. Navigates to an ad page specified with an ad ID; or alternatively by a given URL.
:return: whether the navigation to the ad page was successful :return: whether the navigation to the ad page was successful
""" """
if is_integer(id_or_url): if reflect.is_integer(id_or_url):
# navigate to start page, otherwise page can be None! # navigate to start page, otherwise page can be None!
await self.web_open('https://www.kleinanzeigen.de/') await self.web_open('https://www.kleinanzeigen.de/')
# enter the ad ID into the search bar # enter the ad ID into the search bar
@@ -349,7 +347,7 @@ class AdExtractor(WebScrapingMixin):
elif '' in shipping_text: elif '' in shipping_text:
shipping_price_parts = shipping_text.split(' ') shipping_price_parts = shipping_text.split(' ')
ship_type = 'SHIPPING' ship_type = 'SHIPPING'
ship_costs = float(parse_decimal(shipping_price_parts[-2])) ship_costs = float(misc.parse_decimal(shipping_price_parts[-2]))
# reading shipping option from kleinanzeigen # reading shipping option from kleinanzeigen
# and find the right one by price # and find the right one by price

View File

@@ -1,366 +0,0 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import asyncio, copy, decimal, inspect, json, logging, os, re, socket, sys, traceback, time, hashlib
from importlib.resources import read_text as get_resource_as_string
from collections.abc import Callable
from datetime import datetime
from gettext import gettext as _
from types import FrameType, ModuleType, TracebackType
from typing import Any, Final, TypeVar
import colorama
from ruamel.yaml import YAML
from .i18n import get_translating_logger
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = get_translating_logger(__name__)
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
T = TypeVar('T')
def abspath(relative_path:str, relative_to:str | None = None) -> str:
"""
Makes a given relative path absolute based on another file/folder
"""
if os.path.isabs(relative_path):
return relative_path
if not relative_to:
return os.path.abspath(relative_path)
if os.path.isfile(relative_to):
relative_to = os.path.dirname(relative_to)
return os.path.normpath(os.path.join(relative_to, relative_path))
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
"""
:param timeout: timespan in seconds until when the condition must become `True`, default is 5 seconds
:param poll_requency: sleep interval between calls in seconds, default is 0.5 seconds
:raises AssertionError: if condition did not come `True` within given timespan
"""
if not isinstance(condition, Callable): # type: ignore[arg-type] # https://github.com/python/mypy/issues/6864
if condition:
return
raise AssertionError(_(error_message))
if timeout < 0:
raise AssertionError("[timeout] must be >= 0")
if poll_requency < 0:
raise AssertionError("[poll_requency] must be >= 0")
start_at = time.time()
while not condition(): # type: ignore[operator]
elapsed = time.time() - start_at
if elapsed >= timeout:
raise AssertionError(_(error_message))
time.sleep(poll_requency)
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
stack = inspect.stack()
try:
for frame in stack[depth + 1:]:
if frame.function and frame.function != "<lambda>":
return frame
return None
finally:
del stack # Clean up the stack to avoid reference cycles
def is_frozen() -> bool:
    """
    Returns the value of the `frozen` attribute of the `sys` module, or False if it is not defined.

    >>> is_frozen()
    False
    """
    try:
        return sys.frozen  # type: ignore[attr-defined,no-any-return]
    except AttributeError:
        return False
def is_integer(obj:Any) -> bool:
    """
    Tells whether the given object can be converted with `int()`.

    :param obj: any value, e.g. an ad ID given as int or str
    :return: True if `int(obj)` succeeds, False otherwise

    >>> is_integer("42")
    True
    >>> is_integer("https://example.com")
    False
    >>> is_integer(float("inf"))
    False
    """
    try:
        int(obj)
    except (ValueError, TypeError, OverflowError):  # OverflowError: int(float("inf"))
        return False
    return True
def is_port_open(host:str, port:int) -> bool:
    """
    Checks whether a TCP connection (IPv4) to the given host and port can be established.

    The connection attempt uses a timeout of one second.

    :param host: host name or IPv4 address
    :param port: TCP port number
    :return: True if the connection succeeded, False on any failure
    """
    try:
        # the context manager closes the socket; unlike a `finally: s.close()` it cannot
        # fail with an unbound variable when creating the socket itself raises
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(1)
            s.connect((host, port))
            return True
    except Exception:  # deliberately broad: any failure means "not reachable"
        return False
async def ainput(prompt: str) -> str:
    """
    Reads a line via the built-in `input()` in a worker thread.

    :param prompt: text shown to the user; a single space is appended
    :return: the value returned by `input()`
    """
    prompt_text = f'{prompt} '
    return await asyncio.to_thread(input, prompt_text)
def apply_defaults(
    target:dict[Any, Any],
    defaults:dict[Any, Any],
    ignore:Callable[[Any, Any], bool] = lambda _k, _v: False,
    override:Callable[[Any, Any], bool] = lambda _k, _v: False
) -> dict[Any, Any]:
    """
    Recursively copies missing entries from `defaults` into `target` (in place).

    :param target: the dict to complete; it is modified and returned
    :param defaults: the default values; they are deep-copied before being assigned
    :param ignore: predicate `(key, default_value)`; if True a missing key is NOT filled in
    :param override: predicate `(key, current_value)`; if True an existing non-dict value is replaced by the default
    :return: `target`

    >>> apply_defaults({}, {"foo": "bar"})
    {'foo': 'bar'}
    >>> apply_defaults({"foo": "foo"}, {"foo": "bar"})
    {'foo': 'foo'}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"})
    {'foo': ''}
    >>> apply_defaults({}, {"foo": "bar"}, ignore = lambda k, _: k == "foo")
    {}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': 'bar'}
    >>> apply_defaults({"foo": None}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': None}
    >>> apply_defaults({"a": {"foo": ""}}, {"a": {"foo": "bar"}}, override = lambda _, v: v == "")
    {'a': {'foo': 'bar'}}
    """
    for key, default_value in defaults.items():
        if key in target:
            if isinstance(target[key], dict) and isinstance(default_value, dict):
                # both predicates must be passed down, otherwise `override` is silently lost for nested dicts
                apply_defaults(target[key], default_value, ignore = ignore, override = override)
            elif override(key, target[key]):
                target[key] = copy.deepcopy(default_value)
        elif not ignore(key, default_value):
            target[key] = copy.deepcopy(default_value)
    return target
def safe_get(a_map:dict[Any, Any], *keys:str) -> Any:
    """
    Looks up a value in nested dicts without raising on missing keys.

    :param a_map: the (possibly nested) dict, may be empty or None
    :param keys: the path of keys to follow
    :return: the value at the given path, or None if any step of the path cannot be resolved.
        Without keys, `a_map` itself is returned.

    >>> safe_get({"foo": {}}, "foo", "bar") is None
    True
    >>> safe_get({"foo": {"bar": "some_value"}}, "foo", "bar")
    'some_value'
    >>> safe_get({}, "foo") is None
    True
    >>> safe_get(None, "foo") is None
    True
    """
    current:Any = a_map
    for key in keys:
        try:
            current = current[key]
        except (KeyError, TypeError):  # TypeError: current is None or not subscriptable
            return None
    return current
def configure_console_logging() -> None:
    """
    Registers two colorizing console handlers on the root logger.

    The first handler emits records up to and including INFO, the second one records from
    WARNING upwards. Both share a formatter that colorizes the level name and the message
    and highlights values enclosed by [...], "..." or '...'.
    """
    fore = colorama.Fore
    bright = colorama.Style.BRIGHT
    reset_all = colorama.Style.RESET_ALL

    class CustomFormatter(logging.Formatter):
        LEVEL_COLORS = {
            logging.DEBUG: fore.BLACK + bright,
            logging.INFO: fore.BLACK + bright,
            logging.WARNING: fore.YELLOW,
            logging.ERROR: fore.RED,
            logging.CRITICAL: fore.RED,
        }
        MESSAGE_COLORS = {
            logging.DEBUG: fore.BLACK + bright,
            logging.INFO: fore.RESET,
            logging.WARNING: fore.YELLOW,
            logging.ERROR: fore.RED,
            logging.CRITICAL: fore.RED + bright,
        }
        VALUE_COLORS = {
            logging.DEBUG: fore.BLACK + bright,
            logging.INFO: fore.MAGENTA,
            logging.WARNING: fore.MAGENTA,
            logging.ERROR: fore.MAGENTA,
            logging.CRITICAL: fore.MAGENTA,
        }

        def format(self, record:logging.LogRecord) -> str:
            # work on a copy so that the original record is left untouched
            record = copy.deepcopy(record)
            level_no = record.levelno

            level_color = self.LEVEL_COLORS.get(level_no, "")
            msg_color = self.MESSAGE_COLORS.get(level_no, "")
            value_color = self.VALUE_COLORS.get(level_no, "")

            # translate and colorize log level name (DEBUG and below stay untranslated)
            if level_no > logging.DEBUG:
                levelname = _(record.levelname)
            else:
                levelname = record.levelname
            record.levelname = f"{level_color}[{levelname}]{reset_all}"

            def highlight(match:re.Match[str]) -> str:
                value = match.group(1) or match.group(2) or match.group(3)
                return f"[{value_color}{value}{fore.RESET}{msg_color}]"

            # highlight message values enclosed by [...], "...", and '...'
            highlighted = re.sub(r"\[([^\]]+)\]|\"([^\"]+)\"|\'([^\']+)\'", highlight, str(record.msg))

            # colorize message
            record.msg = f"{msg_color}{highlighted}{reset_all}"

            return super().format(record)

    formatter = CustomFormatter("%(levelname)s %(message)s")

    # NOTE(review): named "stdout" but bound to sys.stderr - confirm whether sys.stdout was intended
    stdout_log = logging.StreamHandler(sys.stderr)
    stdout_log.setLevel(logging.DEBUG)
    stdout_log.addFilter(type("", (logging.Filter,), {
        "filter": lambda rec: rec.levelno <= logging.INFO
    }))
    stdout_log.setFormatter(formatter)

    stderr_log = logging.StreamHandler(sys.stderr)
    stderr_log.setLevel(logging.WARNING)
    stderr_log.setFormatter(formatter)

    for console_handler in (stdout_log, stderr_log):
        LOG_ROOT.addHandler(console_handler)
def on_exception(ex_type:type[BaseException], ex_value:Any, ex_traceback:TracebackType | None) -> None:
    """
    Exception hook that reports uncaught exceptions through the module logger.

    - KeyboardInterrupt is delegated to the default `sys.__excepthook__`
    - with DEBUG logging enabled, or for AttributeError/ImportError/NameError/TypeError, the full traceback is logged
    - for AssertionError only the exception itself is logged
    - for anything else the exception type name and the exception value are logged
    """
    if issubclass(ex_type, KeyboardInterrupt):
        sys.__excepthook__(ex_type, ex_value, ex_traceback)
        return

    if LOG.isEnabledFor(logging.DEBUG) or isinstance(ex_value, (AttributeError, ImportError, NameError, TypeError)):
        LOG.error("".join(traceback.format_exception(ex_type, ex_value, ex_traceback)))
        return

    if isinstance(ex_value, AssertionError):
        LOG.error(ex_value)
        return

    LOG.error("%s: %s", ex_type.__name__, ex_value)
def on_exit() -> None:
    """
    Flushes every handler currently attached to the root logger.
    """
    for log_handler in LOG_ROOT.handlers:
        log_handler.flush()
def on_sigint(_sig:int, _frame:FrameType | None) -> None:
    """
    Signal handler that logs a warning and terminates the process with exit code 0.

    :param _sig: the signal number (unused)
    :param _frame: the interrupted stack frame (unused)
    """
    LOG.warning("Aborted on user request.")
    raise SystemExit(0)
def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
    """
    Loads a dict from the given file via `load_dict_if_exists`.

    :param filepath: path of the file to load
    :param content_label: label describing the content, passed on for the log message
    :raises FileNotFoundError: if `load_dict_if_exists` returns None
    """
    if (data := load_dict_if_exists(filepath, content_label)) is None:
        raise FileNotFoundError(filepath)
    return data
def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] | None:
    """
    Loads a JSON or YAML file.

    :param filepath: path of a file ending with .json, .yaml or .yml
    :param content_label: optional label describing the content, only used for the log message
    :return: the parsed content, or None if the file does not exist
    :raises ValueError: if the file extension is not supported
    """
    abs_filepath = os.path.abspath(filepath)
    label_prefix = content_label + _(" from ") if content_label else ""
    LOG.info("Loading %s[%s]...", label_prefix, abs_filepath)

    file_ext = os.path.splitext(filepath)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(_('Unsupported file type. The filename "%s" must end with *.json, *.yaml, or *.yml') % filepath)

    if not os.path.exists(filepath):
        return None

    with open(filepath, encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            return json.load(file)  # type: ignore[no-any-return] # mypy
        return YAML().load(file)  # type: ignore[no-any-return] # mypy
def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]:
    """
    Loads a JSON or YAML resource that is located inside the given module/package.

    :param module: the module containing the resource
    :param filename: resource name ending with .json, .yaml or .yml
    :param content_label: optional label describing the content, only used for the log message
    :raises ValueError: if the file extension is not supported
    :raises FileNotFoundError: presumably raised by the resource loader if the resource is missing
    """
    label_prefix = content_label + " from " if content_label else ""
    LOG.debug("Loading %s[%s.%s]...", label_prefix, module.__name__, filename)

    file_ext = os.path.splitext(filename)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(f'Unsupported file type. The filename "{filename}" must end with *.json, *.yaml, or *.yml')

    content = get_resource_as_string(module, filename)  # pylint: disable=deprecated-method
    if filename.endswith(".json"):
        return json.loads(content)  # type: ignore[no-any-return] # mypy
    return YAML().load(content)  # type: ignore[no-any-return] # mypy
def save_dict(filepath:str, content:dict[str, Any]) -> None:
    """
    Writes the given dict to a file, as JSON if the path ends with .json, otherwise as YAML.

    :param filepath: the target file; it is made absolute before writing
    :param content: the data to serialize
    """
    filepath = os.path.abspath(filepath)
    LOG.info("Saving [%s]...", filepath)

    def represent_str(dumper:Any, data:str) -> Any:
        # use YAML | block style for multi-line strings
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style = '|' if '\n' in data else None)

    with open(filepath, "w", encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            file.write(json.dumps(content, indent = 2, ensure_ascii = False))
            return

        yaml = YAML()
        yaml.indent(mapping = 2, sequence = 4, offset = 2)
        yaml.representer.add_representer(str, represent_str)
        yaml.allow_duplicate_keys = False
        yaml.explicit_start = False
        yaml.dump(content, file)
def parse_decimal(number:float | int | str) -> decimal.Decimal:
"""
>>> parse_decimal(5)
Decimal('5')
>>> parse_decimal(5.5)
Decimal('5.5')
>>> parse_decimal("5.5")
Decimal('5.5')
>>> parse_decimal("5,5")
Decimal('5.5')
>>> parse_decimal("1.005,5")
Decimal('1005.5')
>>> parse_decimal("1,005.5")
Decimal('1005.5')
"""
try:
return decimal.Decimal(number)
except decimal.InvalidOperation as ex:
parts = re.split("[.,]", str(number))
try:
return decimal.Decimal("".join(parts[:-1]) + "." + parts[-1])
except decimal.InvalidOperation:
raise decimal.DecimalException(f"Invalid number format: {number}") from ex
def parse_datetime(date:datetime | str | None) -> datetime | None:
"""
>>> parse_datetime(datetime(2020, 1, 1, 0, 0))
datetime.datetime(2020, 1, 1, 0, 0)
>>> parse_datetime("2020-01-01T00:00:00")
datetime.datetime(2020, 1, 1, 0, 0)
>>> parse_datetime(None)
"""
if date is None:
return None
if isinstance(date, datetime):
return date
return datetime.fromisoformat(date)
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
    """
    Calculates a SHA-256 hash over the user-modifiable fields of an ad.

    The relevant fields are normalized (strings via `str()`, flags via `bool()`, lists sorted,
    image paths reduced to their base name) and serialized as JSON with sorted keys, so equal
    content always produces the same digest.

    :param ad_cfg: the ad configuration
    :return: hex digest of the normalized content
    """
    # a `contact` key that is present but None is handled like a missing section
    # (same treatment as `special_attributes`, `shipping_options` and `images` below)
    contact = ad_cfg.get("contact") or {}

    # Relevant fields for the hash
    content = {
        "active": bool(ad_cfg.get("active", True)),  # Explicitly convert to bool
        "type": str(ad_cfg.get("type", "")),  # Explicitly convert to string
        "title": str(ad_cfg.get("title", "")),
        "description": str(ad_cfg.get("description", "")),
        "category": str(ad_cfg.get("category", "")),
        "price": str(ad_cfg.get("price", "")),  # Price always as string
        "price_type": str(ad_cfg.get("price_type", "")),
        "special_attributes": dict(ad_cfg.get("special_attributes") or {}),  # Handle None case
        "shipping_type": str(ad_cfg.get("shipping_type", "")),
        "shipping_costs": str(ad_cfg.get("shipping_costs", "")),
        "shipping_options": sorted([str(x) for x in (ad_cfg.get("shipping_options") or [])]),  # Handle None case
        "sell_directly": bool(ad_cfg.get("sell_directly", False)),  # Explicitly convert to bool
        # Handle None values in images
        "images": sorted([os.path.basename(str(img)) if img is not None else "" for img in (ad_cfg.get("images") or [])]),
        "contact": {
            "name": str(contact.get("name", "")),
            "street": str(contact.get("street", "")),
            "zipcode": str(contact.get("zipcode", "")),
            "phone": str(contact.get("phone", ""))
        }
    }

    # Create sorted JSON string for consistent hashes
    content_str = json.dumps(content, sort_keys=True)
    return hashlib.sha256(content_str.encode()).hexdigest()

View File

@@ -0,0 +1,3 @@
"""
This module contains generic, reusable code.
"""

View File

@@ -0,0 +1,120 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import copy, json, os
from collections.abc import Callable
from importlib.resources import read_text as get_resource_as_string
from gettext import gettext as _
from types import ModuleType
from typing import Any, Final
from ruamel.yaml import YAML
from . import files, loggers # pylint: disable=cyclic-import
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
def apply_defaults(
    target:dict[Any, Any],
    defaults:dict[Any, Any],
    ignore:Callable[[Any, Any], bool] = lambda _k, _v: False,
    override:Callable[[Any, Any], bool] = lambda _k, _v: False
) -> dict[Any, Any]:
    """
    Recursively copies missing entries from `defaults` into `target` (in place).

    :param target: the dict to complete; it is modified and returned
    :param defaults: the default values; they are deep-copied before being assigned
    :param ignore: predicate `(key, default_value)`; if True a missing key is NOT filled in
    :param override: predicate `(key, current_value)`; if True an existing non-dict value is replaced by the default
    :return: `target`

    >>> apply_defaults({}, {"foo": "bar"})
    {'foo': 'bar'}
    >>> apply_defaults({"foo": "foo"}, {"foo": "bar"})
    {'foo': 'foo'}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"})
    {'foo': ''}
    >>> apply_defaults({}, {"foo": "bar"}, ignore = lambda k, _: k == "foo")
    {}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': 'bar'}
    >>> apply_defaults({"foo": None}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': None}
    >>> apply_defaults({"a": {"foo": ""}}, {"a": {"foo": "bar"}}, override = lambda _, v: v == "")
    {'a': {'foo': 'bar'}}
    """
    for key, default_value in defaults.items():
        if key in target:
            if isinstance(target[key], dict) and isinstance(default_value, dict):
                # both predicates must be passed down, otherwise `override` is silently lost for nested dicts
                apply_defaults(target[key], default_value, ignore = ignore, override = override)
            elif override(key, target[key]):
                target[key] = copy.deepcopy(default_value)
        elif not ignore(key, default_value):
            target[key] = copy.deepcopy(default_value)
    return target
def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
    """
    Loads a dict from the given file via `load_dict_if_exists`.

    :param filepath: path of the file to load
    :param content_label: label describing the content, passed on for the log message
    :raises FileNotFoundError: if `load_dict_if_exists` returns None
    """
    if (data := load_dict_if_exists(filepath, content_label)) is None:
        raise FileNotFoundError(filepath)
    return data
def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] | None:
    """
    Loads a JSON or YAML file.

    :param filepath: path of a file ending with .json, .yaml or .yml
    :param content_label: optional label describing the content, only used for the log message
    :return: the parsed content, or None if the file does not exist
    :raises ValueError: if the file extension is not supported
    """
    abs_filepath = files.abspath(filepath)
    label_prefix = content_label + _(" from ") if content_label else ""
    LOG.info("Loading %s[%s]...", label_prefix, abs_filepath)

    file_ext = os.path.splitext(filepath)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(_('Unsupported file type. The filename "%s" must end with *.json, *.yaml, or *.yml') % filepath)

    if not os.path.exists(filepath):
        return None

    with open(filepath, encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            return json.load(file)  # type: ignore[no-any-return] # mypy
        return YAML().load(file)  # type: ignore[no-any-return] # mypy
def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]:
    """
    Loads a JSON or YAML resource that is located inside the given module/package.

    :param module: the module containing the resource
    :param filename: resource name ending with .json, .yaml or .yml
    :param content_label: optional label describing the content, only used for the log message
    :raises ValueError: if the file extension is not supported
    :raises FileNotFoundError: presumably raised by the resource loader if the resource is missing
    """
    label_prefix = content_label + " from " if content_label else ""
    LOG.debug("Loading %s[%s.%s]...", label_prefix, module.__name__, filename)

    file_ext = os.path.splitext(filename)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(f'Unsupported file type. The filename "{filename}" must end with *.json, *.yaml, or *.yml')

    content = get_resource_as_string(module, filename)  # pylint: disable=deprecated-method
    if filename.endswith(".json"):
        return json.loads(content)  # type: ignore[no-any-return] # mypy
    return YAML().load(content)  # type: ignore[no-any-return] # mypy
def save_dict(filepath:str, content:dict[str, Any]) -> None:
    """
    Writes the given dictionary as UTF-8 encoded JSON or YAML file.

    The path is made absolute via `files.abspath` first. Files ending with ".json" are
    written as JSON (2 spaces indentation, non-ASCII characters kept as-is),
    everything else is dumped as YAML.

    :param filepath: target file path
    :param content: the data to serialize
    """
    target_path = files.abspath(filepath)
    LOG.info("Saving [%s]...", target_path)

    with open(target_path, "w", encoding = "utf-8") as out:
        if target_path.endswith(".json"):
            out.write(json.dumps(content, indent = 2, ensure_ascii = False))
            return

        def represent_str(dumper:Any, data:str) -> Any:
            # use YAML | block style for multi-line strings
            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style = '|' if '\n' in data else None)

        yaml = YAML()
        yaml.indent(mapping = 2, sequence = 4, offset = 2)
        yaml.representer.add_representer(str, represent_str)
        yaml.allow_duplicate_keys = False
        yaml.explicit_start = False
        yaml.dump(content, out)
def safe_get(a_map:dict[Any, Any], *keys:str) -> Any:
    """
    Walks a nested dictionary along the given keys without raising on missing entries.

    :param a_map: the (possibly nested) dictionary to look into
    :param keys: one key per nesting level, outermost first
    :return: the value found at the end of the key path, `a_map` itself if no keys are given,
        or `None` as soon as a key is missing or an intermediate value cannot be subscripted

    >>> safe_get({"foo": {}}, "foo", "bar") is None
    True
    >>> safe_get({"foo": {"bar": "some_value"}}, "foo", "bar")
    'some_value'
    >>> safe_get({}, "foo") is None
    True
    """
    current:Any = a_map
    for key in keys:
        try:
            current = current[key]
        except (KeyError, TypeError):
            # KeyError: key is missing; TypeError: current value is not subscriptable (e.g. None)
            return None
    return current

View File

@@ -0,0 +1,28 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import sys, traceback
from types import FrameType, TracebackType
from typing import Any, Final
from . import loggers
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
def on_exception(ex_type:type[BaseException], ex_value:Any, ex_traceback:TracebackType | None) -> None:
    """
    Reports an exception via the module logger, with a level of detail depending on its type.

    - `KeyboardInterrupt`: delegated to Python's default hook `sys.__excepthook__`
    - debug logging enabled, or `AttributeError`/`ImportError`/`NameError`/`TypeError`: full traceback is logged
    - `AssertionError`: only the exception value is logged
    - anything else: logged as "<exception type name>: <exception value>"

    NOTE(review): the signature matches `sys.excepthook` - presumably installed as such, confirm at the call site.
    """
    if issubclass(ex_type, KeyboardInterrupt):
        sys.__excepthook__(ex_type, ex_value, ex_traceback)
        return

    wants_traceback = loggers.is_debug(LOG) or isinstance(ex_value, (AttributeError, ImportError, NameError, TypeError))
    if wants_traceback:
        LOG.error("".join(traceback.format_exception(ex_type, ex_value, ex_traceback)))
        return

    if isinstance(ex_value, AssertionError):
        LOG.error(ex_value)
        return

    LOG.error("%s: %s", ex_type.__name__, ex_value)
def on_sigint(_sig:int, _frame:FrameType | None) -> None:
    """
    Logs a warning that the run was aborted and then calls `sys.exit(0)`.

    Both parameters are ignored.
    NOTE(review): the signature matches a `signal.signal` handler - presumably registered for SIGINT, confirm at the call site.
    """
    LOG.warning("Aborted on user request.")
    sys.exit(0)

View File

@@ -0,0 +1,22 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import os
def abspath(relative_path:str, relative_to:str | None = None) -> str:
"""
Makes a given relative path absolute based on another file/folder
"""
if not relative_to:
return os.path.abspath(relative_path)
if os.path.isabs(relative_path):
return relative_path
if os.path.isfile(relative_to):
relative_to = os.path.dirname(relative_to)
return os.path.normpath(os.path.join(relative_to, relative_path))

View File

@@ -7,14 +7,18 @@ import ctypes, gettext, inspect, locale, logging, os, sys
from collections.abc import Sized from collections.abc import Sized
from typing import Any, Final, NamedTuple from typing import Any, Final, NamedTuple
from . import resources, utils # pylint: disable=cyclic-import from kleinanzeigen_bot import resources
from . import reflect
from . import dicts
__all__ = [ __all__ = [
"Locale", "Locale",
"get_translating_logger", "get_current_locale",
"pluralize",
"set_current_locale",
"translate"
] ]
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = logging.getLogger(__name__) LOG:Final[logging.Logger] = logging.getLogger(__name__)
@@ -96,7 +100,7 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
global _TRANSLATIONS global _TRANSLATIONS
if _TRANSLATIONS is None: if _TRANSLATIONS is None:
try: try:
_TRANSLATIONS = utils.load_dict_from_module(resources, f"translations.{_CURRENT_LOCALE[0]}.yaml") _TRANSLATIONS = dicts.load_dict_from_module(resources, f"translations.{_CURRENT_LOCALE[0]}.yaml")
except FileNotFoundError: except FileNotFoundError:
_TRANSLATIONS = {} _TRANSLATIONS = {}
@@ -108,7 +112,7 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
if module_name and module_name.endswith(f".{file_basename}"): if module_name and module_name.endswith(f".{file_basename}"):
module_name = module_name[:-(len(file_basename) + 1)] module_name = module_name[:-(len(file_basename) + 1)]
file_key = f"{file_basename}.py" if module_name == file_basename else f"{module_name}/{file_basename}.py" file_key = f"{file_basename}.py" if module_name == file_basename else f"{module_name}/{file_basename}.py"
translation = utils.safe_get(_TRANSLATIONS, translation = dicts.safe_get(_TRANSLATIONS,
file_key, file_key,
caller.function, caller.function,
text text
@@ -116,8 +120,9 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
return translation if translation else text return translation if translation else text
# replace gettext.gettext with custom _translate function
_original_gettext = gettext.gettext _original_gettext = gettext.gettext
gettext.gettext = lambda message: translate(_original_gettext(message), utils.get_caller()) gettext.gettext = lambda message: translate(_original_gettext(message), reflect.get_caller())
for module_name, module in sys.modules.items(): for module_name, module in sys.modules.items():
if module is None or module_name in sys.builtin_module_names: if module is None or module_name in sys.builtin_module_names:
continue continue
@@ -127,19 +132,6 @@ for module_name, module in sys.modules.items():
setattr(module, 'gettext', gettext.gettext) setattr(module, 'gettext', gettext.gettext)
def get_translating_logger(name: str | None = None) -> logging.Logger:
class TranslatingLogger(logging.Logger):
def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
if level != logging.DEBUG: # debug messages should not be translated
msg = translate(msg, utils.get_caller(2))
super()._log(level, msg, *args, **kwargs)
logging.setLoggerClass(TranslatingLogger)
return logging.getLogger(name)
def get_current_locale() -> Locale: def get_current_locale() -> Locale:
return _CURRENT_LOCALE return _CURRENT_LOCALE
@@ -161,7 +153,7 @@ def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str
>>> pluralize("field", 2, prefix_with_count = False) >>> pluralize("field", 2, prefix_with_count = False)
'fields' 'fields'
""" """
noun = translate(noun, utils.get_caller()) noun = translate(noun, reflect.get_caller())
if isinstance(count, Sized): if isinstance(count, Sized):
count = len(count) count = len(count)

View File

@@ -0,0 +1,116 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import copy, logging, re, sys
from gettext import gettext as _
from typing import Any, Final # @UnusedImport
import colorama
from . import i18n, reflect
# public API of this module; is_debug is listed too because sibling modules call loggers.is_debug(...)
__all__ = [
    "Logger",
    "LOG_ROOT",
    "DEBUG",
    "INFO",
    "configure_console_logging",
    "flush_all_handlers",
    "get_logger",
    "is_debug"
]
Logger = logging.Logger
DEBUG:Final[int] = logging.DEBUG
INFO:Final[int] = logging.INFO
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
def configure_console_logging() -> None:
    """
    Attaches two colorizing stream handlers to the root logger `LOG_ROOT`.

    The first handler only lets records up to level INFO pass, the second one handles
    records of level WARNING and above. Both use the same formatter, which renders
    "<[LEVEL]> <message>" decorated with colorama escape sequences.
    """

    class CustomFormatter(logging.Formatter):
        # color of the "[LEVEL]" prefix
        LEVEL_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.WARNING: colorama.Fore.YELLOW,
            logging.ERROR: colorama.Fore.RED,
            logging.CRITICAL: colorama.Fore.RED,
        }
        # color of the message text
        MESSAGE_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.RESET,
            logging.WARNING: colorama.Fore.YELLOW,
            logging.ERROR: colorama.Fore.RED,
            logging.CRITICAL: colorama.Fore.RED + colorama.Style.BRIGHT,
        }
        # color of values inside the message that are enclosed by [...], "...", or '...'
        VALUE_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.MAGENTA,
            logging.WARNING: colorama.Fore.MAGENTA,
            logging.ERROR: colorama.Fore.MAGENTA,
            logging.CRITICAL: colorama.Fore.MAGENTA,
        }

        def format(self, record:logging.LogRecord) -> str:
            # all modifications are applied to a deep copy, the passed-in record stays untouched
            colored = copy.deepcopy(record)
            levelno = colored.levelno
            level_color = self.LEVEL_COLORS.get(levelno, "")
            msg_color = self.MESSAGE_COLORS.get(levelno, "")
            value_color = self.VALUE_COLORS.get(levelno, "")

            # translate and colorize log level name (levels up to DEBUG are not translated)
            if levelno > logging.DEBUG:
                levelname = _(colored.levelname)
            else:
                levelname = colored.levelname
            colored.levelname = f"{level_color}[{levelname}]{colorama.Style.RESET_ALL}"

            def highlight(match:re.Match[str]) -> str:
                # whichever delimiter matched, the value is re-emitted inside square brackets
                value = match.group(1) or match.group(2) or match.group(3)
                return f"[{value_color}{value}{colorama.Fore.RESET}{msg_color}]"

            # highlight message values enclosed by [...], "...", and '...'
            highlighted = re.sub(r"\[([^\]]+)\]|\"([^\"]+)\"|\'([^\']+)\'", highlight, str(colored.msg))

            # colorize message
            colored.msg = f"{msg_color}{highlighted}{colorama.Style.RESET_ALL}"
            return super().format(colored)

    class UpToInfoFilter(logging.Filter):
        def filter(self, record:logging.LogRecord) -> bool:
            return record.levelno <= logging.INFO

    shared_formatter = CustomFormatter("%(levelname)s %(message)s")

    # NOTE(review): this handler is named "stdout" but is bound to sys.stderr - confirm this is intended
    stdout_log = logging.StreamHandler(sys.stderr)
    stdout_log.setLevel(logging.DEBUG)
    stdout_log.addFilter(UpToInfoFilter())
    stdout_log.setFormatter(shared_formatter)
    LOG_ROOT.addHandler(stdout_log)

    stderr_log = logging.StreamHandler(sys.stderr)
    stderr_log.setLevel(logging.WARNING)
    stderr_log.setFormatter(shared_formatter)
    LOG_ROOT.addHandler(stderr_log)
def flush_all_handlers() -> None:
    """
    Calls `flush()` on every handler currently attached to the root logger `LOG_ROOT`.
    """
    for attached_handler in LOG_ROOT.handlers:
        attached_handler.flush()
def get_logger(name: str | None = None) -> logging.Logger:
    """
    Returns a localized logger

    Installs a `logging.Logger` subclass via `logging.setLoggerClass` whose `_log` passes every
    non-DEBUG message through `i18n.translate` before delegating to the regular implementation.

    NOTE(review): a new `TranslatingLogger` class is created and installed on each call.

    :param name: logger name handed to `logging.getLogger`
    """

    class TranslatingLogger(logging.Logger):

        def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
            # debug messages should not be translated
            localized = msg if level == logging.DEBUG else i18n.translate(msg, reflect.get_caller(2))
            super()._log(level, localized, *args, **kwargs)

    logging.setLoggerClass(TranslatingLogger)
    return logging.getLogger(name)
def is_debug(logger:Logger) -> bool:
    """
    :return: the result of `logger.isEnabledFor(logging.DEBUG)`
    """
    debug_enabled = logger.isEnabledFor(logging.DEBUG)
    return debug_enabled

View File

@@ -0,0 +1,90 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import asyncio, decimal, re, sys, time
from collections.abc import Callable
from datetime import datetime
from gettext import gettext as _
from typing import Any, TypeVar
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
T = TypeVar('T')
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
"""
:param timeout: timespan in seconds until when the condition must become `True`, default is 5 seconds
:param poll_requency: sleep interval between calls in seconds, default is 0.5 seconds
:raises AssertionError: if condition did not come `True` within given timespan
"""
if not isinstance(condition, Callable): # type: ignore[arg-type] # https://github.com/python/mypy/issues/6864
if condition:
return
raise AssertionError(_(error_message))
if timeout < 0:
raise AssertionError("[timeout] must be >= 0")
if poll_requency < 0:
raise AssertionError("[poll_requency] must be >= 0")
start_at = time.time()
while not condition(): # type: ignore[operator]
elapsed = time.time() - start_at
if elapsed >= timeout:
raise AssertionError(_(error_message))
time.sleep(poll_requency)
def is_frozen() -> bool:
    """
    Returns the value of the `frozen` attribute of the `sys` module, or `False` if there is none.

    NOTE(review): `sys.frozen` is presumably set by application bundlers - confirm which one is used.

    >>> is_frozen()
    False
    """
    frozen_flag = getattr(sys, "frozen", False)
    return frozen_flag
async def ainput(prompt: str) -> str:
    """
    Awaitable variant of the built-in `input`.

    Runs `input` via `asyncio.to_thread`, with a single space appended to the prompt.

    :param prompt: text shown to the user
    :return: the value returned by `input`
    """
    prompt_text = f'{prompt} '
    return await asyncio.to_thread(input, prompt_text)
def parse_decimal(number:float | int | str) -> decimal.Decimal:
"""
>>> parse_decimal(5)
Decimal('5')
>>> parse_decimal(5.5)
Decimal('5.5')
>>> parse_decimal("5.5")
Decimal('5.5')
>>> parse_decimal("5,5")
Decimal('5.5')
>>> parse_decimal("1.005,5")
Decimal('1005.5')
>>> parse_decimal("1,005.5")
Decimal('1005.5')
"""
try:
return decimal.Decimal(number)
except decimal.InvalidOperation as ex:
parts = re.split("[.,]", str(number))
try:
return decimal.Decimal("".join(parts[:-1]) + "." + parts[-1])
except decimal.InvalidOperation:
raise decimal.DecimalException(f"Invalid number format: {number}") from ex
def parse_datetime(date:datetime | str | None) -> datetime | None:
"""
>>> parse_datetime(datetime(2020, 1, 1, 0, 0))
datetime.datetime(2020, 1, 1, 0, 0)
>>> parse_datetime("2020-01-01T00:00:00")
datetime.datetime(2020, 1, 1, 0, 0)
>>> parse_datetime(None)
"""
if date is None:
return None
if isinstance(date, datetime):
return date
return datetime.fromisoformat(date)

View File

@@ -0,0 +1,20 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import socket
def is_port_open(host:str, port:int) -> bool:
    """
    Checks if a TCP connection (IPv4) to the given host and port can be established.

    The socket uses a timeout of 1 second and is closed again before returning.

    :return: `True` if connecting succeeded, `False` if any exception occurred
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.settimeout(1)
            probe.connect((host, port))
    except Exception:
        return False
    return True

View File

@@ -0,0 +1,26 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import inspect
from typing import Any
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
stack = inspect.stack()
try:
for frame in stack[depth + 1:]:
if frame.function and frame.function != "<lambda>":
return frame
return None
finally:
del stack # Clean up the stack to avoid reference cycles
def is_integer(obj:Any) -> bool:
    """
    :return: `True` if `int(obj)` succeeds, `False` if it raises `ValueError` or `TypeError`
    """
    try:
        int(obj)
    except (ValueError, TypeError):
        return False
    return True

View File

@@ -3,7 +3,7 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
""" """
import asyncio, enum, inspect, json, logging, os, platform, secrets, shutil, time import asyncio, enum, inspect, json, os, platform, secrets, shutil, time
from collections.abc import Callable, Coroutine, Iterable from collections.abc import Callable, Coroutine, Iterable
from gettext import gettext as _ from gettext import gettext as _
from typing import cast, Any, Final from typing import cast, Any, Final
@@ -19,8 +19,8 @@ from nodriver.core.config import Config
from nodriver.core.element import Element from nodriver.core.element import Element
from nodriver.core.tab import Tab as Page from nodriver.core.tab import Tab as Page
from .i18n import get_translating_logger from . import loggers, net
from .utils import ensure, is_port_open, T from .misc import ensure, T
__all__ = [ __all__ = [
"Browser", "Browser",
@@ -32,7 +32,7 @@ __all__ = [
"WebScrapingMixin", "WebScrapingMixin",
] ]
LOG:Final[logging.Logger] = get_translating_logger(__name__) LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
# see https://api.jquery.com/category/selectors/ # see https://api.jquery.com/category/selectors/
METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f'\\{ch}' for ch in '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~'}) METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f'\\{ch}' for ch in '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~'})
@@ -95,7 +95,7 @@ class WebScrapingMixin:
if remote_port > 0: if remote_port > 0:
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port) LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
ensure(is_port_open(remote_host, remote_port), ensure(net.is_port_open(remote_host, remote_port),
f"Browser process not reachable at {remote_host}:{remote_port}. " + f"Browser process not reachable at {remote_host}:{remote_port}. " +
f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml") f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml")
cfg = Config( cfg = Config(
@@ -146,7 +146,7 @@ class WebScrapingMixin:
LOG.info(" -> Custom Browser argument: %s", browser_arg) LOG.info(" -> Custom Browser argument: %s", browser_arg)
browser_args.append(browser_arg) browser_args.append(browser_arg)
if not LOG.isEnabledFor(logging.DEBUG): if not loggers.is_debug(LOG):
browser_args.append("--log-level=3") # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3 browser_args.append("--log-level=3") # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3
if self.browser_config.user_data_dir: if self.browser_config.user_data_dir:
@@ -483,7 +483,7 @@ class WebScrapingMixin:
async def web_sleep(self, min_ms:int = 1000, max_ms:int = 2500) -> None: async def web_sleep(self, min_ms:int = 1000, max_ms:int = 2500) -> None:
duration = max_ms <= min_ms and min_ms or secrets.randbelow(max_ms - min_ms) + min_ms duration = max_ms <= min_ms and min_ms or secrets.randbelow(max_ms - min_ms) + min_ms
LOG.log(logging.INFO if duration > 1500 else logging.DEBUG, " ... pausing for %d ms ...", duration) LOG.log(loggers.INFO if duration > 1500 else loggers.DEBUG, " ... pausing for %d ms ...", duration)
await self.page.sleep(duration / 1000) await self.page.sleep(duration / 1000)
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200, async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200,

View File

@@ -3,21 +3,21 @@ SPDX-FileCopyrightText: © Jens Bergmann and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
""" """
import logging, os import os
from typing import Any, Final from typing import Any, Final
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest import pytest
from kleinanzeigen_bot import KleinanzeigenBot, utils from kleinanzeigen_bot import KleinanzeigenBot
from kleinanzeigen_bot.utils import loggers
from kleinanzeigen_bot.extract import AdExtractor from kleinanzeigen_bot.extract import AdExtractor
from kleinanzeigen_bot.i18n import get_translating_logger from kleinanzeigen_bot.utils.web_scraping_mixin import Browser
from kleinanzeigen_bot.web_scraping_mixin import Browser
utils.configure_console_logging() loggers.configure_console_logging()
LOG: Final[logging.Logger] = get_translating_logger("kleinanzeigen_bot") LOG:Final[loggers.Logger] = loggers.get_logger("kleinanzeigen_bot")
LOG.setLevel(logging.DEBUG) LOG.setLevel(loggers.DEBUG)
@pytest.fixture @pytest.fixture

View File

@@ -3,18 +3,18 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
""" """
import logging, os, platform import os, platform
from typing import cast from typing import cast
import nodriver, pytest import nodriver, pytest
from kleinanzeigen_bot.utils import ensure from kleinanzeigen_bot.utils import loggers
from kleinanzeigen_bot.i18n import get_translating_logger from kleinanzeigen_bot.utils.misc import ensure
from kleinanzeigen_bot.web_scraping_mixin import WebScrapingMixin from kleinanzeigen_bot.utils.web_scraping_mixin import WebScrapingMixin
if os.environ.get("CI"): if os.environ.get("CI"):
get_translating_logger("kleinanzeigen_bot").setLevel(logging.DEBUG) loggers.get_logger("kleinanzeigen_bot").setLevel(loggers.DEBUG)
get_translating_logger("nodriver").setLevel(logging.DEBUG) loggers.get_logger("nodriver").setLevel(loggers.DEBUG)
async def atest_init() -> None: async def atest_init() -> None:

View File

@@ -3,31 +3,7 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
""" """
import pytest from kleinanzeigen_bot import ads
from kleinanzeigen_bot import utils
def test_ensure() -> None:
utils.ensure(True, "TRUE")
utils.ensure("Some Value", "TRUE")
utils.ensure(123, "TRUE")
utils.ensure(-123, "TRUE")
utils.ensure(lambda: True, "TRUE")
with pytest.raises(AssertionError):
utils.ensure(False, "FALSE")
with pytest.raises(AssertionError):
utils.ensure(0, "FALSE")
with pytest.raises(AssertionError):
utils.ensure("", "FALSE")
with pytest.raises(AssertionError):
utils.ensure(None, "FALSE")
with pytest.raises(AssertionError):
utils.ensure(lambda: False, "FALSE", timeout = 2)
def test_calculate_content_hash_with_none_values() -> None: def test_calculate_content_hash_with_none_values() -> None:
@@ -48,6 +24,6 @@ def test_calculate_content_hash_with_none_values() -> None:
} }
# Should not raise TypeError # Should not raise TypeError
hash_value = utils.calculate_content_hash(ad_cfg) hash_value = ads.calculate_content_hash(ad_cfg)
assert isinstance(hash_value, str) assert isinstance(hash_value, str)
assert len(hash_value) == 64 # SHA-256 hash is 64 characters long assert len(hash_value) == 64 # SHA-256 hash is 64 characters long

View File

@@ -1,5 +1,5 @@
""" """
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors SPDX-FileCopyrightText: © Jens Bergmann and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
""" """
@@ -10,7 +10,7 @@ from unittest.mock import AsyncMock, MagicMock, call, patch
import pytest import pytest
from kleinanzeigen_bot.extract import AdExtractor from kleinanzeigen_bot.extract import AdExtractor
from kleinanzeigen_bot.web_scraping_mixin import Browser, By, Element from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
class _DimensionsDict(TypedDict): class _DimensionsDict(TypedDict):
@@ -529,7 +529,7 @@ class TestAdExtractorDownload:
patch('os.makedirs') as mock_makedirs, \ patch('os.makedirs') as mock_makedirs, \
patch('os.mkdir') as mock_mkdir, \ patch('os.mkdir') as mock_mkdir, \
patch('shutil.rmtree') as mock_rmtree, \ patch('shutil.rmtree') as mock_rmtree, \
patch('kleinanzeigen_bot.extract.save_dict', autospec=True) as mock_save_dict, \ patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract: patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract:
base_dir = 'downloaded-ads' base_dir = 'downloaded-ads'
@@ -586,7 +586,7 @@ class TestAdExtractorDownload:
patch('os.makedirs') as mock_makedirs, \ patch('os.makedirs') as mock_makedirs, \
patch('os.mkdir') as mock_mkdir, \ patch('os.mkdir') as mock_mkdir, \
patch('shutil.rmtree') as mock_rmtree, \ patch('shutil.rmtree') as mock_rmtree, \
patch('kleinanzeigen_bot.extract.save_dict', autospec=True) as mock_save_dict, \ patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract: patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract:
base_dir = 'downloaded-ads' base_dir = 'downloaded-ads'

View File

@@ -5,7 +5,7 @@ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanze
""" """
import pytest import pytest
from _pytest.monkeypatch import MonkeyPatch # pylint: disable=import-private-name from _pytest.monkeypatch import MonkeyPatch # pylint: disable=import-private-name
from kleinanzeigen_bot import i18n from kleinanzeigen_bot.utils import i18n
@pytest.mark.parametrize("lang, expected", [ @pytest.mark.parametrize("lang, expected", [

View File

@@ -15,7 +15,7 @@ from ruamel.yaml import YAML
from kleinanzeigen_bot import LOG, KleinanzeigenBot from kleinanzeigen_bot import LOG, KleinanzeigenBot
from kleinanzeigen_bot._version import __version__ from kleinanzeigen_bot._version import __version__
from kleinanzeigen_bot.utils import calculate_content_hash from kleinanzeigen_bot.ads import calculate_content_hash
@pytest.fixture @pytest.fixture
@@ -250,10 +250,10 @@ class TestKleinanzeigenBotConfiguration:
sample_config_with_categories = sample_config.copy() sample_config_with_categories = sample_config.copy()
sample_config_with_categories["categories"] = {} sample_config_with_categories["categories"] = {}
with patch('kleinanzeigen_bot.utils.load_dict_if_exists', return_value=None), \ with patch('kleinanzeigen_bot.utils.dicts.load_dict_if_exists', return_value = None), \
patch.object(LOG, 'warning') as mock_warning, \ patch.object(LOG, 'warning') as mock_warning, \
patch('kleinanzeigen_bot.utils.save_dict') as mock_save, \ patch('kleinanzeigen_bot.utils.dicts.save_dict') as mock_save, \
patch('kleinanzeigen_bot.utils.load_dict_from_module') as mock_load_module: patch('kleinanzeigen_bot.utils.dicts.load_dict_from_module') as mock_load_module:
mock_load_module.side_effect = [ mock_load_module.side_effect = [
sample_config_with_categories, # config_defaults.yaml sample_config_with_categories, # config_defaults.yaml
@@ -892,7 +892,7 @@ class TestKleinanzeigenBotAdRepublication:
test_bot.config['ad_files'] = ["ads/*.yaml"] test_bot.config['ad_files'] = ["ads/*.yaml"]
# Mock the loading of the original ad configuration # Mock the loading of the original ad configuration
with patch('kleinanzeigen_bot.utils.load_dict', side_effect=[ with patch('kleinanzeigen_bot.utils.dicts.load_dict', side_effect = [
ad_cfg, # First call returns the original ad config ad_cfg, # First call returns the original ad config
{} # Second call for ad_fields.yaml {} # Second call for ad_fields.yaml
]): ]):
@@ -919,8 +919,8 @@ class TestKleinanzeigenBotAdRepublication:
# Mock the config to prevent actual file operations # Mock the config to prevent actual file operations
test_bot.config['ad_files'] = ['test.yaml'] test_bot.config['ad_files'] = ['test.yaml']
with patch('kleinanzeigen_bot.utils.load_dict_if_exists', return_value=ad_cfg_orig), \ with patch('kleinanzeigen_bot.utils.dicts.load_dict_if_exists', return_value = ad_cfg_orig), \
patch('kleinanzeigen_bot.utils.load_dict', return_value={}): # Mock ad_fields.yaml patch('kleinanzeigen_bot.utils.dicts.load_dict', return_value = {}): # Mock ad_fields.yaml
ads_to_publish = test_bot.load_ads() ads_to_publish = test_bot.load_ads()
assert len(ads_to_publish) == 0 # No ads should be marked for republication assert len(ads_to_publish) == 0 # No ads should be marked for republication

View File

@@ -0,0 +1,30 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import pytest
from kleinanzeigen_bot.utils import misc
def test_ensure() -> None:
    """
    Verifies `misc.ensure` for plain values as well as for callable conditions.
    """
    # truthy values and a callable returning True must pass without an error
    for passing in (True, "Some Value", 123, -123, lambda: True):
        misc.ensure(passing, "TRUE")

    # falsy values must be rejected
    for failing in (False, 0, "", None):
        with pytest.raises(AssertionError):
            misc.ensure(failing, "FALSE")

    # a callable that never returns True must be rejected once the timeout has elapsed
    with pytest.raises(AssertionError):
        misc.ensure(lambda: False, "FALSE", timeout = 2)