refact: reorganize utility modules

This commit is contained in:
sebthom
2025-02-10 06:23:17 +01:00
parent e8d342dc68
commit 2402ba2572
21 changed files with 734 additions and 638 deletions

View File

@@ -17,16 +17,18 @@ import certifi, colorama, nodriver
from ruamel.yaml import YAML
from wcmatch import glob
from . import utils, resources, extract
from .i18n import Locale, get_current_locale, set_current_locale, get_translating_logger, pluralize
from .utils import abspath, ainput, apply_defaults, ensure, is_frozen, safe_get, parse_datetime, calculate_content_hash
from .web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin
from . import extract, resources
from .ads import calculate_content_hash
from .utils import dicts, error_handlers, loggers, misc
from .utils.files import abspath
from .utils.i18n import Locale, get_current_locale, set_current_locale, pluralize
from .utils.misc import ainput, ensure, is_frozen, parse_datetime, parse_decimal
from .utils.web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin
from ._version import __version__
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = get_translating_logger(__name__)
LOG:Final[logging.Logger] = loggers.get_logger(__name__)
LOG.setLevel(logging.INFO)
colorama.just_fix_windows_console()
@@ -59,7 +61,8 @@ class KleinanzeigenBot(WebScrapingMixin):
def __del__(self) -> None:
if self.file_log:
LOG_ROOT.removeHandler(self.file_log)
self.file_log.flush()
loggers.LOG_ROOT.removeHandler(self.file_log)
self.file_log.close()
self.close_browser_session()
@@ -258,7 +261,7 @@ class KleinanzeigenBot(WebScrapingMixin):
self.file_log = RotatingFileHandler(filename = self.log_file_path, maxBytes = 10 * 1024 * 1024, backupCount = 10, encoding = "utf-8")
self.file_log.setLevel(logging.DEBUG)
self.file_log.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
LOG_ROOT.addHandler(self.file_log)
loggers.LOG_ROOT.addHandler(self.file_log)
LOG.info("App version: %s", self.get_version())
LOG.info("Python version: %s", sys.version)
@@ -333,13 +336,13 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info('Start fetch task for the ad(s) with id(s):')
LOG.info(' | '.join([str(id_) for id_ in ids]))
ad_fields = utils.load_dict_from_module(resources, "ad_fields.yaml")
ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml")
ads = []
for ad_file, ad_file_relative in sorted(ad_files.items()):
ad_cfg_orig = utils.load_dict(ad_file, "ad")
ad_cfg_orig = dicts.load_dict(ad_file, "ad")
ad_cfg = copy.deepcopy(ad_cfg_orig)
apply_defaults(ad_cfg, self.config["ad_defaults"], ignore = lambda k, _: k == "description", override = lambda _, v: v == "")
apply_defaults(ad_cfg, ad_fields)
dicts.apply_defaults(ad_cfg, self.config["ad_defaults"], ignore = lambda k, _: k == "description", override = lambda _, v: v == "")
dicts.apply_defaults(ad_cfg, ad_fields)
if ignore_inactive and not ad_cfg["active"]:
LOG.info(" -> SKIPPED: inactive ad [%s]", ad_file_relative)
@@ -365,13 +368,13 @@ class KleinanzeigenBot(WebScrapingMixin):
# pylint: disable=cell-var-from-loop
def assert_one_of(path:str, allowed:Iterable[str]) -> None:
ensure(safe_get(ad_cfg, *path.split(".")) in allowed, f"-> property [{path}] must be one of: {allowed} @ [{ad_file}]")
ensure(dicts.safe_get(ad_cfg, *path.split(".")) in allowed, f"-> property [{path}] must be one of: {allowed} @ [{ad_file}]")
def assert_min_len(path:str, minlen:int) -> None:
ensure(len(safe_get(ad_cfg, *path.split("."))) >= minlen, f"-> property [{path}] must be at least {minlen} characters long @ [{ad_file}]")
ensure(len(dicts.safe_get(ad_cfg, *path.split("."))) >= minlen, f"-> property [{path}] must be at least {minlen} characters long @ [{ad_file}]")
def assert_has_value(path:str) -> None:
ensure(safe_get(ad_cfg, *path.split(".")), f"-> property [{path}] not specified @ [{ad_file}]")
ensure(dicts.safe_get(ad_cfg, *path.split(".")), f"-> property [{path}] not specified @ [{ad_file}]")
# pylint: enable=cell-var-from-loop
assert_one_of("type", {"OFFER", "WANTED"})
@@ -379,7 +382,7 @@ class KleinanzeigenBot(WebScrapingMixin):
assert_has_value("description")
assert_one_of("price_type", {"FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"})
if ad_cfg["price_type"] == "GIVE_AWAY":
ensure(not safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]")
ensure(not dicts.safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]")
elif ad_cfg["price_type"] == "FIXED":
assert_has_value("price")
@@ -405,7 +408,7 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_cfg["category"] = resolved_category_id
if ad_cfg["shipping_costs"]:
ad_cfg["shipping_costs"] = str(round(utils.parse_decimal(ad_cfg["shipping_costs"]), 2))
ad_cfg["shipping_costs"] = str(round(misc.parse_decimal(ad_cfg["shipping_costs"]), 2))
if ad_cfg["images"]:
images = []
@@ -433,18 +436,18 @@ class KleinanzeigenBot(WebScrapingMixin):
return ads
def load_config(self) -> None:
config_defaults = utils.load_dict_from_module(resources, "config_defaults.yaml")
config = utils.load_dict_if_exists(self.config_file_path, _("config"))
config_defaults = dicts.load_dict_from_module(resources, "config_defaults.yaml")
config = dicts.load_dict_if_exists(self.config_file_path, _("config"))
if config is None:
LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
utils.save_dict(self.config_file_path, config_defaults)
dicts.save_dict(self.config_file_path, config_defaults)
config = {}
self.config = apply_defaults(config, config_defaults)
self.config = dicts.apply_defaults(config, config_defaults)
self.categories = utils.load_dict_from_module(resources, "categories.yaml", "categories")
deprecated_categories = utils.load_dict_from_module(resources, "categories_old.yaml", "categories")
self.categories = dicts.load_dict_from_module(resources, "categories.yaml", "categories")
deprecated_categories = dicts.load_dict_from_module(resources, "categories_old.yaml", "categories")
self.categories.update(deprecated_categories)
if self.config["categories"]:
self.categories.update(self.config["categories"])
@@ -675,7 +678,7 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.web_select(By.CSS_SELECTOR, "select#price-type-react, select#micro-frontend-price-type, select#priceType", price_type)
except TimeoutError:
pass
if safe_get(ad_cfg, "price"):
if dicts.safe_get(ad_cfg, "price"):
await self.web_input(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price", ad_cfg["price"])
#############################
@@ -797,7 +800,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)
utils.save_dict(ad_file, ad_cfg_orig)
dicts.save_dict(ad_file, ad_cfg_orig)
async def __set_condition(self, condition_value: str) -> None:
condition_mapping = {
@@ -1047,11 +1050,11 @@ def main(args:list[str]) -> None:
https://github.com/Second-Hand-Friends/kleinanzeigen-bot
""")[1:], flush = True) # [1:] removes the first empty blank line
utils.configure_console_logging()
loggers.configure_console_logging()
signal.signal(signal.SIGINT, utils.on_sigint) # capture CTRL+C
sys.excepthook = utils.on_exception
atexit.register(utils.on_exit)
signal.signal(signal.SIGINT, error_handlers.on_sigint) # capture CTRL+C
sys.excepthook = error_handlers.on_exception
atexit.register(loggers.flush_all_handlers)
bot = KleinanzeigenBot()
atexit.register(bot.close_browser_session)
@@ -1059,6 +1062,6 @@ def main(args:list[str]) -> None:
if __name__ == "__main__":
utils.configure_console_logging()
loggers.configure_console_logging()
LOG.error("Direct execution not supported. Use 'pdm run app'")
sys.exit(1)

View File

@@ -0,0 +1,38 @@
"""
SPDX-FileCopyrightText: © Jens Bergman and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import json, os, hashlib
from typing import Any
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
    """
    Calculate a hash for user-modifiable fields of the ad.

    Only the fields listed below contribute to the hash. Every value is normalized
    (converted to `str`/`bool`, lists sorted, image paths reduced to their file name)
    so that equal ad content always yields the same digest.

    :param ad_cfg: the ad configuration
    :return: hex encoded SHA-256 digest of the normalized fields
    """
    # A `contact` key that is present but empty is loaded as None and must be
    # treated like a missing key instead of failing with an AttributeError.
    contact = ad_cfg.get("contact") or {}

    # Relevant fields for the hash
    content = {
        "active": bool(ad_cfg.get("active", True)),  # Explicitly convert to bool
        "type": str(ad_cfg.get("type", "")),  # Explicitly convert to string
        "title": str(ad_cfg.get("title", "")),
        "description": str(ad_cfg.get("description", "")),
        "category": str(ad_cfg.get("category", "")),
        "price": str(ad_cfg.get("price", "")),  # Price always as string
        "price_type": str(ad_cfg.get("price_type", "")),
        "special_attributes": dict(ad_cfg.get("special_attributes") or {}),  # Handle None case
        "shipping_type": str(ad_cfg.get("shipping_type", "")),
        "shipping_costs": str(ad_cfg.get("shipping_costs", "")),
        "shipping_options": sorted([str(x) for x in (ad_cfg.get("shipping_options") or [])]),  # Handle None case
        "sell_directly": bool(ad_cfg.get("sell_directly", False)),  # Explicitly convert to bool
        # Only the file name of an image matters; None entries are hashed as ""
        "images": sorted([os.path.basename(str(img)) if img is not None else "" for img in (ad_cfg.get("images") or [])]),
        "contact": {
            # Missing contact fields default to the empty string
            "name": str(contact.get("name", "")),
            "street": str(contact.get("street", "")),
            "zipcode": str(contact.get("zipcode", "")),
            "phone": str(contact.get("phone", ""))
        }
    }

    # Create sorted JSON string for consistent hashes
    content_str = json.dumps(content, sort_keys = True)
    return hashlib.sha256(content_str.encode()).hexdigest()

View File

@@ -3,22 +3,20 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import logging, os, shutil
import json, mimetypes, os, shutil
import urllib.request as urllib_request
import mimetypes
from datetime import datetime
from typing import Any, Final
import json
from .i18n import get_translating_logger, pluralize
from .utils import is_integer, parse_decimal, save_dict, calculate_content_hash
from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
from .ads import calculate_content_hash
from .utils import dicts, i18n, loggers, misc, reflect
from .utils.web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
__all__ = [
"AdExtractor",
]
LOG:Final[logging.Logger] = get_translating_logger(__name__)
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
class AdExtractor(WebScrapingMixin):
@@ -56,7 +54,7 @@ class AdExtractor(WebScrapingMixin):
# call extraction function
info = await self._extract_ad_page_info(new_base_dir, ad_id)
ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml'
save_dict(ad_file_path, info)
dicts.save_dict(ad_file_path, info)
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
"""
@@ -74,7 +72,7 @@ class AdExtractor(WebScrapingMixin):
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
LOG.info('Found %s.', pluralize("image", n_images))
LOG.info('Found %s.', i18n.pluralize("image", n_images))
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
@@ -106,7 +104,7 @@ class AdExtractor(WebScrapingMixin):
LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.')
break
img_nr += 1
LOG.info('Downloaded %s.', pluralize("image", dl_counter))
LOG.info('Downloaded %s.', i18n.pluralize("image", dl_counter))
except TimeoutError: # some ads do not require images
LOG.warning('No image area found. Continuing without downloading images.')
@@ -193,7 +191,7 @@ class AdExtractor(WebScrapingMixin):
Navigates to an ad page specified with an ad ID; or alternatively by a given URL.
:return: whether the navigation to the ad page was successful
"""
if is_integer(id_or_url):
if reflect.is_integer(id_or_url):
# navigate to start page, otherwise page can be None!
await self.web_open('https://www.kleinanzeigen.de/')
# enter the ad ID into the search bar
@@ -349,7 +347,7 @@ class AdExtractor(WebScrapingMixin):
elif '' in shipping_text:
shipping_price_parts = shipping_text.split(' ')
ship_type = 'SHIPPING'
ship_costs = float(parse_decimal(shipping_price_parts[-2]))
ship_costs = float(misc.parse_decimal(shipping_price_parts[-2]))
# reading shipping option from kleinanzeigen
# and find the right one by price

View File

@@ -1,366 +0,0 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import asyncio, copy, decimal, inspect, json, logging, os, re, socket, sys, traceback, time, hashlib
from importlib.resources import read_text as get_resource_as_string
from collections.abc import Callable
from datetime import datetime
from gettext import gettext as _
from types import FrameType, ModuleType, TracebackType
from typing import Any, Final, TypeVar
import colorama
from ruamel.yaml import YAML
from .i18n import get_translating_logger
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = get_translating_logger(__name__)
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
T = TypeVar('T')
def abspath(relative_path:str, relative_to:str | None = None) -> str:
"""
Makes a given relative path absolute based on another file/folder
"""
if os.path.isabs(relative_path):
return relative_path
if not relative_to:
return os.path.abspath(relative_path)
if os.path.isfile(relative_to):
relative_to = os.path.dirname(relative_to)
return os.path.normpath(os.path.join(relative_to, relative_path))
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
"""
:param timeout: timespan in seconds until when the condition must become `True`, default is 5 seconds
:param poll_requency: sleep interval between calls in seconds, default is 0.5 seconds
:raises AssertionError: if condition did not come `True` within given timespan
"""
if not isinstance(condition, Callable): # type: ignore[arg-type] # https://github.com/python/mypy/issues/6864
if condition:
return
raise AssertionError(_(error_message))
if timeout < 0:
raise AssertionError("[timeout] must be >= 0")
if poll_requency < 0:
raise AssertionError("[poll_requency] must be >= 0")
start_at = time.time()
while not condition(): # type: ignore[operator]
elapsed = time.time() - start_at
if elapsed >= timeout:
raise AssertionError(_(error_message))
time.sleep(poll_requency)
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
stack = inspect.stack()
try:
for frame in stack[depth + 1:]:
if frame.function and frame.function != "<lambda>":
return frame
return None
finally:
del stack # Clean up the stack to avoid reference cycles
def is_frozen() -> bool:
    """
    Returns the value of the `sys.frozen` attribute, or False if the interpreter does not define it.

    >>> is_frozen()
    False
    """
    frozen_flag = getattr(sys, "frozen", False)
    return frozen_flag
def is_integer(obj:Any) -> bool:
    """
    Checks whether the given object can be converted with `int()`.

    >>> is_integer("42")
    True
    >>> is_integer("4.2")
    False
    >>> is_integer(float("inf"))
    False

    :param obj: any value
    :return: True if `int(obj)` succeeds, False otherwise
    """
    try:
        int(obj)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised for infinite floats, which are not integers either
        return False
    return True
def is_port_open(host:str, port:int) -> bool:
    """
    Probes whether a TCP connection to the given IPv4 host and port can be established.

    :param host: host name or IPv4 address
    :param port: TCP port number
    :return: True if the connection attempt succeeded within one second, False on any failure
    """
    try:
        # The context manager closes the socket on every path. Creating the socket inside
        # the `with` also avoids touching an unbound variable if the creation itself fails.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)
            sock.connect((host, port))
            return True
    except Exception:  # best-effort probe: any failure means "not reachable"
        return False
async def ainput(prompt: str) -> str:
    """
    Asynchronous variant of the built-in `input()`.

    The blocking `input()` call is executed via `asyncio.to_thread`.

    :param prompt: text shown to the user; a single space is appended
    :return: the line entered by the user
    """
    padded_prompt = f'{prompt} '
    return await asyncio.to_thread(input, padded_prompt)
def apply_defaults(
    target:dict[Any, Any],
    defaults:dict[Any, Any],
    ignore:Callable[[Any, Any], bool] = lambda _k, _v: False,
    override:Callable[[Any, Any], bool] = lambda _k, _v: False
) -> dict[Any, Any]:
    """
    Recursively merges default values into `target` (in place).

    :param target: the dictionary to complete; it is modified and returned
    :param defaults: the default values; they are deep-copied before being assigned
    :param ignore: called with (key, default_value) for keys missing in `target`;
        returning True prevents the default from being added
    :param override: called with (key, current_value) for non-dict keys present in `target`;
        returning True replaces the current value with the default
    :return: `target`

    >>> apply_defaults({}, {"foo": "bar"})
    {'foo': 'bar'}
    >>> apply_defaults({"foo": "foo"}, {"foo": "bar"})
    {'foo': 'foo'}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"})
    {'foo': ''}
    >>> apply_defaults({}, {"foo": "bar"}, ignore = lambda k, _: k == "foo")
    {}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': 'bar'}
    >>> apply_defaults({"foo": None}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': None}
    >>> apply_defaults({"a": {"foo": ""}}, {"a": {"foo": "bar"}}, override = lambda _, v: v == "")
    {'a': {'foo': 'bar'}}
    """
    for key, default_value in defaults.items():
        if key in target:
            if isinstance(target[key], dict) and isinstance(default_value, dict):
                # hand both callbacks down so nested values follow the same rules as top-level ones
                apply_defaults(target[key], default_value, ignore = ignore, override = override)
            elif override(key, target[key]):
                target[key] = copy.deepcopy(default_value)
        elif not ignore(key, default_value):
            target[key] = copy.deepcopy(default_value)
    return target
def safe_get(a_map:dict[Any, Any], *keys:str) -> Any:
    """
    Walks a chain of keys through nested mappings without raising on missing entries.

    A falsy `a_map` (e.g. None or an empty dict) is returned as is.

    >>> safe_get({"foo": {}}, "foo", "bar") is None
    True
    >>> safe_get({"foo": {"bar": "some_value"}}, "foo", "bar")
    'some_value'

    :param a_map: the (possibly nested) dictionary to read from
    :param keys: the keys to follow, outermost first
    :return: the value found at the end of the key chain or None if a lookup fails
    """
    current = a_map
    if not current:
        return current

    for key in keys:
        try:
            current = current[key]
        except (KeyError, TypeError):
            # key is missing or the current value is not subscriptable
            return None
    return current
def configure_console_logging() -> None:
    """
    Attaches two colorizing stream handlers to the root logger.

    The first handler only lets records up to level INFO pass, the second one
    handles WARNING and above. Both share one formatter that colorizes the level
    name, the message and values enclosed in [...], "..." or '...'.
    """

    class CustomFormatter(logging.Formatter):
        LEVEL_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.WARNING: colorama.Fore.YELLOW,
            logging.ERROR: colorama.Fore.RED,
            logging.CRITICAL: colorama.Fore.RED,
        }
        MESSAGE_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.RESET,
            logging.WARNING: colorama.Fore.YELLOW,
            logging.ERROR: colorama.Fore.RED,
            logging.CRITICAL: colorama.Fore.RED + colorama.Style.BRIGHT,
        }
        VALUE_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.MAGENTA,
            logging.WARNING: colorama.Fore.MAGENTA,
            logging.ERROR: colorama.Fore.MAGENTA,
            logging.CRITICAL: colorama.Fore.MAGENTA,
        }

        def format(self, record:logging.LogRecord) -> str:
            # work on a copy so that other handlers still see the unmodified record
            record = copy.deepcopy(record)

            severity = record.levelno
            level_color = self.LEVEL_COLORS.get(severity, "")
            msg_color = self.MESSAGE_COLORS.get(severity, "")
            value_color = self.VALUE_COLORS.get(severity, "")

            # translate and colorize log level name (DEBUG is left untranslated);
            # the gettext call stays inside format() on purpose
            if severity > logging.DEBUG:
                levelname = _(record.levelname)
            else:
                levelname = record.levelname
            record.levelname = f"{level_color}[{levelname}]{colorama.Style.RESET_ALL}"

            def highlight(match:re.Match[str]) -> str:
                value = match.group(1) or match.group(2) or match.group(3)
                return f"[{value_color}{value}{colorama.Fore.RESET}{msg_color}]"

            # highlight message values enclosed by [...], "...", and '...'
            highlighted = re.sub(r"\[([^\]]+)\]|\"([^\"]+)\"|\'([^\']+)\'", highlight, str(record.msg))

            # colorize message
            record.msg = f"{msg_color}{highlighted}{colorama.Style.RESET_ALL}"

            return super().format(record)

    class UpToInfoFilter(logging.Filter):
        def filter(self, record:logging.LogRecord) -> bool:
            return record.levelno <= logging.INFO

    formatter = CustomFormatter("%(levelname)s %(message)s")

    # NOTE(review): despite its name this handler is bound to sys.stderr - confirm this is intended
    stdout_log = logging.StreamHandler(sys.stderr)
    stdout_log.setLevel(logging.DEBUG)
    stdout_log.addFilter(UpToInfoFilter())
    stdout_log.setFormatter(formatter)
    LOG_ROOT.addHandler(stdout_log)

    stderr_log = logging.StreamHandler(sys.stderr)
    stderr_log.setLevel(logging.WARNING)
    stderr_log.setFormatter(formatter)
    LOG_ROOT.addHandler(stderr_log)
def on_exception(ex_type:type[BaseException], ex_value:Any, ex_traceback:TracebackType | None) -> None:
    """
    Exception hook (signature of `sys.excepthook`) that reports uncaught exceptions via the logger.

    - KeyboardInterrupt is delegated to the interpreter's default hook
    - with DEBUG logging enabled, or for AttributeError/ImportError/NameError/TypeError,
      the full traceback is logged
    - for an AssertionError only its message is logged
    - for anything else the exception type name and message are logged
    """
    if issubclass(ex_type, KeyboardInterrupt):
        sys.__excepthook__(ex_type, ex_value, ex_traceback)
        return

    wants_traceback = LOG.isEnabledFor(logging.DEBUG) \
        or isinstance(ex_value, (AttributeError, ImportError, NameError, TypeError))
    if wants_traceback:
        LOG.error("".join(traceback.format_exception(ex_type, ex_value, ex_traceback)))
    elif isinstance(ex_value, AssertionError):
        LOG.error(ex_value)
    else:
        LOG.error("%s: %s", ex_type.__name__, ex_value)
def on_exit() -> None:
    """Flushes every handler currently attached to the root logger."""
    root_handlers = LOG_ROOT.handlers
    for log_handler in root_handlers:
        log_handler.flush()
def on_sigint(_sig:int, _frame:FrameType | None) -> None:
    """
    Signal handler: logs a warning and terminates the process with exit code 0.

    :param _sig: the received signal number (unused)
    :param _frame: the interrupted stack frame (unused)
    """
    LOG.warning("Aborted on user request.")
    raise SystemExit(0)
def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
    """
    Loads a dictionary from a file via `load_dict_if_exists`, insisting that the file exists.

    :param filepath: path of the file to load
    :param content_label: label forwarded to `load_dict_if_exists`
    :raises FileNotFoundError: if `load_dict_if_exists` returned None
    """
    loaded = load_dict_if_exists(filepath, content_label)
    if loaded is not None:
        return loaded
    raise FileNotFoundError(filepath)
def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] | None:
    """
    Loads a JSON or YAML file if it exists.

    :param filepath: path of a *.json, *.yaml or *.yml file
    :param content_label: optional description of the content, only used in the log message
    :return: the parsed content or None if the file does not exist
    :raises ValueError: if the file extension is not supported
    """
    abs_filepath = os.path.abspath(filepath)
    label_prefix = content_label + _(" from ") if content_label else ""
    LOG.info("Loading %s[%s]...", label_prefix, abs_filepath)

    file_ext = os.path.splitext(filepath)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(_('Unsupported file type. The filename "%s" must end with *.json, *.yaml, or *.yml') % filepath)

    if not os.path.exists(filepath):
        return None

    with open(filepath, encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            return json.load(file)  # type: ignore[no-any-return] # mypy
        return YAML().load(file)  # type: ignore[no-any-return] # mypy
def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]:
    """
    Loads a JSON or YAML resource that is bundled with the given module.

    :param module: the package containing the resource
    :param filename: resource name, must end with *.json, *.yaml or *.yml
    :param content_label: optional description of the content, only used in the log message
    :raises FileNotFoundError
    :raises ValueError: if the file extension is not supported
    """
    label_prefix = content_label + " from " if content_label else ""
    LOG.debug("Loading %s[%s.%s]...", label_prefix, module.__name__, filename)

    file_ext = os.path.splitext(filename)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(f'Unsupported file type. The filename "{filename}" must end with *.json, *.yaml, or *.yml')

    content = get_resource_as_string(module, filename)  # pylint: disable=deprecated-method
    if filename.endswith(".json"):
        return json.loads(content)  # type: ignore[no-any-return] # mypy
    return YAML().load(content)  # type: ignore[no-any-return] # mypy
def save_dict(filepath:str, content:dict[str, Any]) -> None:
    """
    Writes a dictionary to a file, as JSON if the path ends with ".json", otherwise as YAML.

    :param filepath: target file, it is overwritten if it exists
    :param content: the data to serialize
    """
    filepath = os.path.abspath(filepath)
    LOG.info("Saving [%s]...", filepath)

    with open(filepath, "w", encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            file.write(json.dumps(content, indent = 2, ensure_ascii = False))
            return

        def represent_str(dumper:Any, data:str) -> Any:
            # use YAML | block style for multi-line strings
            block_style = '|' if '\n' in data else None
            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style = block_style)

        yaml = YAML()
        yaml.indent(mapping = 2, sequence = 4, offset = 2)
        yaml.representer.add_representer(str, represent_str)
        yaml.allow_duplicate_keys = False
        yaml.explicit_start = False
        yaml.dump(content, file)
def parse_decimal(number:float | int | str) -> decimal.Decimal:
"""
>>> parse_decimal(5)
Decimal('5')
>>> parse_decimal(5.5)
Decimal('5.5')
>>> parse_decimal("5.5")
Decimal('5.5')
>>> parse_decimal("5,5")
Decimal('5.5')
>>> parse_decimal("1.005,5")
Decimal('1005.5')
>>> parse_decimal("1,005.5")
Decimal('1005.5')
"""
try:
return decimal.Decimal(number)
except decimal.InvalidOperation as ex:
parts = re.split("[.,]", str(number))
try:
return decimal.Decimal("".join(parts[:-1]) + "." + parts[-1])
except decimal.InvalidOperation:
raise decimal.DecimalException(f"Invalid number format: {number}") from ex
def parse_datetime(date:datetime | str | None) -> datetime | None:
"""
>>> parse_datetime(datetime(2020, 1, 1, 0, 0))
datetime.datetime(2020, 1, 1, 0, 0)
>>> parse_datetime("2020-01-01T00:00:00")
datetime.datetime(2020, 1, 1, 0, 0)
>>> parse_datetime(None)
"""
if date is None:
return None
if isinstance(date, datetime):
return date
return datetime.fromisoformat(date)
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
    """
    Calculate a hash for user-modifiable fields of the ad.

    Only the fields listed below contribute to the hash. Every value is normalized
    (converted to `str`/`bool`, lists sorted, image paths reduced to their file name)
    so that equal ad content always yields the same digest.

    :param ad_cfg: the ad configuration
    :return: hex encoded SHA-256 digest of the normalized fields
    """
    # A `contact` key that is present but empty is loaded as None and must be
    # treated like a missing key instead of failing with an AttributeError.
    contact = ad_cfg.get("contact") or {}

    # Relevant fields for the hash
    content = {
        "active": bool(ad_cfg.get("active", True)),  # Explicitly convert to bool
        "type": str(ad_cfg.get("type", "")),  # Explicitly convert to string
        "title": str(ad_cfg.get("title", "")),
        "description": str(ad_cfg.get("description", "")),
        "category": str(ad_cfg.get("category", "")),
        "price": str(ad_cfg.get("price", "")),  # Price always as string
        "price_type": str(ad_cfg.get("price_type", "")),
        "special_attributes": dict(ad_cfg.get("special_attributes") or {}),  # Handle None case
        "shipping_type": str(ad_cfg.get("shipping_type", "")),
        "shipping_costs": str(ad_cfg.get("shipping_costs", "")),
        "shipping_options": sorted([str(x) for x in (ad_cfg.get("shipping_options") or [])]),  # Handle None case
        "sell_directly": bool(ad_cfg.get("sell_directly", False)),  # Explicitly convert to bool
        # Only the file name of an image matters; None entries are hashed as ""
        "images": sorted([os.path.basename(str(img)) if img is not None else "" for img in (ad_cfg.get("images") or [])]),
        "contact": {
            # Missing contact fields default to the empty string
            "name": str(contact.get("name", "")),
            "street": str(contact.get("street", "")),
            "zipcode": str(contact.get("zipcode", "")),
            "phone": str(contact.get("phone", ""))
        }
    }

    # Create sorted JSON string for consistent hashes
    content_str = json.dumps(content, sort_keys=True)
    return hashlib.sha256(content_str.encode()).hexdigest()

View File

@@ -0,0 +1,3 @@
"""
This module contains generic, reusable code.
"""

View File

@@ -0,0 +1,120 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import copy, json, os
from collections.abc import Callable
from importlib.resources import read_text as get_resource_as_string
from gettext import gettext as _
from types import ModuleType
from typing import Any, Final
from ruamel.yaml import YAML
from . import files, loggers # pylint: disable=cyclic-import
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
def apply_defaults(
    target:dict[Any, Any],
    defaults:dict[Any, Any],
    ignore:Callable[[Any, Any], bool] = lambda _k, _v: False,
    override:Callable[[Any, Any], bool] = lambda _k, _v: False
) -> dict[Any, Any]:
    """
    Recursively merges default values into `target` (in place).

    :param target: the dictionary to complete; it is modified and returned
    :param defaults: the default values; they are deep-copied before being assigned
    :param ignore: called with (key, default_value) for keys missing in `target`;
        returning True prevents the default from being added
    :param override: called with (key, current_value) for non-dict keys present in `target`;
        returning True replaces the current value with the default
    :return: `target`

    >>> apply_defaults({}, {"foo": "bar"})
    {'foo': 'bar'}
    >>> apply_defaults({"foo": "foo"}, {"foo": "bar"})
    {'foo': 'foo'}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"})
    {'foo': ''}
    >>> apply_defaults({}, {"foo": "bar"}, ignore = lambda k, _: k == "foo")
    {}
    >>> apply_defaults({"foo": ""}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': 'bar'}
    >>> apply_defaults({"foo": None}, {"foo": "bar"}, override = lambda _, v: v == "")
    {'foo': None}
    >>> apply_defaults({"a": {"foo": ""}}, {"a": {"foo": "bar"}}, override = lambda _, v: v == "")
    {'a': {'foo': 'bar'}}
    """
    for key, default_value in defaults.items():
        if key in target:
            if isinstance(target[key], dict) and isinstance(default_value, dict):
                # hand both callbacks down so nested values follow the same rules as top-level ones
                apply_defaults(target[key], default_value, ignore = ignore, override = override)
            elif override(key, target[key]):
                target[key] = copy.deepcopy(default_value)
        elif not ignore(key, default_value):
            target[key] = copy.deepcopy(default_value)
    return target
def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
    """
    Loads a dictionary from a file via `load_dict_if_exists`, insisting that the file exists.

    :param filepath: path of the file to load
    :param content_label: label forwarded to `load_dict_if_exists`
    :raises FileNotFoundError: if `load_dict_if_exists` returned None
    """
    loaded = load_dict_if_exists(filepath, content_label)
    if loaded is not None:
        return loaded
    raise FileNotFoundError(filepath)
def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] | None:
    """
    Loads a JSON or YAML file if it exists.

    :param filepath: path of a *.json, *.yaml or *.yml file
    :param content_label: optional description of the content, only used in the log message
    :return: the parsed content or None if the file does not exist
    :raises ValueError: if the file extension is not supported
    """
    abs_filepath = files.abspath(filepath)
    label_prefix = content_label + _(" from ") if content_label else ""
    LOG.info("Loading %s[%s]...", label_prefix, abs_filepath)

    file_ext = os.path.splitext(filepath)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(_('Unsupported file type. The filename "%s" must end with *.json, *.yaml, or *.yml') % filepath)

    if not os.path.exists(filepath):
        return None

    with open(filepath, encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            return json.load(file)  # type: ignore[no-any-return] # mypy
        return YAML().load(file)  # type: ignore[no-any-return] # mypy
def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "") -> dict[str, Any]:
    """
    Loads a JSON or YAML resource that is bundled with the given module.

    :param module: the package containing the resource
    :param filename: resource name, must end with *.json, *.yaml or *.yml
    :param content_label: optional description of the content, only used in the log message
    :raises FileNotFoundError
    :raises ValueError: if the file extension is not supported
    """
    label_prefix = content_label + " from " if content_label else ""
    LOG.debug("Loading %s[%s.%s]...", label_prefix, module.__name__, filename)

    file_ext = os.path.splitext(filename)[1]
    if file_ext not in (".json", ".yaml", ".yml"):
        raise ValueError(f'Unsupported file type. The filename "{filename}" must end with *.json, *.yaml, or *.yml')

    content = get_resource_as_string(module, filename)  # pylint: disable=deprecated-method
    if filename.endswith(".json"):
        return json.loads(content)  # type: ignore[no-any-return] # mypy
    return YAML().load(content)  # type: ignore[no-any-return] # mypy
def save_dict(filepath:str, content:dict[str, Any]) -> None:
    """
    Writes a dictionary to a file, as JSON if the path ends with ".json", otherwise as YAML.

    :param filepath: target file, it is overwritten if it exists
    :param content: the data to serialize
    """
    filepath = files.abspath(filepath)
    LOG.info("Saving [%s]...", filepath)

    with open(filepath, "w", encoding = "utf-8") as file:
        if filepath.endswith(".json"):
            file.write(json.dumps(content, indent = 2, ensure_ascii = False))
            return

        def represent_str(dumper:Any, data:str) -> Any:
            # use YAML | block style for multi-line strings
            block_style = '|' if '\n' in data else None
            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style = block_style)

        yaml = YAML()
        yaml.indent(mapping = 2, sequence = 4, offset = 2)
        yaml.representer.add_representer(str, represent_str)
        yaml.allow_duplicate_keys = False
        yaml.explicit_start = False
        yaml.dump(content, file)
def safe_get(a_map:dict[Any, Any], *keys:str) -> Any:
    """
    Walks a chain of keys through nested mappings without raising on missing entries.

    A falsy `a_map` (e.g. None or an empty dict) is returned as is.

    >>> safe_get({"foo": {}}, "foo", "bar") is None
    True
    >>> safe_get({"foo": {"bar": "some_value"}}, "foo", "bar")
    'some_value'

    :param a_map: the (possibly nested) dictionary to read from
    :param keys: the keys to follow, outermost first
    :return: the value found at the end of the key chain or None if a lookup fails
    """
    current = a_map
    if not current:
        return current

    for key in keys:
        try:
            current = current[key]
        except (KeyError, TypeError):
            # key is missing or the current value is not subscriptable
            return None
    return current

View File

@@ -0,0 +1,28 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import sys, traceback
from types import FrameType, TracebackType
from typing import Any, Final
from . import loggers
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
def on_exception(ex_type:type[BaseException], ex_value:Any, ex_traceback:TracebackType | None) -> None:
    """
    Exception hook (signature of `sys.excepthook`) that reports uncaught exceptions via the logger.

    - KeyboardInterrupt is delegated to the interpreter's default hook
    - if `loggers.is_debug(LOG)` holds, or for AttributeError/ImportError/NameError/TypeError,
      the full traceback is logged
    - for an AssertionError only its message is logged
    - for anything else the exception type name and message are logged
    """
    if issubclass(ex_type, KeyboardInterrupt):
        sys.__excepthook__(ex_type, ex_value, ex_traceback)
        return

    wants_traceback = loggers.is_debug(LOG) \
        or isinstance(ex_value, (AttributeError, ImportError, NameError, TypeError))
    if wants_traceback:
        LOG.error("".join(traceback.format_exception(ex_type, ex_value, ex_traceback)))
    elif isinstance(ex_value, AssertionError):
        LOG.error(ex_value)
    else:
        LOG.error("%s: %s", ex_type.__name__, ex_value)
def on_sigint(_sig:int, _frame:FrameType | None) -> None:
    """
    Signal handler: logs a warning and terminates the process with exit code 0.

    :param _sig: the received signal number (unused)
    :param _frame: the interrupted stack frame (unused)
    """
    LOG.warning("Aborted on user request.")
    raise SystemExit(0)

View File

@@ -0,0 +1,22 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import os
def abspath(relative_path:str, relative_to:str | None = None) -> str:
"""
Makes a given relative path absolute based on another file/folder
"""
if not relative_to:
return os.path.abspath(relative_path)
if os.path.isabs(relative_path):
return relative_path
if os.path.isfile(relative_to):
relative_to = os.path.dirname(relative_to)
return os.path.normpath(os.path.join(relative_to, relative_path))

View File

@@ -7,14 +7,18 @@ import ctypes, gettext, inspect, locale, logging, os, sys
from collections.abc import Sized
from typing import Any, Final, NamedTuple
from . import resources, utils # pylint: disable=cyclic-import
from kleinanzeigen_bot import resources
from . import reflect
from . import dicts
__all__ = [
"Locale",
"get_translating_logger",
"get_current_locale",
"pluralize",
"set_current_locale",
"translate"
]
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = logging.getLogger(__name__)
@@ -96,7 +100,7 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
global _TRANSLATIONS
if _TRANSLATIONS is None:
try:
_TRANSLATIONS = utils.load_dict_from_module(resources, f"translations.{_CURRENT_LOCALE[0]}.yaml")
_TRANSLATIONS = dicts.load_dict_from_module(resources, f"translations.{_CURRENT_LOCALE[0]}.yaml")
except FileNotFoundError:
_TRANSLATIONS = {}
@@ -108,7 +112,7 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
if module_name and module_name.endswith(f".{file_basename}"):
module_name = module_name[:-(len(file_basename) + 1)]
file_key = f"{file_basename}.py" if module_name == file_basename else f"{module_name}/{file_basename}.py"
translation = utils.safe_get(_TRANSLATIONS,
translation = dicts.safe_get(_TRANSLATIONS,
file_key,
caller.function,
text
@@ -116,8 +120,9 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
return translation if translation else text
# replace gettext.gettext with custom _translate function
_original_gettext = gettext.gettext
gettext.gettext = lambda message: translate(_original_gettext(message), utils.get_caller())
gettext.gettext = lambda message: translate(_original_gettext(message), reflect.get_caller())
for module_name, module in sys.modules.items():
if module is None or module_name in sys.builtin_module_names:
continue
@@ -127,19 +132,6 @@ for module_name, module in sys.modules.items():
setattr(module, 'gettext', gettext.gettext)
def get_translating_logger(name: str | None = None) -> logging.Logger:
    """
    Returns the logger registered under the given name after installing a logger class
    that translates messages of every level except DEBUG.
    """

    class TranslatingLogger(logging.Logger):

        def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
            # debug messages should not be translated
            localized = msg if level == logging.DEBUG else translate(msg, utils.get_caller(2))
            super()._log(level, localized, *args, **kwargs)

    logging.setLoggerClass(TranslatingLogger)
    return logging.getLogger(name)
def get_current_locale() -> Locale:
return _CURRENT_LOCALE
@@ -161,7 +153,7 @@ def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str
>>> pluralize("field", 2, prefix_with_count = False)
'fields'
"""
noun = translate(noun, utils.get_caller())
noun = translate(noun, reflect.get_caller())
if isinstance(count, Sized):
count = len(count)

View File

@@ -0,0 +1,116 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import copy, logging, re, sys
from gettext import gettext as _
from typing import Any, Final # @UnusedImport
import colorama
from . import i18n, reflect
__all__ = [
"Logger",
"LOG_ROOT",
"DEBUG",
"INFO",
"configure_console_logging",
"flush_all_handlers",
"get_logger"
]
Logger = logging.Logger
DEBUG:Final[int] = logging.DEBUG
INFO:Final[int] = logging.INFO
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
def configure_console_logging() -> None:
    """
    Attaches two colorizing console handlers to the root logger.

    The first handler only lets records up to and including INFO pass, the second one
    handles records of level WARNING and above. Both share one formatter that colorizes
    the level name and highlights values enclosed by [...], "...", and '...'.
    """

    class CustomFormatter(logging.Formatter):
        LEVEL_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.WARNING: colorama.Fore.YELLOW,
            logging.ERROR: colorama.Fore.RED,
            logging.CRITICAL: colorama.Fore.RED,
        }
        MESSAGE_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.RESET,
            logging.WARNING: colorama.Fore.YELLOW,
            logging.ERROR: colorama.Fore.RED,
            logging.CRITICAL: colorama.Fore.RED + colorama.Style.BRIGHT,
        }
        VALUE_COLORS = {
            logging.DEBUG: colorama.Fore.BLACK + colorama.Style.BRIGHT,
            logging.INFO: colorama.Fore.MAGENTA,
            logging.WARNING: colorama.Fore.MAGENTA,
            logging.ERROR: colorama.Fore.MAGENTA,
            logging.CRITICAL: colorama.Fore.MAGENTA,
        }

        def format(self, record:logging.LogRecord) -> str:
            # Work on a shallow copy so that other handlers still see the unmodified record.
            # Only top-level attributes are rebound below, so a deep copy is not required;
            # a deep copy would also fail for records carrying exc_info, because traceback
            # objects cannot be deep-copied.
            record = copy.copy(record)

            level_color = self.LEVEL_COLORS.get(record.levelno, "")
            msg_color = self.MESSAGE_COLORS.get(record.levelno, "")
            value_color = self.VALUE_COLORS.get(record.levelno, "")

            # translate and colorize log level name
            levelname = _(record.levelname) if record.levelno > logging.DEBUG else record.levelname
            record.levelname = f"{level_color}[{levelname}]{colorama.Style.RESET_ALL}"

            # highlight message values enclosed by [...], "...", and '...'
            # (the matched value is re-emitted inside square brackets regardless of the original delimiter)
            record.msg = re.sub(
                r"\[([^\]]+)\]|\"([^\"]+)\"|\'([^\']+)\'",
                lambda match: f"[{value_color}{match.group(1) or match.group(2) or match.group(3)}{colorama.Fore.RESET}{msg_color}]",
                str(record.msg),
            )

            # colorize message
            record.msg = f"{msg_color}{record.msg}{colorama.Style.RESET_ALL}"

            return super().format(record)

    formatter = CustomFormatter("%(levelname)s %(message)s")

    # NOTE(review): this handler is named stdout_log but writes to sys.stderr - confirm whether that is intentional
    stdout_log = logging.StreamHandler(sys.stderr)
    stdout_log.setLevel(logging.DEBUG)
    # The filter class itself (not an instance) is registered; logging invokes its `filter`
    # attribute with the record as the only argument, hence the lambda takes no `self`.
    stdout_log.addFilter(type("", (logging.Filter,), {
        "filter": lambda rec: rec.levelno <= logging.INFO
    }))
    stdout_log.setFormatter(formatter)
    LOG_ROOT.addHandler(stdout_log)

    stderr_log = logging.StreamHandler(sys.stderr)
    stderr_log.setLevel(logging.WARNING)
    stderr_log.setFormatter(formatter)
    LOG_ROOT.addHandler(stderr_log)
def flush_all_handlers() -> None:
    """
    Flushes every handler currently attached to the root logger.
    """
    for root_handler in LOG_ROOT.handlers:
        root_handler.flush()
def get_logger(name: str | None = None) -> logging.Logger:
    """
    Returns a localized logger.

    A logger class that translates messages of every level except DEBUG is installed
    via `logging.setLoggerClass` before the logger is looked up by name.
    """

    class TranslatingLogger(logging.Logger):

        def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
            # debug messages should not be translated
            localized = msg if level == logging.DEBUG else i18n.translate(msg, reflect.get_caller(2))
            super()._log(level, localized, *args, **kwargs)

    logging.setLoggerClass(TranslatingLogger)
    return logging.getLogger(name)
def is_debug(logger:Logger) -> bool:
    """
    Tells whether the given logger would process records of level DEBUG.
    """
    debug_enabled = logger.isEnabledFor(logging.DEBUG)
    return debug_enabled

View File

@@ -0,0 +1,90 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import asyncio, decimal, re, sys, time
from collections.abc import Callable
from datetime import datetime
from gettext import gettext as _
from typing import Any, TypeVar
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
T = TypeVar('T')
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
"""
:param timeout: timespan in seconds until when the condition must become `True`, default is 5 seconds
:param poll_requency: sleep interval between calls in seconds, default is 0.5 seconds
:raises AssertionError: if condition did not come `True` within given timespan
"""
if not isinstance(condition, Callable): # type: ignore[arg-type] # https://github.com/python/mypy/issues/6864
if condition:
return
raise AssertionError(_(error_message))
if timeout < 0:
raise AssertionError("[timeout] must be >= 0")
if poll_requency < 0:
raise AssertionError("[poll_requency] must be >= 0")
start_at = time.time()
while not condition(): # type: ignore[operator]
elapsed = time.time() - start_at
if elapsed >= timeout:
raise AssertionError(_(error_message))
time.sleep(poll_requency)
def is_frozen() -> bool:
    """
    Returns the value of the `frozen` attribute of the `sys` module, or `False` if it is not set.

    >>> is_frozen()
    False
    """
    try:
        return sys.frozen  # type: ignore[attr-defined,no-any-return]
    except AttributeError:
        return False
async def ainput(prompt: str) -> str:
    """
    Reads a line via the built-in `input`, executed in a separate thread so the event loop is not blocked.

    :param prompt: text shown to the user; a single space is appended to it
    """
    prompt_text = f'{prompt} '
    return await asyncio.to_thread(input, prompt_text)
def parse_decimal(number:float | int | str) -> decimal.Decimal:
"""
>>> parse_decimal(5)
Decimal('5')
>>> parse_decimal(5.5)
Decimal('5.5')
>>> parse_decimal("5.5")
Decimal('5.5')
>>> parse_decimal("5,5")
Decimal('5.5')
>>> parse_decimal("1.005,5")
Decimal('1005.5')
>>> parse_decimal("1,005.5")
Decimal('1005.5')
"""
try:
return decimal.Decimal(number)
except decimal.InvalidOperation as ex:
parts = re.split("[.,]", str(number))
try:
return decimal.Decimal("".join(parts[:-1]) + "." + parts[-1])
except decimal.InvalidOperation:
raise decimal.DecimalException(f"Invalid number format: {number}") from ex
def parse_datetime(date:datetime | str | None) -> datetime | None:
"""
>>> parse_datetime(datetime(2020, 1, 1, 0, 0))
datetime.datetime(2020, 1, 1, 0, 0)
>>> parse_datetime("2020-01-01T00:00:00")
datetime.datetime(2020, 1, 1, 0, 0)
>>> parse_datetime(None)
"""
if date is None:
return None
if isinstance(date, datetime):
return date
return datetime.fromisoformat(date)

View File

@@ -0,0 +1,20 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import socket
def is_port_open(host:str, port:int) -> bool:
    """
    Tries to establish an IPv4 TCP connection to the given host and port using a one second timeout.

    :return: `True` if the connection attempt succeeded, `False` if any error occurred
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.settimeout(1)
            probe.connect((host, port))
    except Exception:
        return False
    return True

View File

@@ -0,0 +1,26 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import inspect
from typing import Any
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
stack = inspect.stack()
try:
for frame in stack[depth + 1:]:
if frame.function and frame.function != "<lambda>":
return frame
return None
finally:
del stack # Clean up the stack to avoid reference cycles
def is_integer(obj:Any) -> bool:
    """
    Tells whether the given object can be converted via `int()`.

    :return: `True` if `int(obj)` succeeds, `False` if the conversion is rejected
    """
    try:
        int(obj)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised for infinite floats, e.g. int(float("inf"))
        return False
    return True

View File

@@ -3,7 +3,7 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import asyncio, enum, inspect, json, logging, os, platform, secrets, shutil, time
import asyncio, enum, inspect, json, os, platform, secrets, shutil, time
from collections.abc import Callable, Coroutine, Iterable
from gettext import gettext as _
from typing import cast, Any, Final
@@ -19,8 +19,8 @@ from nodriver.core.config import Config
from nodriver.core.element import Element
from nodriver.core.tab import Tab as Page
from .i18n import get_translating_logger
from .utils import ensure, is_port_open, T
from . import loggers, net
from .misc import ensure, T
__all__ = [
"Browser",
@@ -32,7 +32,7 @@ __all__ = [
"WebScrapingMixin",
]
LOG:Final[logging.Logger] = get_translating_logger(__name__)
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
# see https://api.jquery.com/category/selectors/
METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f'\\{ch}' for ch in '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~'})
@@ -95,7 +95,7 @@ class WebScrapingMixin:
if remote_port > 0:
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
ensure(is_port_open(remote_host, remote_port),
ensure(net.is_port_open(remote_host, remote_port),
f"Browser process not reachable at {remote_host}:{remote_port}. " +
f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml")
cfg = Config(
@@ -146,7 +146,7 @@ class WebScrapingMixin:
LOG.info(" -> Custom Browser argument: %s", browser_arg)
browser_args.append(browser_arg)
if not LOG.isEnabledFor(logging.DEBUG):
if not loggers.is_debug(LOG):
browser_args.append("--log-level=3") # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3
if self.browser_config.user_data_dir:
@@ -483,7 +483,7 @@ class WebScrapingMixin:
async def web_sleep(self, min_ms:int = 1000, max_ms:int = 2500) -> None:
duration = max_ms <= min_ms and min_ms or secrets.randbelow(max_ms - min_ms) + min_ms
LOG.log(logging.INFO if duration > 1500 else logging.DEBUG, " ... pausing for %d ms ...", duration)
LOG.log(loggers.INFO if duration > 1500 else loggers.DEBUG, " ... pausing for %d ms ...", duration)
await self.page.sleep(duration / 1000)
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200,