refact: apply consistent formatting

This commit is contained in:
sebthom
2025-04-27 23:54:22 +02:00
parent fe33a0e461
commit ef923a8337
21 changed files with 1020 additions and 709 deletions

View File

@@ -20,8 +20,8 @@ Select the type(s) of change(s) included in this pull request:
Before requesting a review, confirm the following: Before requesting a review, confirm the following:
- [ ] I have reviewed my changes to ensure they meet the project's standards. - [ ] I have reviewed my changes to ensure they meet the project's standards.
- [ ] I have tested my changes and ensured that all tests pass (`pdm run test`). - [ ] I have tested my changes and ensured that all tests pass (`pdm run test`).
- [ ] I have formatted the code (`pdm run format`).
- [ ] I have verified that linting passes (`pdm run lint`). - [ ] I have verified that linting passes (`pdm run lint`).
- [ ] I have run security scans and addressed any identified issues (`pdm run audit`).
- [ ] I have updated documentation where necessary. - [ ] I have updated documentation where necessary.
By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

View File

@@ -82,7 +82,7 @@ app = "python -m kleinanzeigen_bot"
compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean" compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean"
compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build
debug = "python -m pdb -m kleinanzeigen_bot" debug = "python -m pdb -m kleinanzeigen_bot"
format = "autopep8 --recursive --in-place src tests --verbose" format = {shell = "autopep8 --recursive --in-place scripts src tests --verbose && python scripts/post_autopep8.py scripts src tests" }
lint = {shell = "ruff check && mypy && basedpyright" } lint = {shell = "ruff check && mypy && basedpyright" }
fix = {shell = "ruff check --fix" } fix = {shell = "ruff check --fix" }
test = "python -m pytest --capture=tee-sys -v" test = "python -m pytest --capture=tee-sys -v"
@@ -113,7 +113,7 @@ aggressive = 3
# https://docs.astral.sh/ruff/configuration/ # https://docs.astral.sh/ruff/configuration/
##################### #####################
[tool.ruff] [tool.ruff]
include = ["pyproject.toml", "src/**/*.py", "tests/**/*.py"] include = ["pyproject.toml", "scripts/**/*.py", "src/**/*.py", "tests/**/*.py"]
line-length = 160 line-length = 160
indent-width = 4 indent-width = 4
target-version = "py310" target-version = "py310"
@@ -208,14 +208,10 @@ ignore = [
"TC006", # Add quotes to type expression in `typing.cast()` "TC006", # Add quotes to type expression in `typing.cast()`
] ]
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
line-ending = "native"
docstring-code-format = false
skip-magic-trailing-comma = false
[tool.ruff.lint.per-file-ignores] [tool.ruff.lint.per-file-ignores]
"scripts/**/*.py" = [
"INP001", # File `...` is part of an implicit namespace package. Add an `__init__.py`.
]
"tests/**/*.py" = [ "tests/**/*.py" = [
"ARG", "ARG",
"B", "B",
@@ -247,7 +243,7 @@ max-statements = 150 # max. number of statements in function / method body (R091
# https://mypy.readthedocs.io/en/stable/config_file.html # https://mypy.readthedocs.io/en/stable/config_file.html
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs" #mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
python_version = "3.10" python_version = "3.10"
files = "src,tests" files = "scripts,src,tests"
strict = true strict = true
disallow_untyped_calls = false disallow_untyped_calls = false
disallow_untyped_defs = true disallow_untyped_defs = true
@@ -264,7 +260,7 @@ verbosity = 0
##################### #####################
[tool.basedpyright] [tool.basedpyright]
# https://docs.basedpyright.com/latest/configuration/config-files/ # https://docs.basedpyright.com/latest/configuration/config-files/
include = ["src", "tests"] include = ["scripts", "src", "tests"]
defineConstant = { DEBUG = false } defineConstant = { DEBUG = false }
pythonVersion = "3.10" pythonVersion = "3.10"
typeCheckingMode = "standard" typeCheckingMode = "standard"

317
scripts/post_autopep8.py Normal file
View File

@@ -0,0 +1,317 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import ast, logging, re, sys # isort: skip
from pathlib import Path
from typing import Final, List, Protocol, Tuple
from typing_extensions import override
# Configure basic logging for console output; this script runs standalone from
# the "pdm run format" task, so no file handlers are set up.
logging.basicConfig(level = logging.INFO, format = "%(levelname)s: %(message)s")
# Module-wide logger used by format_file() and the CLI entry point below.
LOG:Final[logging.Logger] = logging.getLogger(__name__)
class FormatterRule(Protocol):
    """
    Structural interface for a single formatting pass: given the parsed AST of a
    module, a rule may rewrite the module's source lines.
    """

    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        """Return the (possibly modified) source lines of the file at `path`."""
class NoSpaceAfterColonInTypeAnnotationRule(FormatterRule):
    """
    Strips the whitespace that follows the colon of a type annotation, turning
    `name: type` into `name:type` for function parameters and annotated
    assignments. This intentionally reverses autopep8 rule E231.

    Example:
        # Before
        def foo(a: int, b: str) -> None:
            pass
        # After
        def foo(a:int, b:str) -> None:
            pass
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        # collect (zero-based line, column) where each annotation expression starts
        annotation_spots:List[Tuple[int, int]] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.arg) and node.annotation is not None:
                annotation_spots.append((node.annotation.lineno - 1, node.annotation.col_offset))
            elif isinstance(node, ast.AnnAssign) and node.annotation is not None:
                annotation_spots.append((node.annotation.lineno - 1, node.annotation.col_offset))
        if not annotation_spots:
            return lines

        result:List[str] = []
        for line_no, text in enumerate(lines):
            # leave pure comment lines untouched
            if text.lstrip().startswith("#"):
                result.append(text)
                continue
            # process right-to-left so lower columns stay valid while the line shrinks
            for col in sorted((c for (ln, c) in annotation_spots if ln == line_no), reverse = True):
                colon_pos = text.rfind(":", 0, col)
                if colon_pos == -1:
                    # colon not on this line (e.g. annotation on a continuation line)
                    continue
                # drop the whitespace run that directly follows the colon
                gap_end = colon_pos + 1
                while gap_end < len(text) and text[gap_end].isspace():
                    gap_end += 1
                text = text[:colon_pos + 1] + text[gap_end:]
            result.append(text)
        return result
class EqualSignSpacingInDefaultsAndNamedArgsRule(FormatterRule):
    """
    Normalizes the '=' of parameter defaults and of keyword arguments in calls
    to carry exactly one space on each side: `a:int = 3` instead of `a:int=3`,
    and `x = 42` instead of `x=42` or `x =42`.
    This intentionally reverses autopep8 rule E251.

    Example:
        # Before
        def foo(a:int=3, b :str= "bar"):
            pass
        foo(x=42,y = "hello")
        # After
        def foo(a:int = 3, b:str = "bar"):
            pass
        foo(x = 42, y = "hello")
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        # collect (zero-based line, column) where each default / keyword VALUE starts;
        # the '=' to normalize is the last one on the line before that column
        value_spots:List[Tuple[int, int]] = []
        for node in ast.walk(tree):
            # defaults in function definitions, async defs & lambdas
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)):
                value_spots.extend(
                    (d.lineno - 1, d.col_offset)
                    for d in node.args.defaults
                    if d is not None
                )
                # keyword-only defaults exist on defs only, not on lambdas
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    value_spots.extend(
                        (d.lineno - 1, d.col_offset)
                        for d in node.args.kw_defaults
                        if d is not None
                    )
            # keyword arguments in calls; kw.arg is None for **unpacking, which has no '='
            if isinstance(node, ast.Call):
                value_spots.extend(
                    (kw.value.lineno - 1, kw.value.col_offset)
                    for kw in node.keywords
                    if kw.arg is not None
                )
        if not value_spots:
            return lines

        result:List[str] = []
        for line_no, text in enumerate(lines):
            if text.lstrip().startswith("#"):
                result.append(text)
                continue
            # process right-to-left so lower columns stay valid while the line changes length
            for col in sorted((c for (ln, c) in value_spots if ln == line_no), reverse = True):
                eq_pos = text.rfind("=", 0, col)
                if eq_pos == -1:
                    continue
                # find the last non-space char before '=' and the first after it,
                # then splice in the '=' with exactly one space on each side
                left = eq_pos - 1
                while left >= 0 and text[left].isspace():
                    left -= 1
                right = eq_pos + 1
                while right < len(text) and text[right].isspace():
                    right += 1
                text = text[:left + 1] + " = " + text[right:]
            result.append(text)
        return result
class PreferDoubleQuotesRule(FormatterRule):
    """
    Ensures string literals use double quotes unless the content contains a double quote.

    Example:
        # Before
        foo = 'hello'
        bar = 'a "quote" inside'
        # After
        foo = "hello"
        bar = 'a "quote" inside' # kept as-is, because it contains a double quote
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        new_lines = lines.copy()
        # Track how much each line has shifted so far, so later literals on the same
        # line can still be located after earlier replacements changed its length
        line_shifts:dict[int, int] = dict.fromkeys(range(len(lines)), 0)

        # Build a parent map for f-string detection
        parent_map:dict[ast.AST, ast.AST] = {}
        for parent in ast.walk(tree):
            for child in ast.iter_child_nodes(parent):
                parent_map[child] = parent

        def is_in_fstring(node:ast.AST) -> bool:
            p = parent_map.get(node)
            while p:
                if isinstance(p, ast.JoinedStr):
                    return True
                p = parent_map.get(p)
            return False

        # Regex to locate a single- or triple-quoted literal:
        # (?P<prefix>[rRbBuUfF]*)  optional string flags (r, b, u, f), either case
        # '''(?P<content3>...)'''  triple-quoted: content may hold escape pairs (\\.),
        #                          single quotes that do not start a closing ''' and
        #                          any other character
        # '(?P<content1>...)'      single-quoted: content may hold escape pairs (\\.)
        #                          and any character except a bare quote or backslash
        # Escape pairs are consumed atomically so a backslash-escaped quote (e.g. in
        # 'it\'s' or 'a\\') is never mistaken for the closing delimiter; the previous
        # pattern (non-greedy .*? up to the next quote) produced syntactically broken
        # replacements for such literals, aborting formatting of the whole file.
        literal_re = re.compile(
            r"(?P<prefix>[rRbBuUfF]*)"
            r"(?:'''(?P<content3>(?:\\.|'(?!'')|[^\\'])*)'''"
            r"|'(?P<content1>(?:\\.|[^\\'])*)')",
            re.DOTALL,
        )

        for node in ast.walk(tree):
            # only handle simple string constants (bytes literals are not str)
            if not (isinstance(node, ast.Constant) and isinstance(node.value, str)):
                continue
            # skip anything inside an f-string, at any depth
            if is_in_fstring(node):
                continue

            starting_line_number = getattr(node, "lineno", None)
            starting_col_offset = getattr(node, "col_offset", None)
            if starting_line_number is None or starting_col_offset is None:
                continue

            start_line = starting_line_number - 1
            shift = line_shifts[start_line]
            raw = new_lines[start_line]

            # apply shift so we match against current edited line
            idx = starting_col_offset + shift
            if idx >= len(raw) or raw[idx] not in ("'", "r", "u", "b", "f", "R", "U", "B", "F"):
                continue

            # match literal at that column; literals spanning multiple lines never
            # match because `raw` is a single line, so they are left untouched
            m = literal_re.match(raw[idx:])
            if not m:
                continue

            prefix = m.group("prefix")
            content = m.group("content3")
            if content is not None:
                delim = '"' * 3
            else:
                content = m.group("content1")
                delim = '"'

            # skip if content has a double-quote already (also covers escaped \" pairs)
            if '"' in content:
                continue

            # build new literal with the same prefix, but double-quote delimiter;
            # content is reused verbatim - it cannot contain the new delimiter here
            new_literal = f"{prefix}{delim}{content}{delim}"
            literal_len = m.end() # how many chars we're replacing

            before = raw[:idx]
            after = raw[idx + literal_len:]
            new_lines[start_line] = before + new_literal + after

            # record shift delta for any further edits on this line
            line_shifts[start_line] += len(new_literal) - literal_len
        return new_lines
# Registry of all post-processing rules; format_file() applies them sequentially
# in this order, re-parsing the source after each one.
FORMATTER_RULES:List[FormatterRule] = [
    NoSpaceAfterColonInTypeAnnotationRule(),
    EqualSignSpacingInDefaultsAndNamedArgsRule(),
    PreferDoubleQuotesRule(),
]
def format_file(path:Path) -> None:
    """
    Applies every rule in FORMATTER_RULES to the given Python file and rewrites it
    in place, but only when the content actually changed and still parses cleanly
    after every rule.
    """
    # Read without newline conversion so existing line endings survive a rewrite
    with path.open("r", encoding = "utf-8", newline = "") as fh:
        original_text = fh.read()

    # Initial parse; files the AST parser rejects are reported and skipped
    try:
        tree = ast.parse(original_text)
    except SyntaxError as e:
        LOG.error(
            "Syntax error parsing %s[%d:%d]: %r -> %s",
            path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
        )
        return

    lines = original_text.splitlines(keepends = True)
    formatted_text = original_text
    for rule in FORMATTER_RULES:
        lines = rule.apply(tree, lines, path)
        formatted_text = "".join(lines)
        # Re-parse so the next rule sees node positions matching the edited text;
        # a syntax error here means the rule broke the file -> abort without writing
        try:
            tree = ast.parse(formatted_text)
        except SyntaxError as e:
            LOG.error(
                "Syntax error after %s at %s[%d:%d]: %r -> %s",
                rule.__class__.__name__, path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
            )
            return

    if formatted_text != original_text:
        with path.open("w", encoding = "utf-8", newline = "") as fh:
            fh.write(formatted_text)
        LOG.info("Formatted [%s].", path)
if __name__ == "__main__":
    # At least one target directory is required on the command line
    if len(sys.argv) < 2: # noqa: PLR2004 Magic value used in comparison
        script_path = Path(sys.argv[0])
        print(f"Usage: python {script_path} <directory1> [<directory2> ...]")
        sys.exit(1)

    for directory in (Path(arg) for arg in sys.argv[1:]):
        if not directory.exists():
            LOG.warning("Directory [%s] does not exist, skipping...", directory)
            continue
        # recursively format every Python file below the given directory
        for py_file in directory.rglob("*.py"):
            format_file(py_file)

View File

@@ -83,11 +83,11 @@ class KleinanzeigenBot(WebScrapingMixin):
self.configure_file_logging() self.configure_file_logging()
self.load_config() self.load_config()
if not (self.ads_selector in {'all', 'new', 'due', 'changed'} or if not (self.ads_selector in {"all", "new", "due", "changed"} or
any(selector in self.ads_selector.split(',') for selector in ('all', 'new', 'due', 'changed')) or any(selector in self.ads_selector.split(",") for selector in ("all", "new", "due", "changed")) or
re.compile(r'\d+[,\d+]*').search(self.ads_selector)): re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
LOG.warning('You provided no ads selector. Defaulting to "due".') LOG.warning('You provided no ads selector. Defaulting to "due".')
self.ads_selector = 'due' self.ads_selector = "due"
if ads := self.load_ads(): if ads := self.load_ads():
await self.create_browser_session() await self.create_browser_session()
@@ -111,9 +111,9 @@ class KleinanzeigenBot(WebScrapingMixin):
case "download": case "download":
self.configure_file_logging() self.configure_file_logging()
# ad IDs depends on selector # ad IDs depends on selector
if not (self.ads_selector in {'all', 'new'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)): if not (self.ads_selector in {"all", "new"} or re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
LOG.warning('You provided no ads selector. Defaulting to "new".') LOG.warning('You provided no ads selector. Defaulting to "new".')
self.ads_selector = 'new' self.ads_selector = "new"
self.load_config() self.load_config()
await self.create_browser_session() await self.create_browser_session()
await self.login() await self.login()
@@ -265,7 +265,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("App version: %s", self.get_version()) LOG.info("App version: %s", self.get_version())
LOG.info("Python version: %s", sys.version) LOG.info("Python version: %s", sys.version)
def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_file_relative: str) -> bool: def __check_ad_republication(self, ad_cfg:dict[str, Any], ad_file_relative:str) -> bool:
""" """
Check if an ad needs to be republished based on republication interval. Check if an ad needs to be republished based on republication interval.
Returns True if the ad should be republished based on the interval. Returns True if the ad should be republished based on the interval.
@@ -295,7 +295,7 @@ class KleinanzeigenBot(WebScrapingMixin):
return True return True
def __check_ad_changed(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool: def __check_ad_changed(self, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool:
""" """
Check if an ad has been changed since last publication. Check if an ad has been changed since last publication.
Returns True if the ad has been changed. Returns True if the ad has been changed.
@@ -327,7 +327,7 @@ class KleinanzeigenBot(WebScrapingMixin):
data_root_dir = os.path.dirname(self.config_file_path) data_root_dir = os.path.dirname(self.config_file_path)
for file_pattern in self.config["ad_files"]: for file_pattern in self.config["ad_files"]:
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB): for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
if not str(ad_file).endswith('ad_fields.yaml'): if not str(ad_file).endswith("ad_fields.yaml"):
ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file
LOG.info(" -> found %s", pluralize("ad config file", ad_files)) LOG.info(" -> found %s", pluralize("ad config file", ad_files))
if not ad_files: if not ad_files:
@@ -335,13 +335,13 @@ class KleinanzeigenBot(WebScrapingMixin):
ids = [] ids = []
use_specific_ads = False use_specific_ads = False
selectors = self.ads_selector.split(',') selectors = self.ads_selector.split(",")
if re.compile(r'\d+[,\d+]*').search(self.ads_selector): if re.compile(r"\d+[,\d+]*").search(self.ads_selector):
ids = [int(n) for n in self.ads_selector.split(',')] ids = [int(n) for n in self.ads_selector.split(",")]
use_specific_ads = True use_specific_ads = True
LOG.info('Start fetch task for the ad(s) with id(s):') LOG.info("Start fetch task for the ad(s) with id(s):")
LOG.info(' | '.join([str(id_) for id_ in ids])) LOG.info(" | ".join([str(id_) for id_ in ids]))
ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml") ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml")
ads = [] ads = []
@@ -548,7 +548,7 @@ class KleinanzeigenBot(WebScrapingMixin):
async def is_logged_in(self) -> bool: async def is_logged_in(self) -> bool:
try: try:
user_info = await self.web_text(By.CLASS_NAME, "mr-medium") user_info = await self.web_text(By.CLASS_NAME, "mr-medium")
if self.config['login']['username'].lower() in user_info.lower(): if self.config["login"]["username"].lower() in user_info.lower():
return True return True
except TimeoutError: except TimeoutError:
return False return False
@@ -570,7 +570,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("DONE: Deleted %s", pluralize("ad", count)) LOG.info("DONE: Deleted %s", pluralize("ad", count))
LOG.info("############################################") LOG.info("############################################")
async def delete_ad(self, ad_cfg: dict[str, Any], published_ads: list[dict[str, Any]], *, delete_old_ads_by_title: bool) -> bool: async def delete_ad(self, ad_cfg:dict[str, Any], published_ads:list[dict[str, Any]], *, delete_old_ads_by_title:bool) -> bool:
LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"]) LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html") await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
@@ -627,7 +627,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("DONE: (Re-)published %s", pluralize("ad", count)) LOG.info("DONE: (Re-)published %s", pluralize("ad", count))
LOG.info("############################################") LOG.info("############################################")
async def publish_ad(self, ad_file:str, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], published_ads: list[dict[str, Any]]) -> None: async def publish_ad(self, ad_file:str, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], published_ads:list[dict[str, Any]]) -> None:
""" """
@param ad_cfg: the effective ad config (i.e. with default values applied etc.) @param ad_cfg: the effective ad config (i.e. with default values applied etc.)
@param ad_cfg_orig: the ad config as present in the YAML file @param ad_cfg_orig: the ad config as present in the YAML file
@@ -657,7 +657,7 @@ class KleinanzeigenBot(WebScrapingMixin):
############################# #############################
# set category # set category
############################# #############################
await self.__set_category(ad_cfg['category'], ad_file) await self.__set_category(ad_cfg["category"], ad_file)
############################# #############################
# set special attributes # set special attributes
@@ -674,7 +674,7 @@ class KleinanzeigenBot(WebScrapingMixin):
try: try:
await self.web_select(By.XPATH, "//select[contains(@id, '.versand_s')]", shipping_value) await self.web_select(By.XPATH, "//select[contains(@id, '.versand_s')]", shipping_value)
except TimeoutError: except TimeoutError:
LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg['shipping_type']) LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg["shipping_type"])
else: else:
await self.__set_shipping(ad_cfg) await self.__set_shipping(ad_cfg)
@@ -698,9 +698,9 @@ class KleinanzeigenBot(WebScrapingMixin):
if ad_cfg["shipping_type"] == "SHIPPING": if ad_cfg["shipping_type"] == "SHIPPING":
if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}: if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}:
if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED): if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED):
await self.web_click(By.ID, 'radio-buy-now-yes') await self.web_click(By.ID, "radio-buy-now-yes")
elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED): elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED):
await self.web_click(By.ID, 'radio-buy-now-no') await self.web_click(By.ID, "radio-buy-now-no")
except TimeoutError as ex: except TimeoutError as ex:
LOG.debug(ex, exc_info = True) LOG.debug(ex, exc_info = True)
@@ -832,7 +832,7 @@ class KleinanzeigenBot(WebScrapingMixin):
dicts.save_dict(ad_file, ad_cfg_orig) dicts.save_dict(ad_file, ad_cfg_orig)
async def __set_condition(self, condition_value: str) -> None: async def __set_condition(self, condition_value:str) -> None:
condition_mapping = { condition_mapping = {
"new_with_tag": "Neu mit Etikett", "new_with_tag": "Neu mit Etikett",
"new": "Neu", "new": "Neu",
@@ -862,7 +862,7 @@ class KleinanzeigenBot(WebScrapingMixin):
except TimeoutError as ex: except TimeoutError as ex:
raise TimeoutError(_("Unable to close condition dialog!")) from ex raise TimeoutError(_("Unable to close condition dialog!")) from ex
async def __set_category(self, category: str | None, ad_file:str) -> None: async def __set_category(self, category:str | None, ad_file:str) -> None:
# click on something to trigger automatic category detection # click on something to trigger automatic category detection
await self.web_click(By.ID, "pstad-descrptn") await self.web_click(By.ID, "pstad-descrptn")
@@ -884,9 +884,9 @@ class KleinanzeigenBot(WebScrapingMixin):
else: else:
ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed") ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed")
async def __set_special_attributes(self, ad_cfg: dict[str, Any]) -> None: async def __set_special_attributes(self, ad_cfg:dict[str, Any]) -> None:
if ad_cfg["special_attributes"]: if ad_cfg["special_attributes"]:
LOG.debug('Found %i special attributes', len(ad_cfg["special_attributes"])) LOG.debug("Found %i special attributes", len(ad_cfg["special_attributes"]))
for special_attribute_key, special_attribute_value in ad_cfg["special_attributes"].items(): for special_attribute_key, special_attribute_value in ad_cfg["special_attributes"].items():
if special_attribute_key == "condition_s": if special_attribute_key == "condition_s":
@@ -911,10 +911,10 @@ class KleinanzeigenBot(WebScrapingMixin):
try: try:
elem_id = special_attr_elem.attrs.id elem_id = special_attr_elem.attrs.id
if special_attr_elem.local_name == 'select': if special_attr_elem.local_name == "select":
LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key) LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key)
await self.web_select(By.ID, elem_id, special_attribute_value) await self.web_select(By.ID, elem_id, special_attribute_value)
elif special_attr_elem.attrs.type == 'checkbox': elif special_attr_elem.attrs.type == "checkbox":
LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key) LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key)
await self.web_click(By.ID, elem_id) await self.web_click(By.ID, elem_id)
else: else:
@@ -925,7 +925,7 @@ class KleinanzeigenBot(WebScrapingMixin):
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value) LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value)
async def __set_shipping(self, ad_cfg: dict[str, Any]) -> None: async def __set_shipping(self, ad_cfg:dict[str, Any]) -> None:
if ad_cfg["shipping_type"] == "PICKUP": if ad_cfg["shipping_type"] == "PICKUP":
try: try:
await self.web_click(By.XPATH, await self.web_click(By.XPATH,
@@ -960,7 +960,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.debug(ex, exc_info = True) LOG.debug(ex, exc_info = True)
raise TimeoutError(_("Unable to close shipping dialog!")) from ex raise TimeoutError(_("Unable to close shipping dialog!")) from ex
async def __set_shipping_options(self, ad_cfg: dict[str, Any]) -> None: async def __set_shipping_options(self, ad_cfg:dict[str, Any]) -> None:
shipping_options_mapping = { shipping_options_mapping = {
"DHL_2": ("Klein", "Paket 2 kg"), "DHL_2": ("Klein", "Paket 2 kg"),
"Hermes_Päckchen": ("Klein", "Päckchen"), "Hermes_Päckchen": ("Klein", "Päckchen"),
@@ -980,7 +980,7 @@ class KleinanzeigenBot(WebScrapingMixin):
except KeyError as ex: except KeyError as ex:
raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg['shipping_options']}") from ex raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg['shipping_options']}") from ex
shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict=False) shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict = False)
try: try:
shipping_size, = set(shipping_sizes) shipping_size, = set(shipping_sizes)
@@ -1025,7 +1025,7 @@ class KleinanzeigenBot(WebScrapingMixin):
except TimeoutError as ex: except TimeoutError as ex:
raise TimeoutError(_("Unable to close shipping dialog!")) from ex raise TimeoutError(_("Unable to close shipping dialog!")) from ex
async def __upload_images(self, ad_cfg: dict[str, Any]) -> None: async def __upload_images(self, ad_cfg:dict[str, Any]) -> None:
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"])) LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]") image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]")
@@ -1036,7 +1036,7 @@ class KleinanzeigenBot(WebScrapingMixin):
async def assert_free_ad_limit_not_reached(self) -> None: async def assert_free_ad_limit_not_reached(self) -> None:
try: try:
await self.web_find(By.XPATH, '/html/body/div[1]/form/fieldset[6]/div[1]/header', timeout = 2) await self.web_find(By.XPATH, "/html/body/div[1]/form/fieldset[6]/div[1]/header", timeout = 2)
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.") raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
except TimeoutError: except TimeoutError:
pass pass
@@ -1050,13 +1050,13 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_extractor = extract.AdExtractor(self.browser, self.config) ad_extractor = extract.AdExtractor(self.browser, self.config)
# use relevant download routine # use relevant download routine
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes
LOG.info('Scanning your ad overview...') LOG.info("Scanning your ad overview...")
own_ad_urls = await ad_extractor.extract_own_ads_urls() own_ad_urls = await ad_extractor.extract_own_ads_urls()
LOG.info('%s found.', pluralize("ad", len(own_ad_urls))) LOG.info("%s found.", pluralize("ad", len(own_ad_urls)))
if self.ads_selector == 'all': # download all of your adds if self.ads_selector == "all": # download all of your adds
LOG.info('Starting download of all ads...') LOG.info("Starting download of all ads...")
success_count = 0 success_count = 0
# call download function for each ad page # call download function for each ad page
@@ -1067,12 +1067,12 @@ class KleinanzeigenBot(WebScrapingMixin):
success_count += 1 success_count += 1
LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls)) LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls))
elif self.ads_selector == 'new': # download only unsaved ads elif self.ads_selector == "new": # download only unsaved ads
# check which ads already saved # check which ads already saved
saved_ad_ids = [] saved_ad_ids = []
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
for ad in ads: for ad in ads:
ad_id = int(ad[2]['id']) ad_id = int(ad[2]["id"])
saved_ad_ids.append(ad_id) saved_ad_ids.append(ad_id)
# determine ad IDs from links # determine ad IDs from links
@@ -1083,28 +1083,28 @@ class KleinanzeigenBot(WebScrapingMixin):
for ad_url, ad_id in ad_id_by_url.items(): for ad_url, ad_id in ad_id_by_url.items():
# check if ad with ID already saved # check if ad with ID already saved
if ad_id in saved_ad_ids: if ad_id in saved_ad_ids:
LOG.info('The ad with id %d has already been saved.', ad_id) LOG.info("The ad with id %d has already been saved.", ad_id)
continue continue
if await ad_extractor.naviagte_to_ad_page(ad_url): if await ad_extractor.naviagte_to_ad_page(ad_url):
await ad_extractor.download_ad(ad_id) await ad_extractor.download_ad(ad_id)
new_count += 1 new_count += 1
LOG.info('%s were downloaded from your profile.', pluralize("new ad", new_count)) LOG.info("%s were downloaded from your profile.", pluralize("new ad", new_count))
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s) elif re.compile(r"\d+[,\d+]*").search(self.ads_selector): # download ad(s) with specific id(s)
ids = [int(n) for n in self.ads_selector.split(',')] ids = [int(n) for n in self.ads_selector.split(",")]
LOG.info('Starting download of ad(s) with the id(s):') LOG.info("Starting download of ad(s) with the id(s):")
LOG.info(' | '.join([str(ad_id) for ad_id in ids])) LOG.info(" | ".join([str(ad_id) for ad_id in ids]))
for ad_id in ids: # call download routine for every id for ad_id in ids: # call download routine for every id
exists = await ad_extractor.naviagte_to_ad_page(ad_id) exists = await ad_extractor.naviagte_to_ad_page(ad_id)
if exists: if exists:
await ad_extractor.download_ad(ad_id) await ad_extractor.download_ad(ad_id)
LOG.info('Downloaded ad with id %d', ad_id) LOG.info("Downloaded ad with id %d", ad_id)
else: else:
LOG.error('The page with the id %d does not exist!', ad_id) LOG.error("The page with the id %d does not exist!", ad_id)
def __get_description_with_affixes(self, ad_cfg: dict[str, Any]) -> str: def __get_description_with_affixes(self, ad_cfg:dict[str, Any]) -> str:
"""Get the complete description with prefix and suffix applied. """Get the complete description with prefix and suffix applied.
Precedence (highest to lowest): Precedence (highest to lowest):

View File

@@ -9,7 +9,7 @@ from .utils import dicts
MAX_DESCRIPTION_LENGTH:Final[int] = 4000 MAX_DESCRIPTION_LENGTH:Final[int] = 4000
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str: def calculate_content_hash(ad_cfg:dict[str, Any]) -> str:
"""Calculate a hash for user-modifiable fields of the ad.""" """Calculate a hash for user-modifiable fields of the ad."""
# Relevant fields for the hash # Relevant fields for the hash
@@ -40,7 +40,7 @@ def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
return hashlib.sha256(content_str.encode()).hexdigest() return hashlib.sha256(content_str.encode()).hexdigest()
def get_description_affixes(config: dict[str, Any], *, prefix: bool = True) -> str: def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str:
"""Get prefix or suffix for description with proper precedence. """Get prefix or suffix for description with proper precedence.
This function handles both the new flattened format and legacy nested format: This function handles both the new flattened format and legacy nested format:

View File

@@ -36,22 +36,22 @@ class AdExtractor(WebScrapingMixin):
""" """
# create sub-directory for ad(s) to download (if necessary): # create sub-directory for ad(s) to download (if necessary):
relative_directory = 'downloaded-ads' relative_directory = "downloaded-ads"
# make sure configured base directory exists # make sure configured base directory exists
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory): if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
os.mkdir(relative_directory) os.mkdir(relative_directory)
LOG.info('Created ads directory at ./%s.', relative_directory) LOG.info("Created ads directory at ./%s.", relative_directory)
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}') new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
if os.path.exists(new_base_dir): if os.path.exists(new_base_dir):
LOG.info('Deleting current folder of ad %s...', ad_id) LOG.info("Deleting current folder of ad %s...", ad_id)
shutil.rmtree(new_base_dir) shutil.rmtree(new_base_dir)
os.mkdir(new_base_dir) os.mkdir(new_base_dir)
LOG.info('New directory for ad created at %s.', new_base_dir) LOG.info("New directory for ad created at %s.", new_base_dir)
# call extraction function # call extraction function
info = await self._extract_ad_page_info(new_base_dir, ad_id) info = await self._extract_ad_page_info(new_base_dir, ad_id)
ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml' ad_file_path = new_base_dir + "/" + f'ad_{ad_id}.yaml'
dicts.save_dict(ad_file_path, info) dicts.save_dict(ad_file_path, info)
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]: async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
@@ -67,18 +67,18 @@ class AdExtractor(WebScrapingMixin):
img_paths = [] img_paths = []
try: try:
# download all images from box # download all images from box
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large') image_box = await self.web_find(By.CLASS_NAME, "galleryimage-large")
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box)) n_images = len(await self.web_find_all(By.CSS_SELECTOR, ".galleryimage-element[data-ix]", parent = image_box))
LOG.info('Found %s.', i18n.pluralize("image", n_images)) LOG.info("Found %s.", i18n.pluralize("image", n_images))
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box) img_element:Element = await self.web_find(By.CSS_SELECTOR, "div:nth-child(1) > img", parent = image_box)
img_fn_prefix = 'ad_' + str(ad_id) + '__img' img_fn_prefix = "ad_" + str(ad_id) + "__img"
img_nr = 1 img_nr = 1
dl_counter = 0 dl_counter = 0
while img_nr <= n_images: # scrolling + downloading while img_nr <= n_images: # scrolling + downloading
current_img_url = img_element.attrs['src'] # URL of the image current_img_url = img_element.attrs["src"] # URL of the image
if current_img_url is None: if current_img_url is None:
continue continue
@@ -86,43 +86,43 @@ class AdExtractor(WebScrapingMixin):
content_type = response.info().get_content_type() content_type = response.info().get_content_type()
file_ending = mimetypes.guess_extension(content_type) file_ending = mimetypes.guess_extension(content_type)
img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}" img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}"
with open(img_path, 'wb') as f: with open(img_path, "wb") as f:
shutil.copyfileobj(response, f) shutil.copyfileobj(response, f)
dl_counter += 1 dl_counter += 1
img_paths.append(img_path.rsplit('/', maxsplit = 1)[-1]) img_paths.append(img_path.rsplit("/", maxsplit = 1)[-1])
# navigate to next image (if exists) # navigate to next image (if exists)
if img_nr < n_images: if img_nr < n_images:
try: try:
# click next button, wait, and re-establish reference # click next button, wait, and re-establish reference
await (await self.web_find(By.CLASS_NAME, 'galleryimage--navigation--next')).click() await (await self.web_find(By.CLASS_NAME, "galleryimage--navigation--next")).click()
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})') new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div) img_element = await self.web_find(By.TAG_NAME, "img", parent = new_div)
except TimeoutError: except TimeoutError:
LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.') LOG.error("NEXT button in image gallery somehow missing, aborting image fetching.")
break break
img_nr += 1 img_nr += 1
LOG.info('Downloaded %s.', i18n.pluralize("image", dl_counter)) LOG.info("Downloaded %s.", i18n.pluralize("image", dl_counter))
except TimeoutError: # some ads do not require images except TimeoutError: # some ads do not require images
LOG.warning('No image area found. Continuing without downloading images.') LOG.warning("No image area found. Continuing without downloading images.")
return img_paths return img_paths
def extract_ad_id_from_ad_url(self, url: str) -> int: def extract_ad_id_from_ad_url(self, url:str) -> int:
""" """
Extracts the ID of an ad, given by its reference link. Extracts the ID of an ad, given by its reference link.
:param url: the URL to the ad page :param url: the URL to the ad page
:return: the ad ID, a (ten-digit) integer number :return: the ad ID, a (ten-digit) integer number
""" """
num_part = url.split('/')[-1] # suffix num_part = url.split("/")[-1] # suffix
id_part = num_part.split('-')[0] id_part = num_part.split("-")[0]
try: try:
path = url.split('?', 1)[0] # Remove query string if present path = url.split("?", 1)[0] # Remove query string if present
last_segment = path.rstrip('/').split('/')[-1] # Get last path component last_segment = path.rstrip("/").split("/")[-1] # Get last path component
id_part = last_segment.split('-')[0] # Extract part before first hyphen id_part = last_segment.split("-")[0] # Extract part before first hyphen
return int(id_part) return int(id_part)
except (IndexError, ValueError) as ex: except (IndexError, ValueError) as ex:
LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex) LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex)
@@ -135,41 +135,41 @@ class AdExtractor(WebScrapingMixin):
:return: the links to your ad pages :return: the links to your ad pages
""" """
# navigate to "your ads" page # navigate to "your ads" page
await self.web_open('https://www.kleinanzeigen.de/m-meine-anzeigen.html') await self.web_open("https://www.kleinanzeigen.de/m-meine-anzeigen.html")
await self.web_sleep(2000, 3000) # Consider replacing with explicit waits later await self.web_sleep(2000, 3000) # Consider replacing with explicit waits later
# Try to find the main ad list container first # Try to find the main ad list container first
try: try:
ad_list_container = await self.web_find(By.ID, 'my-manageitems-adlist') ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
except TimeoutError: except TimeoutError:
LOG.warning('Ad list container #my-manageitems-adlist not found. Maybe no ads present?') LOG.warning("Ad list container #my-manageitems-adlist not found. Maybe no ads present?")
return [] return []
# --- Pagination handling --- # --- Pagination handling ---
multi_page = False multi_page = False
try: try:
# Correct selector: Use uppercase '.Pagination' # Correct selector: Use uppercase '.Pagination'
pagination_section = await self.web_find(By.CSS_SELECTOR, '.Pagination', timeout=10) # Increased timeout slightly pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = 10) # Increased timeout slightly
# Correct selector: Use 'aria-label' # Correct selector: Use 'aria-label'
# Also check if the button is actually present AND potentially enabled (though enabled check isn't strictly necessary here, only for clicking later) # Also check if the button is actually present AND potentially enabled (though enabled check isn't strictly necessary here, only for clicking later)
next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section) next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
if next_buttons: if next_buttons:
# Check if at least one 'Nächste' button is not disabled (optional but good practice) # Check if at least one 'Nächste' button is not disabled (optional but good practice)
enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get('disabled')] enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get("disabled")]
if enabled_next_buttons: if enabled_next_buttons:
multi_page = True multi_page = True
LOG.info('Multiple ad pages detected.') LOG.info("Multiple ad pages detected.")
else: else:
LOG.info('Next button found but is disabled. Assuming single effective page.') LOG.info("Next button found but is disabled. Assuming single effective page.")
else: else:
LOG.info('No "Naechste" button found within pagination. Assuming single page.') LOG.info('No "Naechste" button found within pagination. Assuming single page.')
except TimeoutError: except TimeoutError:
# This will now correctly trigger only if the '.Pagination' div itself is not found # This will now correctly trigger only if the '.Pagination' div itself is not found
LOG.info('No pagination controls found. Assuming single page.') LOG.info("No pagination controls found. Assuming single page.")
except Exception as e: except Exception as e:
LOG.exception("Error during pagination detection: %s", e) LOG.exception("Error during pagination detection: %s", e)
LOG.info('Assuming single page due to error during pagination check.') LOG.info("Assuming single page due to error during pagination check.")
# --- End Pagination Handling --- # --- End Pagination Handling ---
refs:list[str] = [] refs:list[str] = []
@@ -182,8 +182,8 @@ class AdExtractor(WebScrapingMixin):
# Re-find the ad list container on the current page/state # Re-find the ad list container on the current page/state
try: try:
ad_list_container = await self.web_find(By.ID, 'my-manageitems-adlist') ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
list_items = await self.web_find_all(By.CLASS_NAME, 'cardbox', parent=ad_list_container) list_items = await self.web_find_all(By.CLASS_NAME, "cardbox", parent = ad_list_container)
LOG.info("Found %s ad items on page %s.", len(list_items), current_page) LOG.info("Found %s ad items on page %s.", len(list_items), current_page)
except TimeoutError: except TimeoutError:
LOG.warning("Could not find ad list container or items on page %s.", current_page) LOG.warning("Could not find ad list container or items on page %s.", current_page)
@@ -192,7 +192,7 @@ class AdExtractor(WebScrapingMixin):
# Extract references using the CORRECTED selector # Extract references using the CORRECTED selector
try: try:
page_refs = [ page_refs = [
(await self.web_find(By.CSS_SELECTOR, 'div.manageitems-item-ad h3 a.text-onSurface', parent=li)).attrs['href'] (await self.web_find(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = li)).attrs["href"]
for li in list_items for li in list_items
] ]
refs.extend(page_refs) refs.extend(page_refs)
@@ -207,12 +207,12 @@ class AdExtractor(WebScrapingMixin):
# --- Navigate to next page --- # --- Navigate to next page ---
try: try:
# Find the pagination section again (scope might have changed after scroll/wait) # Find the pagination section again (scope might have changed after scroll/wait)
pagination_section = await self.web_find(By.CSS_SELECTOR, '.Pagination', timeout=5) pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = 5)
# Find the "Next" button using the correct aria-label selector and ensure it's not disabled # Find the "Next" button using the correct aria-label selector and ensure it's not disabled
next_button_element = None next_button_element = None
possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section) possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
for btn in possible_next_buttons: for btn in possible_next_buttons:
if not btn.attrs.get('disabled'): # Check if the button is enabled if not btn.attrs.get("disabled"): # Check if the button is enabled
next_button_element = btn next_button_element = btn
break # Found an enabled next button break # Found an enabled next button
@@ -235,7 +235,7 @@ class AdExtractor(WebScrapingMixin):
# --- End Navigation --- # --- End Navigation ---
if not refs: if not refs:
LOG.warning('No ad URLs were extracted.') LOG.warning("No ad URLs were extracted.")
return refs return refs
@@ -246,27 +246,27 @@ class AdExtractor(WebScrapingMixin):
""" """
if reflect.is_integer(id_or_url): if reflect.is_integer(id_or_url):
# navigate to start page, otherwise page can be None! # navigate to start page, otherwise page can be None!
await self.web_open('https://www.kleinanzeigen.de/') await self.web_open("https://www.kleinanzeigen.de/")
# enter the ad ID into the search bar # enter the ad ID into the search bar
await self.web_input(By.ID, "site-search-query", id_or_url) await self.web_input(By.ID, "site-search-query", id_or_url)
# navigate to ad page and wait # navigate to ad page and wait
await self.web_check(By.ID, 'site-search-submit', Is.CLICKABLE) await self.web_check(By.ID, "site-search-submit", Is.CLICKABLE)
submit_button = await self.web_find(By.ID, 'site-search-submit') submit_button = await self.web_find(By.ID, "site-search-submit")
await submit_button.click() await submit_button.click()
else: else:
await self.web_open(str(id_or_url)) # navigate to URL directly given await self.web_open(str(id_or_url)) # navigate to URL directly given
await self.web_sleep() await self.web_sleep()
# handle the case that invalid ad ID given # handle the case that invalid ad ID given
if self.page.url.endswith('k0'): if self.page.url.endswith("k0"):
LOG.error('There is no ad under the given ID.') LOG.error("There is no ad under the given ID.")
return False return False
# close (warning) popup, if given # close (warning) popup, if given
try: try:
await self.web_find(By.ID, 'vap-ovrly-secure') await self.web_find(By.ID, "vap-ovrly-secure")
LOG.warning('A popup appeared!') LOG.warning("A popup appeared!")
await self.web_click(By.CLASS_NAME, 'mfp-close') await self.web_click(By.CLASS_NAME, "mfp-close")
await self.web_sleep() await self.web_sleep()
except TimeoutError: except TimeoutError:
pass pass
@@ -280,22 +280,22 @@ class AdExtractor(WebScrapingMixin):
:param ad_id: the ad ID, already extracted by a calling function :param ad_id: the ad ID, already extracted by a calling function
:return: a dictionary with the keys as given in an ad YAML, and their respective values :return: a dictionary with the keys as given in an ad YAML, and their respective values
""" """
info:dict[str, Any] = {'active': True} info:dict[str, Any] = {"active": True}
# extract basic info # extract basic info
info['type'] = 'OFFER' if 's-anzeige' in self.page.url else 'WANTED' info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
title:str = await self.web_text(By.ID, 'viewad-title') title:str = await self.web_text(By.ID, "viewad-title")
LOG.info('Extracting information from ad with title "%s"', title) LOG.info('Extracting information from ad with title "%s"', title)
info['category'] = await self._extract_category_from_ad_page() info["category"] = await self._extract_category_from_ad_page()
info['title'] = title info["title"] = title
# Get raw description text # Get raw description text
raw_description = (await self.web_text(By.ID, 'viewad-description-text')).strip() raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip()
# Get prefix and suffix from config # Get prefix and suffix from config
prefix = get_description_affixes(self.config, prefix=True) prefix = get_description_affixes(self.config, prefix = True)
suffix = get_description_affixes(self.config, prefix=False) suffix = get_description_affixes(self.config, prefix = False)
# Remove prefix and suffix if present # Remove prefix and suffix if present
description_text = raw_description description_text = raw_description
@@ -304,38 +304,38 @@ class AdExtractor(WebScrapingMixin):
if suffix and description_text.endswith(suffix.strip()): if suffix and description_text.endswith(suffix.strip()):
description_text = description_text[:-len(suffix.strip())] description_text = description_text[:-len(suffix.strip())]
info['description'] = description_text.strip() info["description"] = description_text.strip()
info['special_attributes'] = await self._extract_special_attributes_from_ad_page() info["special_attributes"] = await self._extract_special_attributes_from_ad_page()
if "art_s" in info['special_attributes']: if "art_s" in info["special_attributes"]:
# change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer" # change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
info['category'] = f"{info['category']}/{info['special_attributes']['art_s']}" info["category"] = f"{info['category']}/{info['special_attributes']['art_s']}"
del info['special_attributes']['art_s'] del info["special_attributes"]["art_s"]
if "schaden_s" in info['special_attributes']: if "schaden_s" in info["special_attributes"]:
# change f to 'nein' and 't' to 'ja' # change f to 'nein' and 't' to 'ja'
info['special_attributes']['schaden_s'] = info['special_attributes']['schaden_s'].translate(str.maketrans({'t': 'ja', 'f': 'nein'})) info["special_attributes"]["schaden_s"] = info["special_attributes"]["schaden_s"].translate(str.maketrans({"t": "ja", "f": "nein"}))
info['price'], info['price_type'] = await self._extract_pricing_info_from_ad_page() info["price"], info["price_type"] = await self._extract_pricing_info_from_ad_page()
info['shipping_type'], info['shipping_costs'], info['shipping_options'] = await self._extract_shipping_info_from_ad_page() info["shipping_type"], info["shipping_costs"], info["shipping_options"] = await self._extract_shipping_info_from_ad_page()
info['sell_directly'] = await self._extract_sell_directly_from_ad_page() info["sell_directly"] = await self._extract_sell_directly_from_ad_page()
info['images'] = await self._download_images_from_ad_page(directory, ad_id) info["images"] = await self._download_images_from_ad_page(directory, ad_id)
info['contact'] = await self._extract_contact_from_ad_page() info["contact"] = await self._extract_contact_from_ad_page()
info['id'] = ad_id info["id"] = ad_id
try: # try different locations known for creation date element try: # try different locations known for creation date element
creation_date = await self.web_text(By.XPATH, creation_date = await self.web_text(By.XPATH,
'/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span') "/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
except TimeoutError: except TimeoutError:
creation_date = await self.web_text(By.CSS_SELECTOR, '#viewad-extra-info > div:nth-child(1) > span:nth-child(2)') creation_date = await self.web_text(By.CSS_SELECTOR, "#viewad-extra-info > div:nth-child(1) > span:nth-child(2)")
# convert creation date to ISO format # convert creation date to ISO format
created_parts = creation_date.split('.') created_parts = creation_date.split(".")
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00' creation_date = created_parts[2] + "-" + created_parts[1] + "-" + created_parts[0] + " 00:00:00"
creation_date = datetime.fromisoformat(creation_date).isoformat() creation_date = datetime.fromisoformat(creation_date).isoformat()
info['created_on'] = creation_date info["created_on"] = creation_date
info['updated_on'] = None # will be set later on info["updated_on"] = None # will be set later on
# Calculate the initial hash for the downloaded ad # Calculate the initial hash for the downloaded ad
info['content_hash'] = calculate_content_hash(info) info["content_hash"] = calculate_content_hash(info)
return info return info
@@ -346,12 +346,12 @@ class AdExtractor(WebScrapingMixin):
:return: a category string of form abc/def, where a-f are digits :return: a category string of form abc/def, where a-f are digits
""" """
category_line = await self.web_find(By.ID, 'vap-brdcrmb') category_line = await self.web_find(By.ID, "vap-brdcrmb")
category_first_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line) category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
category_second_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line) category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
cat_num_first = category_first_part.attrs['href'].split('/')[-1][1:] cat_num_first = category_first_part.attrs["href"].split("/")[-1][1:]
cat_num_second = category_second_part.attrs['href'].split('/')[-1][1:] cat_num_second = category_second_part.attrs["href"].split("/")[-1][1:]
category:str = cat_num_first + '/' + cat_num_second category:str = cat_num_first + "/" + cat_num_second
return category return category
@@ -368,7 +368,7 @@ class AdExtractor(WebScrapingMixin):
special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"] special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"]
special_attributes = dict(item.split(":") for item in special_attributes_str.split("|") if ":" in item) special_attributes = dict(item.split(":") for item in special_attributes_str.split("|") if ":" in item)
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s') and k != "versand_s"} special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith(".versand_s") and k != "versand_s"}
return special_attributes return special_attributes
async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]: async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
@@ -378,24 +378,24 @@ class AdExtractor(WebScrapingMixin):
:return: the price of the offer (optional); and the pricing type :return: the price of the offer (optional); and the pricing type
""" """
try: try:
price_str:str = await self.web_text(By.ID, 'viewad-price') price_str:str = await self.web_text(By.ID, "viewad-price")
price:int | None = None price:int | None = None
match price_str.split()[-1]: match price_str.split()[-1]:
case '': case "":
price_type = 'FIXED' price_type = "FIXED"
# replace('.', '') is to remove the thousands separator before parsing as int # replace('.', '') is to remove the thousands separator before parsing as int
price = int(price_str.replace('.', '').split()[0]) price = int(price_str.replace(".", "").split()[0])
case 'VB': case "VB":
price_type = 'NEGOTIABLE' price_type = "NEGOTIABLE"
if price_str != "VB": # can be either 'X € VB', or just 'VB' if price_str != "VB": # can be either 'X € VB', or just 'VB'
price = int(price_str.replace('.', '').split()[0]) price = int(price_str.replace(".", "").split()[0])
case 'verschenken': case "verschenken":
price_type = 'GIVE_AWAY' price_type = "GIVE_AWAY"
case _: case _:
price_type = 'NOT_APPLICABLE' price_type = "NOT_APPLICABLE"
return price, price_type return price, price_type
except TimeoutError: # no 'commercial' ad, has no pricing box etc. except TimeoutError: # no 'commercial' ad, has no pricing box etc.
return None, 'NOT_APPLICABLE' return None, "NOT_APPLICABLE"
async def _extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]: async def _extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
""" """
@@ -403,17 +403,17 @@ class AdExtractor(WebScrapingMixin):
:return: the shipping type, and the shipping price (optional) :return: the shipping type, and the shipping price (optional)
""" """
ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None ship_type, ship_costs, shipping_options = "NOT_APPLICABLE", None, None
try: try:
shipping_text = await self.web_text(By.CLASS_NAME, 'boxedarticle--details--shipping') shipping_text = await self.web_text(By.CLASS_NAME, "boxedarticle--details--shipping")
# e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung' # e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung'
if shipping_text == 'Nur Abholung': if shipping_text == "Nur Abholung":
ship_type = 'PICKUP' ship_type = "PICKUP"
elif shipping_text == 'Versand möglich': elif shipping_text == "Versand möglich":
ship_type = 'SHIPPING' ship_type = "SHIPPING"
elif '' in shipping_text: elif "" in shipping_text:
shipping_price_parts = shipping_text.split(' ') shipping_price_parts = shipping_text.split(" ")
ship_type = 'SHIPPING' ship_type = "SHIPPING"
ship_costs = float(misc.parse_decimal(shipping_price_parts[-2])) ship_costs = float(misc.parse_decimal(shipping_price_parts[-2]))
# reading shipping option from kleinanzeigen # reading shipping option from kleinanzeigen
@@ -425,7 +425,7 @@ class AdExtractor(WebScrapingMixin):
internal_shipping_opt = [x for x in shipping_costs if x["priceInEuroCent"] == ship_costs * 100] internal_shipping_opt = [x for x in shipping_costs if x["priceInEuroCent"] == ship_costs * 100]
if not internal_shipping_opt: if not internal_shipping_opt:
return 'NOT_APPLICABLE', ship_costs, shipping_options return "NOT_APPLICABLE", ship_costs, shipping_options
# map to internal shipping identifiers used by kleinanzeigen-bot # map to internal shipping identifiers used by kleinanzeigen-bot
shipping_option_mapping = { shipping_option_mapping = {
@@ -440,13 +440,13 @@ class AdExtractor(WebScrapingMixin):
"HERMES_004": "Hermes_L" "HERMES_004": "Hermes_L"
} }
shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]['id']) shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]["id"])
if not shipping_option: if not shipping_option:
return 'NOT_APPLICABLE', ship_costs, shipping_options return "NOT_APPLICABLE", ship_costs, shipping_options
shipping_options = [shipping_option] shipping_options = [shipping_option]
except TimeoutError: # no pricing box -> no shipping given except TimeoutError: # no pricing box -> no shipping given
ship_type = 'NOT_APPLICABLE' ship_type = "NOT_APPLICABLE"
return ship_type, ship_costs, shipping_options return ship_type, ship_costs, shipping_options
@@ -457,7 +457,7 @@ class AdExtractor(WebScrapingMixin):
:return: a boolean indicating whether the sell directly option is active (optional) :return: a boolean indicating whether the sell directly option is active (optional)
""" """
try: try:
buy_now_is_active:bool = 'Direkt kaufen' in (await self.web_text(By.ID, 'payment-buttons-sidebar')) buy_now_is_active:bool = "Direkt kaufen" in (await self.web_text(By.ID, "payment-buttons-sidebar"))
return buy_now_is_active return buy_now_is_active
except TimeoutError: except TimeoutError:
return None return None
@@ -469,34 +469,34 @@ class AdExtractor(WebScrapingMixin):
:return: a dictionary containing the address parts with their corresponding values :return: a dictionary containing the address parts with their corresponding values
""" """
contact:dict[str, (str | None)] = {} contact:dict[str, (str | None)] = {}
address_text = await self.web_text(By.ID, 'viewad-locality') address_text = await self.web_text(By.ID, "viewad-locality")
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt # format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
try: try:
street = (await self.web_text(By.ID, 'street-address'))[:-1] # trailing comma street = (await self.web_text(By.ID, "street-address"))[:-1] # trailing comma
contact['street'] = street contact["street"] = street
except TimeoutError: except TimeoutError:
LOG.info('No street given in the contact.') LOG.info("No street given in the contact.")
(zipcode, location) = address_text.split(" ", 1) (zipcode, location) = address_text.split(" ", 1)
contact['zipcode'] = zipcode # e.g. 19372 contact["zipcode"] = zipcode # e.g. 19372
contact['location'] = location # e.g. Mecklenburg-Vorpommern - Steinbeck contact["location"] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
contact_person_element:Element = await self.web_find(By.ID, 'viewad-contact') contact_person_element:Element = await self.web_find(By.ID, "viewad-contact")
name_element = await self.web_find(By.CLASS_NAME, 'iconlist-text', parent = contact_person_element) name_element = await self.web_find(By.CLASS_NAME, "iconlist-text", parent = contact_person_element)
try: try:
name = await self.web_text(By.TAG_NAME, 'a', parent = name_element) name = await self.web_text(By.TAG_NAME, "a", parent = name_element)
except TimeoutError: # edge case: name without link except TimeoutError: # edge case: name without link
name = await self.web_text(By.TAG_NAME, 'span', parent = name_element) name = await self.web_text(By.TAG_NAME, "span", parent = name_element)
contact['name'] = name contact["name"] = name
if 'street' not in contact: if "street" not in contact:
contact['street'] = None contact["street"] = None
try: # phone number is unusual for non-professional sellers today try: # phone number is unusual for non-professional sellers today
phone_element = await self.web_find(By.ID, 'viewad-contact-phone') phone_element = await self.web_find(By.ID, "viewad-contact-phone")
phone_number = await self.web_text(By.TAG_NAME, 'a', parent = phone_element) phone_number = await self.web_text(By.TAG_NAME, "a", parent = phone_element)
contact['phone'] = ''.join(phone_number.replace('-', ' ').split(' ')).replace('+49(0)', '0') contact["phone"] = "".join(phone_number.replace("-", " ").split(" ")).replace("+49(0)", "0")
except TimeoutError: except TimeoutError:
contact['phone'] = None # phone seems to be a deprecated feature (for non-professional users) contact["phone"] = None # phone seems to be a deprecated feature (for non-professional users)
# also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/ # also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/
return contact return contact

View File

@@ -96,7 +96,7 @@ def save_dict(filepath:str, content:dict[str, Any]) -> None:
yaml.indent(mapping = 2, sequence = 4, offset = 2) yaml.indent(mapping = 2, sequence = 4, offset = 2)
yaml.representer.add_representer(str, # use YAML | block style for multi-line strings yaml.representer.add_representer(str, # use YAML | block style for multi-line strings
lambda dumper, data: lambda dumper, data:
dumper.represent_scalar('tag:yaml.org,2002:str', data, style = '|' if '\n' in data else None) dumper.represent_scalar("tag:yaml.org,2002:str", data, style = "|" if "\n" in data else None)
) )
yaml.allow_duplicate_keys = False yaml.allow_duplicate_keys = False
yaml.explicit_start = False yaml.explicit_start = False

View File

@@ -3,14 +3,14 @@
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import sys, traceback # isort: skip import sys, traceback # isort: skip
from types import FrameType, TracebackType from types import FrameType, TracebackType
from typing import Any, Final from typing import Final
from . import loggers from . import loggers
LOG:Final[loggers.Logger] = loggers.get_logger(__name__) LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
def on_exception(ex_type: type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> None: def on_exception(ex_type:type[BaseException] | None, ex_value:BaseException | None, ex_traceback:TracebackType | None) -> None:
if ex_type is None or ex_value is None: if ex_type is None or ex_value is None:
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s", ex_type, ex_value) LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s", ex_type, ex_value)
return return

View File

@@ -11,6 +11,6 @@ class KleinanzeigenBotError(RuntimeError):
class CaptchaEncountered(KleinanzeigenBotError): class CaptchaEncountered(KleinanzeigenBotError):
"""Raised when a Captcha was detected and auto-restart is enabled.""" """Raised when a Captcha was detected and auto-restart is enabled."""
def __init__(self, restart_delay: timedelta) -> None: def __init__(self, restart_delay:timedelta) -> None:
super().__init__() super().__init__()
self.restart_delay = restart_delay self.restart_delay = restart_delay

View File

@@ -42,7 +42,7 @@ class Locale(NamedTuple):
return f"{self.language}{region_part}{encoding_part}" return f"{self.language}{region_part}{encoding_part}"
@staticmethod @staticmethod
def of(locale_string: str) -> 'Locale': def of(locale_string:str) -> "Locale":
""" """
>>> Locale.of("en_US.UTF-8") >>> Locale.of("en_US.UTF-8")
Locale(language='en', region='US', encoding='UTF-8') Locale(language='en', region='US', encoding='UTF-8')
@@ -86,11 +86,11 @@ def _detect_locale() -> Locale:
return Locale.of(lang) if lang else Locale("en", "US", "UTF-8") return Locale.of(lang) if lang else Locale("en", "US", "UTF-8")
_CURRENT_LOCALE: Locale = _detect_locale() _CURRENT_LOCALE:Locale = _detect_locale()
_TRANSLATIONS: dict[str, Any] | None = None _TRANSLATIONS:dict[str, Any] | None = None
def translate(text:object, caller: inspect.FrameInfo | None) -> str: def translate(text:object, caller:inspect.FrameInfo | None) -> str:
text = str(text) text = str(text)
if not caller: if not caller:
return text return text
@@ -105,7 +105,7 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
if not _TRANSLATIONS: if not _TRANSLATIONS:
return text return text
module_name = caller.frame.f_globals.get('__name__') # pylint: disable=redefined-outer-name module_name = caller.frame.f_globals.get("__name__") # pylint: disable=redefined-outer-name
file_basename = os.path.splitext(os.path.basename(caller.filename))[0] file_basename = os.path.splitext(os.path.basename(caller.filename))[0]
if module_name and module_name.endswith(f".{file_basename}"): if module_name and module_name.endswith(f".{file_basename}"):
module_name = module_name[:-(len(file_basename) + 1)] module_name = module_name[:-(len(file_basename) + 1)]
@@ -124,9 +124,9 @@ gettext.gettext = lambda message: translate(_original_gettext(message), reflect.
for module_name, module in sys.modules.items(): for module_name, module in sys.modules.items():
if module is None or module_name in sys.builtin_module_names: if module is None or module_name in sys.builtin_module_names:
continue continue
if hasattr(module, '_') and module._ is _original_gettext: if hasattr(module, "_") and module._ is _original_gettext:
module._ = gettext.gettext # type: ignore[attr-defined] module._ = gettext.gettext # type: ignore[attr-defined]
if hasattr(module, 'gettext') and module.gettext is _original_gettext: if hasattr(module, "gettext") and module.gettext is _original_gettext:
module.gettext = gettext.gettext # type: ignore[attr-defined] module.gettext = gettext.gettext # type: ignore[attr-defined]
@@ -190,8 +190,8 @@ def pluralize(noun:str, count:int | Sized, *, prefix_with_count:bool = True) ->
# English # English
if len(noun) < 2: # noqa: PLR2004 Magic value used in comparison if len(noun) < 2: # noqa: PLR2004 Magic value used in comparison
return f"{prefix}{noun}s" return f"{prefix}{noun}s"
if noun.endswith(('s', 'sh', 'ch', 'x', 'z')): if noun.endswith(("s", "sh", "ch", "x", "z")):
return f"{prefix}{noun}es" return f"{prefix}{noun}es"
if noun.endswith('y') and noun[-2].lower() not in "aeiou": if noun.endswith("y") and noun[-2].lower() not in "aeiou":
return f"{prefix}{noun[:-1]}ies" return f"{prefix}{noun[:-1]}ies"
return f"{prefix}{noun}s" return f"{prefix}{noun}s"

View File

@@ -28,11 +28,11 @@ LOG_ROOT:Final[logging.Logger] = logging.getLogger()
class _MaxLevelFilter(logging.Filter): class _MaxLevelFilter(logging.Filter):
def __init__(self, level: int) -> None: def __init__(self, level:int) -> None:
super().__init__() super().__init__()
self.level = level self.level = level
def filter(self, record: logging.LogRecord) -> bool: def filter(self, record:logging.LogRecord) -> bool:
return record.levelno <= self.level return record.levelno <= self.level
@@ -104,7 +104,7 @@ def configure_console_logging() -> None:
class LogFileHandle: class LogFileHandle:
"""Encapsulates a log file handler with close and status methods.""" """Encapsulates a log file handler with close and status methods."""
def __init__(self, file_path: str, handler: RotatingFileHandler, logger: logging.Logger) -> None: def __init__(self, file_path:str, handler:RotatingFileHandler, logger:logging.Logger) -> None:
self.file_path = file_path self.file_path = file_path
self._handler:RotatingFileHandler | None = handler self._handler:RotatingFileHandler | None = handler
self._logger = logger self._logger = logger
@@ -146,14 +146,14 @@ def flush_all_handlers() -> None:
handler.flush() handler.flush()
def get_logger(name: str | None = None) -> logging.Logger: def get_logger(name:str | None = None) -> logging.Logger:
""" """
Returns a localized logger Returns a localized logger
""" """
class TranslatingLogger(logging.Logger): class TranslatingLogger(logging.Logger):
def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None: def _log(self, level:int, msg:object, *args:Any, **kwargs:Any) -> None:
if level != DEBUG: # debug messages should not be translated if level != DEBUG: # debug messages should not be translated
msg = i18n.translate(msg, reflect.get_caller(2)) msg = i18n.translate(msg, reflect.get_caller(2))
super()._log(level, msg, *args, **kwargs) super()._log(level, msg, *args, **kwargs)

View File

@@ -10,7 +10,7 @@ from typing import Any, TypeVar
from . import i18n from . import i18n
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions # https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
T = TypeVar('T') T = TypeVar("T")
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None: def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
@@ -49,7 +49,7 @@ def is_frozen() -> bool:
return getattr(sys, "frozen", False) return getattr(sys, "frozen", False)
async def ainput(prompt: str) -> str: async def ainput(prompt:str) -> str:
return await asyncio.to_thread(input, f'{prompt} ') return await asyncio.to_thread(input, f'{prompt} ')
@@ -84,10 +84,10 @@ def parse_decimal(number:float | int | str) -> decimal.Decimal:
def parse_datetime( def parse_datetime(
date: datetime | str | None, date:datetime | str | None,
*, *,
add_timezone_if_missing: bool = True, add_timezone_if_missing:bool = True,
use_local_timezone: bool = True use_local_timezone:bool = True
) -> datetime | None: ) -> datetime | None:
""" """
Parses a datetime object or ISO-formatted string. Parses a datetime object or ISO-formatted string.
@@ -152,22 +152,22 @@ def parse_duration(text:str) -> timedelta:
>>> parse_duration("invalid input") >>> parse_duration("invalid input")
datetime.timedelta(0) datetime.timedelta(0)
""" """
pattern = re.compile(r'(\d+)\s*([dhms])') pattern = re.compile(r"(\d+)\s*([dhms])")
parts = pattern.findall(text.lower()) parts = pattern.findall(text.lower())
kwargs: dict[str, int] = {} kwargs:dict[str, int] = {}
for value, unit in parts: for value, unit in parts:
if unit == 'd': if unit == "d":
kwargs['days'] = kwargs.get('days', 0) + int(value) kwargs["days"] = kwargs.get("days", 0) + int(value)
elif unit == 'h': elif unit == "h":
kwargs['hours'] = kwargs.get('hours', 0) + int(value) kwargs["hours"] = kwargs.get("hours", 0) + int(value)
elif unit == 'm': elif unit == "m":
kwargs['minutes'] = kwargs.get('minutes', 0) + int(value) kwargs["minutes"] = kwargs.get("minutes", 0) + int(value)
elif unit == 's': elif unit == "s":
kwargs['seconds'] = kwargs.get('seconds', 0) + int(value) kwargs["seconds"] = kwargs.get("seconds", 0) + int(value)
return timedelta(**kwargs) return timedelta(**kwargs)
def format_timedelta(td: timedelta) -> str: def format_timedelta(td:timedelta) -> str:
""" """
Formats a timedelta into a human-readable string using the pluralize utility. Formats a timedelta into a human-readable string using the pluralize utility.

View File

@@ -5,7 +5,7 @@ import inspect
from typing import Any from typing import Any
def get_caller(depth: int = 1) -> inspect.FrameInfo | None: def get_caller(depth:int = 1) -> inspect.FrameInfo | None:
stack = inspect.stack() stack = inspect.stack()
try: try:
for frame in stack[depth + 1:]: for frame in stack[depth + 1:]:

View File

@@ -165,7 +165,7 @@ class WebScrapingMixin:
prefs_file = os.path.join(profile_dir, "Preferences") prefs_file = os.path.join(profile_dir, "Preferences")
if not os.path.exists(prefs_file): if not os.path.exists(prefs_file):
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file) LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
with open(prefs_file, "w", encoding = 'UTF-8') as fd: with open(prefs_file, "w", encoding = "UTF-8") as fd:
json.dump({ json.dump({
"credentials_enable_service": False, "credentials_enable_service": False,
"enable_do_not_track": True, "enable_do_not_track": True,
@@ -234,16 +234,16 @@ class WebScrapingMixin:
case "Windows": case "Windows":
browser_paths = [ browser_paths = [
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe', os.environ.get("PROGRAMFILES", "C:\\Program Files") + r"\Microsoft\Edge\Application\msedge.exe",
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe', os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r"\Microsoft\Edge\Application\msedge.exe",
os.environ["PROGRAMFILES"] + r'\Chromium\Application\chrome.exe', os.environ["PROGRAMFILES"] + r"\Chromium\Application\chrome.exe",
os.environ["PROGRAMFILES(X86)"] + r'\Chromium\Application\chrome.exe', os.environ["PROGRAMFILES(X86)"] + r"\Chromium\Application\chrome.exe",
os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe', os.environ["LOCALAPPDATA"] + r"\Chromium\Application\chrome.exe",
os.environ["PROGRAMFILES"] + r'\Chrome\Application\chrome.exe', os.environ["PROGRAMFILES"] + r"\Chrome\Application\chrome.exe",
os.environ["PROGRAMFILES(X86)"] + r'\Chrome\Application\chrome.exe', os.environ["PROGRAMFILES(X86)"] + r"\Chrome\Application\chrome.exe",
os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe', os.environ["LOCALAPPDATA"] + r"\Chrome\Application\chrome.exe",
shutil.which("msedge.exe"), shutil.which("msedge.exe"),
shutil.which("chromium.exe"), shutil.which("chromium.exe"),
@@ -259,8 +259,8 @@ class WebScrapingMixin:
raise AssertionError(_("Installed browser could not be detected")) raise AssertionError(_("Installed browser could not be detected"))
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *, async def web_await(self, condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
timeout:int | float = 5, timeout_error_message: str = "") -> T: timeout:int | float = 5, timeout_error_message:str = "") -> T:
""" """
Blocks/waits until the given condition is met. Blocks/waits until the given condition is met.
@@ -523,7 +523,7 @@ class WebScrapingMixin:
return response return response
# pylint: enable=dangerous-default-value # pylint: enable=dangerous-default-value
async def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10_000, *, scroll_back_top: bool = False) -> None: async def web_scroll_page_down(self, scroll_length:int = 10, scroll_speed:int = 10_000, *, scroll_back_top:bool = False) -> None:
""" """
Smoothly scrolls the current web page down. Smoothly scrolls the current web page down.
@@ -532,7 +532,7 @@ class WebScrapingMixin:
:param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom :param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
""" """
current_y_pos = 0 current_y_pos = 0
bottom_y_pos: int = await self.web_execute('document.body.scrollHeight') # get bottom position bottom_y_pos:int = await self.web_execute("document.body.scrollHeight") # get bottom position
while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached
current_y_pos += scroll_length current_y_pos += scroll_length
await self.web_execute(f'window.scrollTo(0, {current_y_pos})') # scroll one step await self.web_execute(f'window.scrollTo(0, {current_y_pos})') # scroll one step

View File

@@ -1,8 +1,6 @@
""" # SPDX-FileCopyrightText: © Jens Bergmann and contributors
SPDX-FileCopyrightText: © Jens Bergmann and contributors # SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import os import os
from typing import Any, Final from typing import Any, Final
from unittest.mock import MagicMock from unittest.mock import MagicMock
@@ -21,7 +19,7 @@ LOG.setLevel(loggers.DEBUG)
@pytest.fixture @pytest.fixture
def test_data_dir(tmp_path: str) -> str: def test_data_dir(tmp_path:str) -> str:
"""Provides a temporary directory for test data. """Provides a temporary directory for test data.
This fixture uses pytest's built-in tmp_path fixture to create a temporary This fixture uses pytest's built-in tmp_path fixture to create a temporary
@@ -41,33 +39,33 @@ def sample_config() -> dict[str, Any]:
- Publishing settings - Publishing settings
""" """
return { return {
'login': { "login": {
'username': 'testuser', "username": "testuser",
'password': 'testpass' "password": "testpass"
}, },
'browser': { "browser": {
'arguments': [], "arguments": [],
'binary_location': None, "binary_location": None,
'extensions': [], "extensions": [],
'use_private_window': True, "use_private_window": True,
'user_data_dir': None, "user_data_dir": None,
'profile_name': None "profile_name": None
}, },
'ad_defaults': { "ad_defaults": {
'description': { "description": {
'prefix': 'Test Prefix', "prefix": "Test Prefix",
'suffix': 'Test Suffix' "suffix": "Test Suffix"
} }
}, },
'publishing': { "publishing": {
'delete_old_ads': 'BEFORE_PUBLISH', "delete_old_ads": "BEFORE_PUBLISH",
'delete_old_ads_by_title': False "delete_old_ads_by_title": False
} }
} }
@pytest.fixture @pytest.fixture
def test_bot(sample_config: dict[str, Any]) -> KleinanzeigenBot: def test_bot(sample_config:dict[str, Any]) -> KleinanzeigenBot:
"""Provides a fresh KleinanzeigenBot instance for all test classes. """Provides a fresh KleinanzeigenBot instance for all test classes.
Dependencies: Dependencies:
@@ -89,7 +87,7 @@ def browser_mock() -> MagicMock:
@pytest.fixture @pytest.fixture
def log_file_path(test_data_dir: str) -> str: def log_file_path(test_data_dir:str) -> str:
"""Provides a temporary path for log files. """Provides a temporary path for log files.
Dependencies: Dependencies:
@@ -99,7 +97,7 @@ def log_file_path(test_data_dir: str) -> str:
@pytest.fixture @pytest.fixture
def test_extractor(browser_mock: MagicMock, sample_config: dict[str, Any]) -> AdExtractor: def test_extractor(browser_mock:MagicMock, sample_config:dict[str, Any]) -> AdExtractor:
"""Provides a fresh AdExtractor instance for testing. """Provides a fresh AdExtractor instance for testing.
Dependencies: Dependencies:

View File

@@ -122,9 +122,9 @@ def test_calculate_content_hash_with_none_values() -> None:
) )
]) ])
def test_get_description_affixes( def test_get_description_affixes(
config: dict[str, Any], config:dict[str, Any],
prefix: bool, prefix:bool,
expected: str expected:str
) -> None: ) -> None:
"""Test get_description_affixes function with various inputs.""" """Test get_description_affixes function with various inputs."""
result = ads.get_description_affixes(config, prefix = prefix) result = ads.get_description_affixes(config, prefix = prefix)
@@ -157,7 +157,7 @@ def test_get_description_affixes(
"" ""
) )
]) ])
def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool, expected: str) -> None: def test_get_description_affixes_edge_cases(config:dict[str, Any], prefix:bool, expected:str) -> None:
"""Test edge cases for description affix handling.""" """Test edge cases for description affix handling."""
assert ads.get_description_affixes(config, prefix = prefix) == expected assert ads.get_description_affixes(config, prefix = prefix) == expected
@@ -170,7 +170,7 @@ def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool
(3.14, ""), # Test with a float (3.14, ""), # Test with a float
(set(), ""), # Test with an empty set (set(), ""), # Test with an empty set
]) ])
def test_get_description_affixes_edge_cases_non_dict(config: Any, expected: str) -> None: def test_get_description_affixes_edge_cases_non_dict(config:Any, expected:str) -> None:
"""Test get_description_affixes function with non-dict inputs.""" """Test get_description_affixes function with non-dict inputs."""
result = ads.get_description_affixes(config, prefix=True) result = ads.get_description_affixes(config, prefix = True)
assert result == expected assert result == expected

View File

@@ -12,21 +12,21 @@ class TestKleinanzeigenBot:
def bot(self) -> KleinanzeigenBot: def bot(self) -> KleinanzeigenBot:
return KleinanzeigenBot() return KleinanzeigenBot()
def test_parse_args_help(self, bot: KleinanzeigenBot) -> None: def test_parse_args_help(self, bot:KleinanzeigenBot) -> None:
"""Test parsing of help command""" """Test parsing of help command"""
bot.parse_args(["app", "help"]) bot.parse_args(["app", "help"])
assert bot.command == "help" assert bot.command == "help"
assert bot.ads_selector == "due" assert bot.ads_selector == "due"
assert not bot.keep_old_ads assert not bot.keep_old_ads
def test_parse_args_publish(self, bot: KleinanzeigenBot) -> None: def test_parse_args_publish(self, bot:KleinanzeigenBot) -> None:
"""Test parsing of publish command with options""" """Test parsing of publish command with options"""
bot.parse_args(["app", "publish", "--ads=all", "--keep-old"]) bot.parse_args(["app", "publish", "--ads=all", "--keep-old"])
assert bot.command == "publish" assert bot.command == "publish"
assert bot.ads_selector == "all" assert bot.ads_selector == "all"
assert bot.keep_old_ads assert bot.keep_old_ads
def test_get_version(self, bot: KleinanzeigenBot) -> None: def test_get_version(self, bot:KleinanzeigenBot) -> None:
"""Test version retrieval""" """Test version retrieval"""
version = bot.get_version() version = bot.get_version()
assert isinstance(version, str) assert isinstance(version, str)

View File

@@ -12,31 +12,31 @@ from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
class _DimensionsDict(TypedDict): class _DimensionsDict(TypedDict):
dimension108: str dimension108:str
class _UniversalAnalyticsOptsDict(TypedDict): class _UniversalAnalyticsOptsDict(TypedDict):
dimensions: _DimensionsDict dimensions:_DimensionsDict
class _BelenConfDict(TypedDict): class _BelenConfDict(TypedDict):
universalAnalyticsOpts: _UniversalAnalyticsOptsDict universalAnalyticsOpts:_UniversalAnalyticsOptsDict
class _SpecialAttributesDict(TypedDict, total = False): class _SpecialAttributesDict(TypedDict, total = False):
art_s: str art_s:str
condition_s: str condition_s:str
class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never used class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never used
belen_conf: _BelenConfDict belen_conf:_BelenConfDict
expected: _SpecialAttributesDict expected:_SpecialAttributesDict
class TestAdExtractorBasics: class TestAdExtractorBasics:
"""Basic synchronous tests for AdExtractor.""" """Basic synchronous tests for AdExtractor."""
def test_constructor(self, browser_mock: MagicMock, sample_config: dict[str, Any]) -> None: def test_constructor(self, browser_mock:MagicMock, sample_config:dict[str, Any]) -> None:
"""Test the constructor of AdExtractor""" """Test the constructor of AdExtractor"""
extractor = AdExtractor(browser_mock, sample_config) extractor = AdExtractor(browser_mock, sample_config)
assert extractor.browser == browser_mock assert extractor.browser == browser_mock
@@ -48,7 +48,7 @@ class TestAdExtractorBasics:
("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1), ("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1),
("https://www.kleinanzeigen.de/invalid-url", -1), ("https://www.kleinanzeigen.de/invalid-url", -1),
]) ])
def test_extract_ad_id_from_ad_url(self, test_extractor: AdExtractor, url: str, expected_id: int) -> None: def test_extract_ad_id_from_ad_url(self, test_extractor:AdExtractor, url:str, expected_id:int) -> None:
"""Test extraction of ad ID from different URL formats.""" """Test extraction of ad ID from different URL formats."""
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
@@ -66,19 +66,19 @@ class TestAdExtractorPricing:
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_pricing_info( async def test_extract_pricing_info(
self, test_extractor: AdExtractor, price_text: str, expected_price: int | None, expected_type: str self, test_extractor:AdExtractor, price_text:str, expected_price:int | None, expected_type:str
) -> None: ) -> None:
"""Test price extraction with different formats""" """Test price extraction with different formats"""
with patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = price_text): with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = price_text):
price, price_type = await test_extractor._extract_pricing_info_from_ad_page() price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
assert price == expected_price assert price == expected_price
assert price_type == expected_type assert price_type == expected_type
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_pricing_info_timeout(self, test_extractor: AdExtractor) -> None: async def test_extract_pricing_info_timeout(self, test_extractor:AdExtractor) -> None:
"""Test price extraction when element is not found""" """Test price extraction when element is not found"""
with patch.object(test_extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError): with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
price, price_type = await test_extractor._extract_pricing_info_from_ad_page() price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
assert price is None assert price is None
assert price_type == "NOT_APPLICABLE" assert price_type == "NOT_APPLICABLE"
@@ -95,15 +95,15 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_shipping_info( async def test_extract_shipping_info(
self, test_extractor: AdExtractor, shipping_text: str, expected_type: str, expected_cost: float | None self, test_extractor:AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None
) -> None: ) -> None:
"""Test shipping info extraction with different text formats.""" """Test shipping info extraction with different text formats."""
with patch.object(test_extractor, 'page', MagicMock()), \ with patch.object(test_extractor, "page", MagicMock()), \
patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = shipping_text), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = shipping_text), \
patch.object(test_extractor, 'web_request', new_callable = AsyncMock) as mock_web_request: patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
if expected_cost: if expected_cost:
shipping_response: dict[str, Any] = { shipping_response:dict[str, Any] = {
"data": { "data": {
"shippingOptionsResponse": { "shippingOptionsResponse": {
"options": [ "options": [
@@ -125,7 +125,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_shipping_info_with_options(self, test_extractor: AdExtractor) -> None: async def test_extract_shipping_info_with_options(self, test_extractor:AdExtractor) -> None:
"""Test shipping info extraction with shipping options.""" """Test shipping info extraction with shipping options."""
shipping_response = { shipping_response = {
"content": json.dumps({ "content": json.dumps({
@@ -139,9 +139,9 @@ class TestAdExtractorShipping:
}) })
} }
with patch.object(test_extractor, 'page', MagicMock()), \ with patch.object(test_extractor, "page", MagicMock()), \
patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \ patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \
patch.object(test_extractor, 'web_request', new_callable = AsyncMock, return_value = shipping_response): patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response):
shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page() shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page()
@@ -154,21 +154,21 @@ class TestAdExtractorNavigation:
"""Tests for navigation related functionality.""" """Tests for navigation related functionality."""
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_navigate_to_ad_page_with_url(self, test_extractor: AdExtractor) -> None: async def test_navigate_to_ad_page_with_url(self, test_extractor:AdExtractor) -> None:
"""Test navigation to ad page using a URL.""" """Test navigation to ad page using a URL."""
page_mock = AsyncMock() page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
with patch.object(test_extractor, 'page', page_mock), \ with patch.object(test_extractor, "page", page_mock), \
patch.object(test_extractor, 'web_open', new_callable = AsyncMock) as mock_web_open, \ patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError): patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
result = await test_extractor.naviagte_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345") result = await test_extractor.naviagte_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345")
assert result is True assert result is True
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345") mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_navigate_to_ad_page_with_id(self, test_extractor: AdExtractor) -> None: async def test_navigate_to_ad_page_with_id(self, test_extractor:AdExtractor) -> None:
"""Test navigation to ad page using an ID.""" """Test navigation to ad page using an ID."""
page_mock = AsyncMock() page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
@@ -186,7 +186,7 @@ class TestAdExtractorNavigation:
popup_close_mock.click = AsyncMock() popup_close_mock.click = AsyncMock()
popup_close_mock.apply = AsyncMock(return_value = True) popup_close_mock.apply = AsyncMock(return_value = True)
def find_mock(selector_type: By, selector_value: str, **_: Any) -> Element | None: def find_mock(selector_type:By, selector_value:str, **_:Any) -> Element | None:
if selector_type == By.ID and selector_value == "site-search-query": if selector_type == By.ID and selector_value == "site-search-query":
return input_mock return input_mock
if selector_type == By.ID and selector_value == "site-search-submit": if selector_type == By.ID and selector_value == "site-search-submit":
@@ -195,20 +195,20 @@ class TestAdExtractorNavigation:
return popup_close_mock return popup_close_mock
return None return None
with patch.object(test_extractor, 'page', page_mock), \ with patch.object(test_extractor, "page", page_mock), \
patch.object(test_extractor, 'web_open', new_callable = AsyncMock) as mock_web_open, \ patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \
patch.object(test_extractor, 'web_input', new_callable = AsyncMock), \ patch.object(test_extractor, "web_input", new_callable = AsyncMock), \
patch.object(test_extractor, 'web_check', new_callable = AsyncMock, return_value = True), \ patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True), \
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, side_effect = find_mock): patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = find_mock):
result = await test_extractor.naviagte_to_ad_page(12345) result = await test_extractor.naviagte_to_ad_page(12345)
assert result is True assert result is True
mock_web_open.assert_called_with('https://www.kleinanzeigen.de/') mock_web_open.assert_called_with("https://www.kleinanzeigen.de/")
submit_button_mock.click.assert_awaited_once() submit_button_mock.click.assert_awaited_once()
popup_close_mock.click.assert_awaited_once() popup_close_mock.click.assert_awaited_once()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_navigate_to_ad_page_with_popup(self, test_extractor: AdExtractor) -> None: async def test_navigate_to_ad_page_with_popup(self, test_extractor:AdExtractor) -> None:
"""Test navigation to ad page with popup handling.""" """Test navigation to ad page with popup handling."""
page_mock = AsyncMock() page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
@@ -218,18 +218,18 @@ class TestAdExtractorNavigation:
input_mock.send_keys = AsyncMock() input_mock.send_keys = AsyncMock()
input_mock.apply = AsyncMock(return_value = True) input_mock.apply = AsyncMock(return_value = True)
with patch.object(test_extractor, 'page', page_mock), \ with patch.object(test_extractor, "page", page_mock), \
patch.object(test_extractor, 'web_open', new_callable = AsyncMock), \ patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, return_value = input_mock), \ patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock), \
patch.object(test_extractor, 'web_click', new_callable = AsyncMock) as mock_web_click, \ patch.object(test_extractor, "web_click", new_callable = AsyncMock) as mock_web_click, \
patch.object(test_extractor, 'web_check', new_callable = AsyncMock, return_value = True): patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True):
result = await test_extractor.naviagte_to_ad_page(12345) result = await test_extractor.naviagte_to_ad_page(12345)
assert result is True assert result is True
mock_web_click.assert_called_with(By.CLASS_NAME, 'mfp-close') mock_web_click.assert_called_with(By.CLASS_NAME, "mfp-close")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_navigate_to_ad_page_invalid_id(self, test_extractor: AdExtractor) -> None: async def test_navigate_to_ad_page_invalid_id(self, test_extractor:AdExtractor) -> None:
"""Test navigation to ad page with invalid ID.""" """Test navigation to ad page with invalid ID."""
page_mock = AsyncMock() page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0" page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0"
@@ -240,22 +240,22 @@ class TestAdExtractorNavigation:
input_mock.apply = AsyncMock(return_value = True) input_mock.apply = AsyncMock(return_value = True)
input_mock.attrs = {} input_mock.attrs = {}
with patch.object(test_extractor, 'page', page_mock), \ with patch.object(test_extractor, "page", page_mock), \
patch.object(test_extractor, 'web_open', new_callable = AsyncMock), \ patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, return_value = input_mock): patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock):
result = await test_extractor.naviagte_to_ad_page(99999) result = await test_extractor.naviagte_to_ad_page(99999)
assert result is False assert result is False
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_extract_own_ads_urls(self, test_extractor: AdExtractor) -> None: async def test_extract_own_ads_urls(self, test_extractor:AdExtractor) -> None:
"""Test extraction of own ads URLs - basic test.""" """Test extraction of own ads URLs - basic test."""
with patch.object(test_extractor, 'web_open', new_callable=AsyncMock), \ with patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
patch.object(test_extractor, 'web_sleep', new_callable=AsyncMock), \ patch.object(test_extractor, "web_sleep", new_callable = AsyncMock), \
patch.object(test_extractor, 'web_find', new_callable=AsyncMock) as mock_web_find, \ patch.object(test_extractor, "web_find", new_callable = AsyncMock) as mock_web_find, \
patch.object(test_extractor, 'web_find_all', new_callable=AsyncMock) as mock_web_find_all, \ patch.object(test_extractor, "web_find_all", new_callable = AsyncMock) as mock_web_find_all, \
patch.object(test_extractor, 'web_scroll_page_down', new_callable=AsyncMock), \ patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock), \
patch.object(test_extractor, 'web_execute', new_callable=AsyncMock): patch.object(test_extractor, "web_execute", new_callable = AsyncMock):
# --- Setup mock objects for DOM elements --- # --- Setup mock objects for DOM elements ---
# Mocks needed for the actual execution flow # Mocks needed for the actual execution flow
@@ -263,7 +263,7 @@ class TestAdExtractorNavigation:
pagination_section_mock = MagicMock() pagination_section_mock = MagicMock()
cardbox_mock = MagicMock() # Represents the <li> element cardbox_mock = MagicMock() # Represents the <li> element
link_mock = MagicMock() # Represents the <a> element link_mock = MagicMock() # Represents the <a> element
link_mock.attrs = {'href': '/s-anzeige/test/12345'} # Configure the desired output link_mock.attrs = {"href": "/s-anzeige/test/12345"} # Configure the desired output
# Mocks for elements potentially checked but maybe not strictly needed for output # Mocks for elements potentially checked but maybe not strictly needed for output
# (depending on how robust the mocking is) # (depending on how robust the mocking is)
@@ -287,7 +287,7 @@ class TestAdExtractorNavigation:
# 2. Find all '.cardbox' elements (inside loop) # 2. Find all '.cardbox' elements (inside loop)
mock_web_find_all.side_effect = [ mock_web_find_all.side_effect = [
[], # Call 1: find 'button[aria-label="Nächste"]' -> No next button = single page [], # Call 1: find 'button[aria-label="Nächste"]' -> No next button = single page
[cardbox_mock] # Call 2: find .cardbox -> One ad item [cardbox_mock] # Call 2: find .cardbox -> One ad item
# Add more mocks here if pagination navigation calls web_find_all # Add more mocks here if pagination navigation calls web_find_all
] ]
@@ -295,20 +295,20 @@ class TestAdExtractorNavigation:
refs = await test_extractor.extract_own_ads_urls() refs = await test_extractor.extract_own_ads_urls()
# --- Assertions --- # --- Assertions ---
assert refs == ['/s-anzeige/test/12345'] # Now it should match assert refs == ["/s-anzeige/test/12345"] # Now it should match
# Optional: Verify calls were made as expected # Optional: Verify calls were made as expected
mock_web_find.assert_has_calls([ mock_web_find.assert_has_calls([
call(By.ID, 'my-manageitems-adlist'), call(By.ID, "my-manageitems-adlist"),
call(By.CSS_SELECTOR, '.Pagination', timeout=10), call(By.CSS_SELECTOR, ".Pagination", timeout = 10),
call(By.ID, 'my-manageitems-adlist'), call(By.ID, "my-manageitems-adlist"),
call(By.CSS_SELECTOR, 'div.manageitems-item-ad h3 a.text-onSurface', parent=cardbox_mock), call(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = cardbox_mock),
], any_order=False) # Check order if important ], any_order = False) # Check order if important
mock_web_find_all.assert_has_calls([ mock_web_find_all.assert_has_calls([
call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section_mock), call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section_mock),
call(By.CLASS_NAME, 'cardbox', parent=ad_list_container_mock), call(By.CLASS_NAME, "cardbox", parent = ad_list_container_mock),
], any_order=False) ], any_order = False)
class TestAdExtractorContent: class TestAdExtractorContent:
@@ -318,14 +318,14 @@ class TestAdExtractorContent:
@pytest.fixture @pytest.fixture
def extractor_with_config(self) -> AdExtractor: def extractor_with_config(self) -> AdExtractor:
"""Create extractor with specific config for testing prefix/suffix handling.""" """Create extractor with specific config for testing prefix/suffix handling."""
browser_mock = MagicMock(spec=Browser) browser_mock = MagicMock(spec = Browser)
return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_extract_description_with_affixes( async def test_extract_description_with_affixes(
self, self,
test_extractor: AdExtractor, test_extractor:AdExtractor,
description_test_cases: list[tuple[dict[str, Any], str, str]] description_test_cases:list[tuple[dict[str, Any], str, str]]
) -> None: ) -> None:
"""Test extraction of description with various prefix/suffix configurations.""" """Test extraction of description with various prefix/suffix configurations."""
# Mock the page # Mock the page
@@ -337,18 +337,18 @@ class TestAdExtractorContent:
test_extractor.config = config test_extractor.config = config
with patch.multiple(test_extractor, with patch.multiple(test_extractor,
web_text=AsyncMock(side_effect=[ web_text = AsyncMock(side_effect = [
"Test Title", # Title "Test Title", # Title
raw_description, # Raw description (without affixes) raw_description, # Raw description (without affixes)
"03.02.2025" # Creation date "03.02.2025" # Creation date
]), ]),
_extract_category_from_ad_page=AsyncMock(return_value="160"), _extract_category_from_ad_page = AsyncMock(return_value = "160"),
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}), _extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)), _extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False), _extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
_download_images_from_ad_page=AsyncMock(return_value=[]), _download_images_from_ad_page = AsyncMock(return_value = []),
_extract_contact_from_ad_page=AsyncMock(return_value={}) _extract_contact_from_ad_page = AsyncMock(return_value = {})
): ):
info = await test_extractor._extract_ad_page_info("/some/dir", 12345) info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
assert info["description"] == raw_description assert info["description"] == raw_description
@@ -356,7 +356,7 @@ class TestAdExtractorContent:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_extract_description_with_affixes_timeout( async def test_extract_description_with_affixes_timeout(
self, self,
test_extractor: AdExtractor test_extractor:AdExtractor
) -> None: ) -> None:
"""Test handling of timeout when extracting description.""" """Test handling of timeout when extracting description."""
# Mock the page # Mock the page
@@ -365,18 +365,18 @@ class TestAdExtractorContent:
test_extractor.page = page_mock test_extractor.page = page_mock
with patch.multiple(test_extractor, with patch.multiple(test_extractor,
web_text=AsyncMock(side_effect=[ web_text = AsyncMock(side_effect = [
"Test Title", # Title succeeds "Test Title", # Title succeeds
TimeoutError("Timeout"), # Description times out TimeoutError("Timeout"), # Description times out
"03.02.2025" # Date succeeds "03.02.2025" # Date succeeds
]), ]),
_extract_category_from_ad_page=AsyncMock(return_value="160"), _extract_category_from_ad_page = AsyncMock(return_value = "160"),
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}), _extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)), _extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False), _extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
_download_images_from_ad_page=AsyncMock(return_value=[]), _download_images_from_ad_page = AsyncMock(return_value = []),
_extract_contact_from_ad_page=AsyncMock(return_value={}) _extract_contact_from_ad_page = AsyncMock(return_value = {})
): ):
try: try:
info = await test_extractor._extract_ad_page_info("/some/dir", 12345) info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
@@ -388,7 +388,7 @@ class TestAdExtractorContent:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_extract_description_with_affixes_no_affixes( async def test_extract_description_with_affixes_no_affixes(
self, self,
test_extractor: AdExtractor test_extractor:AdExtractor
) -> None: ) -> None:
"""Test extraction of description without any affixes in config.""" """Test extraction of description without any affixes in config."""
# Mock the page # Mock the page
@@ -399,24 +399,24 @@ class TestAdExtractorContent:
raw_description = "Original Description" raw_description = "Original Description"
with patch.multiple(test_extractor, with patch.multiple(test_extractor,
web_text=AsyncMock(side_effect=[ web_text = AsyncMock(side_effect = [
"Test Title", # Title "Test Title", # Title
raw_description, # Description without affixes raw_description, # Description without affixes
"03.02.2025" # Creation date "03.02.2025" # Creation date
]), ]),
_extract_category_from_ad_page=AsyncMock(return_value="160"), _extract_category_from_ad_page = AsyncMock(return_value = "160"),
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}), _extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")), _extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)), _extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False), _extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
_download_images_from_ad_page=AsyncMock(return_value=[]), _download_images_from_ad_page = AsyncMock(return_value = []),
_extract_contact_from_ad_page=AsyncMock(return_value={}) _extract_contact_from_ad_page = AsyncMock(return_value = {})
): ):
info = await test_extractor._extract_ad_page_info("/some/dir", 12345) info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
assert info["description"] == raw_description assert info["description"] == raw_description
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_extract_sell_directly(self, test_extractor: AdExtractor) -> None: async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None:
"""Test extraction of sell directly option.""" """Test extraction of sell directly option."""
test_cases = [ test_cases = [
("Direkt kaufen", True), ("Direkt kaufen", True),
@@ -424,11 +424,11 @@ class TestAdExtractorContent:
] ]
for text, expected in test_cases: for text, expected in test_cases:
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, return_value=text): with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = text):
result = await test_extractor._extract_sell_directly_from_ad_page() result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is expected assert result is expected
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError): with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
result = await test_extractor._extract_sell_directly_from_ad_page() result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None assert result is None
@@ -451,15 +451,15 @@ class TestAdExtractorCategory:
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_category(self, extractor: AdExtractor) -> None: async def test_extract_category(self, extractor:AdExtractor) -> None:
"""Test category extraction from breadcrumb.""" """Test category extraction from breadcrumb."""
category_line = MagicMock() category_line = MagicMock()
first_part = MagicMock() first_part = MagicMock()
first_part.attrs = {'href': '/s-familie-kind-baby/c17'} first_part.attrs = {"href": "/s-familie-kind-baby/c17"}
second_part = MagicMock() second_part = MagicMock()
second_part.attrs = {'href': '/s-spielzeug/c23'} second_part.attrs = {"href": "/s-spielzeug/c23"}
with patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find: with patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
mock_web_find.side_effect = [ mock_web_find.side_effect = [
category_line, category_line,
first_part, first_part,
@@ -469,15 +469,15 @@ class TestAdExtractorCategory:
result = await extractor._extract_category_from_ad_page() result = await extractor._extract_category_from_ad_page()
assert result == "17/23" assert result == "17/23"
mock_web_find.assert_any_call(By.ID, 'vap-brdcrmb') mock_web_find.assert_any_call(By.ID, "vap-brdcrmb")
mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line) mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line) mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_special_attributes_empty(self, extractor: AdExtractor) -> None: async def test_extract_special_attributes_empty(self, extractor:AdExtractor) -> None:
"""Test extraction of special attributes when empty.""" """Test extraction of special attributes when empty."""
with patch.object(extractor, 'web_execute', new_callable = AsyncMock) as mock_web_execute: with patch.object(extractor, "web_execute", new_callable = AsyncMock) as mock_web_execute:
mock_web_execute.return_value = { mock_web_execute.return_value = {
"universalAnalyticsOpts": { "universalAnalyticsOpts": {
"dimensions": { "dimensions": {
@@ -507,11 +507,11 @@ class TestAdExtractorContact:
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_contact_info(self, extractor: AdExtractor) -> None: async def test_extract_contact_info(self, extractor:AdExtractor) -> None:
"""Test extraction of contact information.""" """Test extraction of contact information."""
with patch.object(extractor, 'page', MagicMock()), \ with patch.object(extractor, "page", MagicMock()), \
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \ patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \
patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find: patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
mock_web_text.side_effect = [ mock_web_text.side_effect = [
"12345 Berlin - Mitte", "12345 Berlin - Mitte",
@@ -535,22 +535,22 @@ class TestAdExtractorContact:
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_contact_info_timeout(self, extractor: AdExtractor) -> None: async def test_extract_contact_info_timeout(self, extractor:AdExtractor) -> None:
"""Test contact info extraction when elements are not found.""" """Test contact info extraction when elements are not found."""
with patch.object(extractor, 'page', MagicMock()), \ with patch.object(extractor, "page", MagicMock()), \
patch.object(extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError()), \ patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError()), \
patch.object(extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError()), \ patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError()), \
pytest.raises(TimeoutError): pytest.raises(TimeoutError):
await extractor._extract_contact_from_ad_page() await extractor._extract_contact_from_ad_page()
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_extract_contact_info_with_phone(self, extractor: AdExtractor) -> None: async def test_extract_contact_info_with_phone(self, extractor:AdExtractor) -> None:
"""Test extraction of contact information including phone number.""" """Test extraction of contact information including phone number."""
with patch.object(extractor, 'page', MagicMock()), \ with patch.object(extractor, "page", MagicMock()), \
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \ patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \
patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find: patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
mock_web_text.side_effect = [ mock_web_text.side_effect = [
"12345 Berlin - Mitte", "12345 Berlin - Mitte",
@@ -588,19 +588,19 @@ class TestAdExtractorDownload:
return AdExtractor(browser_mock, config_mock) return AdExtractor(browser_mock, config_mock)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_download_ad_existing_directory(self, extractor: AdExtractor) -> None: async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None:
"""Test downloading an ad when the directory already exists.""" """Test downloading an ad when the directory already exists."""
with patch('os.path.exists') as mock_exists, \ with patch("os.path.exists") as mock_exists, \
patch('os.path.isdir') as mock_isdir, \ patch("os.path.isdir") as mock_isdir, \
patch('os.makedirs') as mock_makedirs, \ patch("os.makedirs") as mock_makedirs, \
patch('os.mkdir') as mock_mkdir, \ patch("os.mkdir") as mock_mkdir, \
patch('shutil.rmtree') as mock_rmtree, \ patch("shutil.rmtree") as mock_rmtree, \
patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \ patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract: patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
base_dir = 'downloaded-ads' base_dir = "downloaded-ads"
ad_dir = os.path.join(base_dir, 'ad_12345') ad_dir = os.path.join(base_dir, "ad_12345")
yaml_path = os.path.join(ad_dir, 'ad_12345.yaml') yaml_path = os.path.join(ad_dir, "ad_12345.yaml")
# Configure mocks for directory checks # Configure mocks for directory checks
existing_paths = {base_dir, ad_dir} existing_paths = {base_dir, ad_dir}
@@ -632,32 +632,32 @@ class TestAdExtractorDownload:
# Workaround for hard-coded path in download_ad # Workaround for hard-coded path in download_ad
actual_call = mock_save_dict.call_args actual_call = mock_save_dict.call_args
assert actual_call is not None assert actual_call is not None
actual_path = actual_call[0][0].replace('/', os.path.sep) actual_path = actual_call[0][0].replace("/", os.path.sep)
assert actual_path == yaml_path assert actual_path == yaml_path
assert actual_call[0][1] == mock_extract.return_value assert actual_call[0][1] == mock_extract.return_value
@pytest.mark.asyncio @pytest.mark.asyncio
# pylint: disable=protected-access # pylint: disable=protected-access
async def test_download_images_no_images(self, extractor: AdExtractor) -> None: async def test_download_images_no_images(self, extractor:AdExtractor) -> None:
"""Test image download when no images are found.""" """Test image download when no images are found."""
with patch.object(extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError): with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345) image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
assert len(image_paths) == 0 assert len(image_paths) == 0
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_download_ad(self, extractor: AdExtractor) -> None: async def test_download_ad(self, extractor:AdExtractor) -> None:
"""Test downloading an entire ad.""" """Test downloading an entire ad."""
with patch('os.path.exists') as mock_exists, \ with patch("os.path.exists") as mock_exists, \
patch('os.path.isdir') as mock_isdir, \ patch("os.path.isdir") as mock_isdir, \
patch('os.makedirs') as mock_makedirs, \ patch("os.makedirs") as mock_makedirs, \
patch('os.mkdir') as mock_mkdir, \ patch("os.mkdir") as mock_mkdir, \
patch('shutil.rmtree') as mock_rmtree, \ patch("shutil.rmtree") as mock_rmtree, \
patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \ patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract: patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
base_dir = 'downloaded-ads' base_dir = "downloaded-ads"
ad_dir = os.path.join(base_dir, 'ad_12345') ad_dir = os.path.join(base_dir, "ad_12345")
yaml_path = os.path.join(ad_dir, 'ad_12345.yaml') yaml_path = os.path.join(ad_dir, "ad_12345.yaml")
# Configure mocks for directory checks # Configure mocks for directory checks
mock_exists.return_value = False mock_exists.return_value = False
@@ -690,6 +690,6 @@ class TestAdExtractorDownload:
# Get the actual call arguments # Get the actual call arguments
actual_call = mock_save_dict.call_args actual_call = mock_save_dict.call_args
assert actual_call is not None assert actual_call is not None
actual_path = actual_call[0][0].replace('/', os.path.sep) actual_path = actual_call[0][0].replace("/", os.path.sep)
assert actual_path == yaml_path assert actual_path == yaml_path
assert actual_call[0][1] == mock_extract.return_value assert actual_call[0][1] == mock_extract.return_value

View File

@@ -13,7 +13,7 @@ from kleinanzeigen_bot.utils import i18n
("fr_CA", ("fr", "CA", "UTF-8")), # Test with language + region, no encoding ("fr_CA", ("fr", "CA", "UTF-8")), # Test with language + region, no encoding
("pt_BR.iso8859-1", ("pt", "BR", "ISO8859-1")), # Test with language + region + encoding ("pt_BR.iso8859-1", ("pt", "BR", "ISO8859-1")), # Test with language + region + encoding
]) ])
def test_detect_locale(monkeypatch: MonkeyPatch, lang: str | None, expected: i18n.Locale) -> None: def test_detect_locale(monkeypatch:MonkeyPatch, lang:str | None, expected:i18n.Locale) -> None:
""" """
Pytest test case to verify detect_system_language() behavior under various LANG values. Pytest test case to verify detect_system_language() behavior under various LANG values.
""" """
@@ -49,7 +49,7 @@ def test_pluralize(
noun:str, noun:str,
count:int, count:int,
prefix_with_count:bool, prefix_with_count:bool,
expected: str expected:str
) -> None: ) -> None:
i18n.set_current_locale(i18n.Locale(lang, "US", "UTF_8")) i18n.set_current_locale(i18n.Locale(lang, "US", "UTF_8"))

File diff suppressed because it is too large Load Diff

View File

@@ -26,12 +26,12 @@ from ruamel.yaml import YAML
from kleinanzeigen_bot import resources from kleinanzeigen_bot import resources
# Messages that are intentionally not translated (internal/debug messages) # Messages that are intentionally not translated (internal/debug messages)
EXCLUDED_MESSAGES: dict[str, set[str]] = { EXCLUDED_MESSAGES:dict[str, set[str]] = {
"kleinanzeigen_bot/__init__.py": {"############################################"} "kleinanzeigen_bot/__init__.py": {"############################################"}
} }
# Special modules that are known to be needed even if not in messages_by_file # Special modules that are known to be needed even if not in messages_by_file
KNOWN_NEEDED_MODULES = {'getopt.py'} KNOWN_NEEDED_MODULES = {"getopt.py"}
# Type aliases for better readability # Type aliases for better readability
ModulePath = str ModulePath = str
@@ -45,12 +45,12 @@ MissingDict = dict[FunctionName, dict[Message, set[Message]]]
@dataclass @dataclass
class MessageLocation: class MessageLocation:
"""Represents the location of a message in the codebase.""" """Represents the location of a message in the codebase."""
module: str module:str
function: str function:str
message: str message:str
def _get_function_name(node: ast.AST) -> str: def _get_function_name(node:ast.AST) -> str:
""" """
Get the name of the function containing this AST node. Get the name of the function containing this AST node.
This matches i18n.py's behavior which only uses the function name for translation lookups. This matches i18n.py's behavior which only uses the function name for translation lookups.
@@ -63,14 +63,14 @@ def _get_function_name(node: ast.AST) -> str:
The function name or "module" for module-level code The function name or "module" for module-level code
""" """
def find_parent_context(n: ast.AST) -> tuple[str | None, str | None]: def find_parent_context(n:ast.AST) -> tuple[str | None, str | None]:
"""Find the containing class and function names.""" """Find the containing class and function names."""
class_name = None class_name = None
function_name = None function_name = None
current = n current = n
while hasattr(current, '_parent'): while hasattr(current, "_parent"):
current = getattr(current, '_parent') current = getattr(current, "_parent")
if isinstance(current, ast.ClassDef) and not class_name: if isinstance(current, ast.ClassDef) and not class_name:
class_name = current.name class_name = current.name
elif isinstance(current, ast.FunctionDef) or isinstance(current, ast.AsyncFunctionDef) and not function_name: elif isinstance(current, ast.FunctionDef) or isinstance(current, ast.AsyncFunctionDef) and not function_name:
@@ -84,7 +84,7 @@ def _get_function_name(node: ast.AST) -> str:
return "module" # For module-level code return "module" # For module-level code
def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> MessageDict: def _extract_log_messages(file_path:str, exclude_debug:bool = False) -> MessageDict:
""" """
Extract all translatable messages from a Python file with their function context. Extract all translatable messages from a Python file with their function context.
@@ -94,27 +94,27 @@ def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> Message
Returns: Returns:
Dictionary mapping function names to their messages Dictionary mapping function names to their messages
""" """
with open(file_path, 'r', encoding = 'utf-8') as file: with open(file_path, "r", encoding = "utf-8") as file:
tree = ast.parse(file.read(), filename = file_path) tree = ast.parse(file.read(), filename = file_path)
# Add parent references for context tracking # Add parent references for context tracking
for parent in ast.walk(tree): for parent in ast.walk(tree):
for child in ast.iter_child_nodes(parent): for child in ast.iter_child_nodes(parent):
setattr(child, '_parent', parent) setattr(child, "_parent", parent)
messages: MessageDict = defaultdict(lambda: defaultdict(set)) messages:MessageDict = defaultdict(lambda: defaultdict(set))
def add_message(function: str, msg: str) -> None: def add_message(function:str, msg:str) -> None:
"""Add a message to the messages dictionary.""" """Add a message to the messages dictionary."""
if function not in messages: if function not in messages:
messages[function] = defaultdict(set) messages[function] = defaultdict(set)
if msg not in messages[function]: if msg not in messages[function]:
messages[function][msg] = {msg} messages[function][msg] = {msg}
def extract_string_value(node: ast.AST) -> str | None: def extract_string_value(node:ast.AST) -> str | None:
"""Safely extract string value from an AST node.""" """Safely extract string value from an AST node."""
if isinstance(node, ast.Constant): if isinstance(node, ast.Constant):
value = getattr(node, 'value', None) value = getattr(node, "value", None)
return value if isinstance(value, str) else None return value if isinstance(value, str) else None
return None return None
@@ -127,24 +127,24 @@ def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> Message
# Extract messages from various call types # Extract messages from various call types
if (isinstance(node.func, ast.Attribute) and if (isinstance(node.func, ast.Attribute) and
isinstance(node.func.value, ast.Name) and isinstance(node.func.value, ast.Name) and
node.func.value.id in {'LOG', 'logger', 'logging'} and node.func.value.id in {"LOG", "logger", "logging"} and
node.func.attr in {None if exclude_debug else 'debug', 'info', 'warning', 'error', 'exception', 'critical'}): node.func.attr in {None if exclude_debug else "debug", "info", "warning", "error", "exception", "critical"}):
if node.args: if node.args:
msg = extract_string_value(node.args[0]) msg = extract_string_value(node.args[0])
if msg: if msg:
add_message(function_name, msg) add_message(function_name, msg)
# Handle gettext calls # Handle gettext calls
elif ((isinstance(node.func, ast.Name) and node.func.id == '_') or elif ((isinstance(node.func, ast.Name) and node.func.id == "_") or
(isinstance(node.func, ast.Attribute) and node.func.attr == 'gettext')): (isinstance(node.func, ast.Attribute) and node.func.attr == "gettext")):
if node.args: if node.args:
msg = extract_string_value(node.args[0]) msg = extract_string_value(node.args[0])
if msg: if msg:
add_message(function_name, msg) add_message(function_name, msg)
# Handle other translatable function calls # Handle other translatable function calls
elif isinstance(node.func, ast.Name) and node.func.id in {'ainput', 'pluralize', 'ensure'}: elif isinstance(node.func, ast.Name) and node.func.id in {"ainput", "pluralize", "ensure"}:
arg_index = 0 if node.func.id == 'ainput' else 1 arg_index = 0 if node.func.id == "ainput" else 1
if len(node.args) > arg_index: if len(node.args) > arg_index:
msg = extract_string_value(node.args[arg_index]) msg = extract_string_value(node.args[arg_index])
if msg: if msg:
@@ -162,10 +162,10 @@ def _get_all_log_messages(exclude_debug:bool = False) -> dict[str, MessageDict]:
Returns: Returns:
Dictionary mapping module paths to their function messages Dictionary mapping module paths to their function messages
""" """
src_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src', 'kleinanzeigen_bot') src_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "kleinanzeigen_bot")
print(f"\nScanning for messages in directory: {src_dir}") print(f"\nScanning for messages in directory: {src_dir}")
messages_by_file: dict[str, MessageDict] = { messages_by_file:dict[str, MessageDict] = {
# Special case for getopt.py which is imported # Special case for getopt.py which is imported
"getopt.py": { "getopt.py": {
"do_longs": { "do_longs": {
@@ -187,15 +187,15 @@ def _get_all_log_messages(exclude_debug:bool = False) -> dict[str, MessageDict]:
for root, _, filenames in os.walk(src_dir): for root, _, filenames in os.walk(src_dir):
for filename in filenames: for filename in filenames:
if filename.endswith('.py'): if filename.endswith(".py"):
file_path = os.path.join(root, filename) file_path = os.path.join(root, filename)
relative_path = os.path.relpath(file_path, src_dir) relative_path = os.path.relpath(file_path, src_dir)
if relative_path.startswith('resources/'): if relative_path.startswith("resources/"):
continue continue
messages = _extract_log_messages(file_path, exclude_debug) messages = _extract_log_messages(file_path, exclude_debug)
if messages: if messages:
module_path = os.path.join('kleinanzeigen_bot', relative_path) module_path = os.path.join("kleinanzeigen_bot", relative_path)
module_path = module_path.replace(os.sep, '/') module_path = module_path.replace(os.sep, "/")
messages_by_file[module_path] = messages messages_by_file[module_path] = messages
return messages_by_file return messages_by_file
@@ -217,7 +217,7 @@ def _get_available_languages() -> list[str]:
return sorted(languages) return sorted(languages)
def _get_translations_for_language(lang: str) -> TranslationDict: def _get_translations_for_language(lang:str) -> TranslationDict:
""" """
Get translations for a specific language from its YAML file. Get translations for a specific language from its YAML file.
@@ -227,7 +227,7 @@ def _get_translations_for_language(lang: str) -> TranslationDict:
Returns: Returns:
Dictionary containing all translations for the language Dictionary containing all translations for the language
""" """
yaml = YAML(typ = 'safe') yaml = YAML(typ = "safe")
translation_file = f"translations.{lang}.yaml" translation_file = f"translations.{lang}.yaml"
print(f"Loading translations from {translation_file}") print(f"Loading translations from {translation_file}")
content = files(resources).joinpath(translation_file).read_text() content = files(resources).joinpath(translation_file).read_text()
@@ -235,10 +235,10 @@ def _get_translations_for_language(lang: str) -> TranslationDict:
return translations return translations
def _find_translation(translations: TranslationDict, def _find_translation(translations:TranslationDict,
module: str, module:str,
function: str, function:str,
message: str) -> bool: message:str) -> bool:
""" """
Check if a translation exists for a given message in the exact location where i18n.py will look. Check if a translation exists for a given message in the exact location where i18n.py will look.
This matches the lookup logic in i18n.py which uses dicts.safe_get(). This matches the lookup logic in i18n.py which uses dicts.safe_get().
@@ -253,11 +253,11 @@ def _find_translation(translations: TranslationDict,
True if translation exists in the correct location, False otherwise True if translation exists in the correct location, False otherwise
""" """
# Special case for getopt.py # Special case for getopt.py
if module == 'getopt.py': if module == "getopt.py":
return bool(translations.get(module, {}).get(function, {}).get(message)) return bool(translations.get(module, {}).get(function, {}).get(message))
# Add kleinanzeigen_bot/ prefix if not present # Add kleinanzeigen_bot/ prefix if not present
module_path = f'kleinanzeigen_bot/{module}' if not module.startswith('kleinanzeigen_bot/') else module module_path = f'kleinanzeigen_bot/{module}' if not module.startswith("kleinanzeigen_bot/") else module
# Check if module exists in translations # Check if module exists in translations
module_trans = translations.get(module_path, {}) module_trans = translations.get(module_path, {})
@@ -277,10 +277,10 @@ def _find_translation(translations: TranslationDict,
return has_translation return has_translation
def _message_exists_in_code(code_messages: dict[str, MessageDict], def _message_exists_in_code(code_messages:dict[str, MessageDict],
module: str, module:str,
function: str, function:str,
message: str) -> bool: message:str) -> bool:
""" """
Check if a message exists in the code at the given location. Check if a message exists in the code at the given location.
This is the reverse of _find_translation - it checks if a translation's message This is the reverse of _find_translation - it checks if a translation's message
@@ -296,11 +296,11 @@ def _message_exists_in_code(code_messages: dict[str, MessageDict],
True if message exists in the code, False otherwise True if message exists in the code, False otherwise
""" """
# Special case for getopt.py # Special case for getopt.py
if module == 'getopt.py': if module == "getopt.py":
return bool(code_messages.get(module, {}).get(function, {}).get(message)) return bool(code_messages.get(module, {}).get(function, {}).get(message))
# Remove kleinanzeigen_bot/ prefix if present for code message lookup # Remove kleinanzeigen_bot/ prefix if present for code message lookup
module_path = module[len('kleinanzeigen_bot/'):] if module.startswith('kleinanzeigen_bot/') else module module_path = module[len("kleinanzeigen_bot/"):] if module.startswith("kleinanzeigen_bot/") else module
module_path = f'kleinanzeigen_bot/{module_path}' module_path = f'kleinanzeigen_bot/{module_path}'
# Check if module exists in code messages # Check if module exists in code messages
@@ -318,7 +318,7 @@ def _message_exists_in_code(code_messages: dict[str, MessageDict],
@pytest.mark.parametrize("lang", _get_available_languages()) @pytest.mark.parametrize("lang", _get_available_languages())
def test_all_log_messages_have_translations(lang: str) -> None: def test_all_log_messages_have_translations(lang:str) -> None:
""" """
Test that all translatable messages in the code have translations for each language. Test that all translatable messages in the code have translations for each language.
@@ -345,7 +345,7 @@ def test_all_log_messages_have_translations(lang: str) -> None:
def make_inner_dict() -> defaultdict[str, set[str]]: def make_inner_dict() -> defaultdict[str, set[str]]:
return defaultdict(set) return defaultdict(set)
by_module: defaultdict[str, defaultdict[str, set[str]]] = defaultdict(make_inner_dict) by_module:defaultdict[str, defaultdict[str, set[str]]] = defaultdict(make_inner_dict)
for loc in missing_translations: for loc in missing_translations:
assert isinstance(loc.module, str), "Module must be a string" assert isinstance(loc.module, str), "Module must be a string"
@@ -364,7 +364,7 @@ def test_all_log_messages_have_translations(lang: str) -> None:
@pytest.mark.parametrize("lang", _get_available_languages()) @pytest.mark.parametrize("lang", _get_available_languages())
def test_no_obsolete_translations(lang: str) -> None: def test_no_obsolete_translations(lang:str) -> None:
""" """
Test that all translations in each language YAML file are actually used in the code. Test that all translations in each language YAML file are actually used in the code.
@@ -376,7 +376,7 @@ def test_no_obsolete_translations(lang: str) -> None:
""" """
messages_by_file = _get_all_log_messages(exclude_debug = False) messages_by_file = _get_all_log_messages(exclude_debug = False)
translations = _get_translations_for_language(lang) translations = _get_translations_for_language(lang)
obsolete_items: list[tuple[str, str, str]] = [] obsolete_items:list[tuple[str, str, str]] = []
for module, module_trans in translations.items(): for module, module_trans in translations.items():
if not isinstance(module_trans, dict): if not isinstance(module_trans, dict):
@@ -402,7 +402,7 @@ def test_no_obsolete_translations(lang: str) -> None:
obsolete_str = f"\nObsolete translations found for language [{lang}]:\n" obsolete_str = f"\nObsolete translations found for language [{lang}]:\n"
# Group by module and function for better readability # Group by module and function for better readability
by_module: defaultdict[str, defaultdict[str, list[str]]] = defaultdict(lambda: defaultdict(list)) by_module:defaultdict[str, defaultdict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
for module, function, message in obsolete_items: for module, function, message in obsolete_items:
by_module[module][function].append(message) by_module[module][function].append(message)