refact: apply consistent formatting

This commit is contained in:
sebthom
2025-04-27 23:54:22 +02:00
parent fe33a0e461
commit ef923a8337
21 changed files with 1020 additions and 709 deletions

View File

@@ -20,8 +20,8 @@ Select the type(s) of change(s) included in this pull request:
Before requesting a review, confirm the following:
- [ ] I have reviewed my changes to ensure they meet the project's standards.
- [ ] I have tested my changes and ensured that all tests pass (`pdm run test`).
- [ ] I have formatted the code (`pdm run format`).
- [ ] I have verified that linting passes (`pdm run lint`).
- [ ] I have run security scans and addressed any identified issues (`pdm run audit`).
- [ ] I have updated documentation where necessary.
By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

View File

@@ -82,7 +82,7 @@ app = "python -m kleinanzeigen_bot"
compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean"
compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build
debug = "python -m pdb -m kleinanzeigen_bot"
format = "autopep8 --recursive --in-place src tests --verbose"
format = {shell = "autopep8 --recursive --in-place scripts src tests --verbose && python scripts/post_autopep8.py scripts src tests" }
lint = {shell = "ruff check && mypy && basedpyright" }
fix = {shell = "ruff check --fix" }
test = "python -m pytest --capture=tee-sys -v"
@@ -113,7 +113,7 @@ aggressive = 3
# https://docs.astral.sh/ruff/configuration/
#####################
[tool.ruff]
include = ["pyproject.toml", "src/**/*.py", "tests/**/*.py"]
include = ["pyproject.toml", "scripts/**/*.py", "src/**/*.py", "tests/**/*.py"]
line-length = 160
indent-width = 4
target-version = "py310"
@@ -208,14 +208,10 @@ ignore = [
"TC006", # Add quotes to type expression in `typing.cast()`
]
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
line-ending = "native"
docstring-code-format = false
skip-magic-trailing-comma = false
[tool.ruff.lint.per-file-ignores]
"scripts/**/*.py" = [
"INP001", # File `...` is part of an implicit namespace package. Add an `__init__.py`.
]
"tests/**/*.py" = [
"ARG",
"B",
@@ -247,7 +243,7 @@ max-statements = 150 # max. number of statements in function / method body (R091
# https://mypy.readthedocs.io/en/stable/config_file.html
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
python_version = "3.10"
files = "src,tests"
files = "scripts,src,tests"
strict = true
disallow_untyped_calls = false
disallow_untyped_defs = true
@@ -264,7 +260,7 @@ verbosity = 0
#####################
[tool.basedpyright]
# https://docs.basedpyright.com/latest/configuration/config-files/
include = ["src", "tests"]
include = ["scripts", "src", "tests"]
defineConstant = { DEBUG = false }
pythonVersion = "3.10"
typeCheckingMode = "standard"

317
scripts/post_autopep8.py Normal file
View File

@@ -0,0 +1,317 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import ast, logging, re, sys # isort: skip
from pathlib import Path
from typing import Final, List, Protocol, Tuple
from typing_extensions import override
# Configure basic logging for standalone script execution: INFO level, "LEVEL: message" format.
logging.basicConfig(level = logging.INFO, format = "%(levelname)s: %(message)s")
# Module-level logger shared by all formatter rules and the CLI entry point below.
LOG:Final[logging.Logger] = logging.getLogger(__name__)
class FormatterRule(Protocol):
    """
    Structural interface for post-autopep8 formatting rules.

    Implementations receive the parsed AST together with the raw source lines
    of a file and return the (possibly modified) list of source lines.
    """

    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        """Return the transformed source lines for the file at *path*."""
class NoSpaceAfterColonInTypeAnnotationRule(FormatterRule):
    """
    Removes whitespace around the colon (:) in variable and function parameter type annotations.

    This rule enforces `a:int` instead of `a: int` or `a : int`.
    It is the opposite behavior of autopep8 rule E231.

    Example:
        # Before
        def foo(a: int, b : str) -> None:
            pass

        # After
        def foo(a:int, b:str) -> None:
            pass
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        # collect (0-based line, column) of the first character of every annotation
        ann_positions:List[Tuple[int, int]] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.arg) and node.annotation is not None:
                ann_positions.append((node.annotation.lineno - 1, node.annotation.col_offset))
            elif isinstance(node, ast.AnnAssign):
                # an AnnAssign node always carries an annotation
                ann_positions.append((node.annotation.lineno - 1, node.annotation.col_offset))
        if not ann_positions:
            return lines
        new_lines:List[str] = []
        for idx, line in enumerate(lines):
            if line.lstrip().startswith("#"):  # leave pure comment lines untouched
                new_lines.append(line)
                continue
            chars = list(line)
            offsets = [col for (lin, col) in ann_positions if lin == idx]
            # process right-to-left so earlier columns stay valid after deletions
            for col in sorted(offsets, reverse = True):
                prefix = "".join(chars[:col])
                colon_idx = prefix.rfind(":")
                if colon_idx == -1:
                    # colon is on a previous (continuation) line; nothing to do here
                    continue
                # remove whitespace between ':' and the start of the annotation
                j = colon_idx + 1
                while j < len(chars) and chars[j].isspace():
                    del chars[j]
                # bug fix: also remove whitespace between the name and ':' so that
                # `b : str` becomes `b:str` as promised by the docstring example
                # (previously only the space after the colon was stripped)
                k = colon_idx - 1
                while k >= 0 and chars[k].isspace():
                    del chars[k]
                    k -= 1
            new_lines.append("".join(chars))
        return new_lines
class EqualSignSpacingInDefaultsAndNamedArgsRule(FormatterRule):
    """
    Ensures that the '=' sign in default values for function parameters and keyword arguments in function calls
    is surrounded by exactly one space on each side.

    This rule enforces `a:int = 3` instead of `a:int=3`, and `x = 42` instead of `x=42` or `x =42`.
    It is the opposite behavior of autopep8 rule E251.

    Example:
        # Before
        def foo(a:int=3, b :str= "bar"):
            pass
        foo(x=42,y = "hello")

        # After
        def foo(a:int = 3, b:str = "bar"):
            pass
        foo(x = 42, y = "hello")
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        # (0-based line, column) of each value whose preceding '=' must be re-spaced
        eq_targets:List[Tuple[int, int]] = []
        for node in ast.walk(tree):
            # defaults in function definitions, async defs and lambdas
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)):
                for default in node.args.defaults:  # positional defaults
                    if default is not None:
                        eq_targets.append((default.lineno - 1, default.col_offset))
                # keyword-only defaults exist only on defs, not lambdas
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    for default in node.args.kw_defaults:
                        if default is not None:
                            eq_targets.append((default.lineno - 1, default.col_offset))
            # keyword arguments in call sites (kw.arg is None for **kwargs unpacking)
            if isinstance(node, ast.Call):
                for kw in node.keywords:
                    if kw.arg is not None:
                        eq_targets.append((kw.value.lineno - 1, kw.value.col_offset))
        if not eq_targets:
            return lines
        result:List[str] = []
        for row, line in enumerate(lines):
            if line.lstrip().startswith("#"):  # skip pure comment lines
                result.append(line)
                continue
            buf = list(line)
            cols = [col for (lineno, col) in eq_targets if lineno == row]
            # right-to-left so earlier columns stay valid while we edit
            for col in sorted(cols, reverse = True):
                eq_idx = "".join(buf[:col]).rfind("=")
                if eq_idx == -1:
                    # '=' sits on an earlier continuation line; leave as-is
                    continue
                # drop all whitespace to the left of '='
                left = eq_idx - 1
                while left >= 0 and buf[left].isspace():
                    del buf[left]
                    eq_idx -= 1
                    left -= 1
                # drop all whitespace to the right of '='
                while eq_idx + 1 < len(buf) and buf[eq_idx + 1].isspace():
                    del buf[eq_idx + 1]
                # re-insert exactly one space on each side of '='
                buf.insert(eq_idx, " ")
                buf.insert(eq_idx + 2, " ")
            result.append("".join(buf))
        return result
class PreferDoubleQuotesRule(FormatterRule):
    """
    Ensures string literals use double quotes unless the content contains a double quote.

    Example:
        # Before
        foo = 'hello'
        bar = 'a "quote" inside'

        # After
        foo = "hello"
        bar = 'a "quote" inside'  # kept as-is, because it contains a double quote
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        out = lines.copy()
        # cumulative column shift per line caused by replacements already made on it
        shifts:dict[int, int] = dict.fromkeys(range(len(lines)), 0)
        # parent map so we can detect constants nested (at any depth) in f-strings
        parents:dict[ast.AST, ast.AST] = {}
        for parent in ast.walk(tree):
            for child in ast.iter_child_nodes(parent):
                parents[child] = parent

        def inside_fstring(node:ast.AST) -> bool:
            cur = parents.get(node)
            while cur:
                if isinstance(cur, ast.JoinedStr):
                    return True
                cur = parents.get(cur)
            return False

        # Regex for a single- or triple-single-quoted literal:
        #   prefix  - optional string flags (r, b, u, f ...), case-insensitive
        #   quote   - opening delimiter: exactly ''' or ' (never two in a row,
        #             so adjacent quotes are not misread)
        #   content - non-greedy literal body up to the matching delimiter
        #   (?P=quote) - closing delimiter, same length as the opener
        literal_re = re.compile(
            r"(?P<prefix>[rRbuUfF]*)(?P<quote>'{3}|')(?P<content>.*?)(?P=quote)",
            re.DOTALL,
        )
        for node in ast.walk(tree):
            # only plain string constants are candidates
            if not (isinstance(node, ast.Constant) and isinstance(node.value, str)):
                continue
            if inside_fstring(node):
                continue
            lineno = getattr(node, "lineno", None)
            col = getattr(node, "col_offset", None)
            if lineno is None or col is None:
                continue
            row = lineno - 1
            text = out[row]
            # adjust for earlier edits so we match against the current line content
            pos = col + shifts[row]
            if pos >= len(text) or text[pos] not in ("'", "r", "u", "b", "f", "R", "U", "B", "F"):
                continue
            match = literal_re.match(text[pos:])
            if not match:
                continue
            prefix = match.group("prefix")
            quote = match.group("quote")  # either "'" or "'''"
            content = match.group("content")
            # keep the literal as-is when its body already holds a double quote
            if '"' in content:
                continue
            # rebuild the literal with the same prefix but double-quote delimiters
            delim = '"' * len(quote)
            escaped = content.replace(delim, "\\" + delim)
            replacement = f"{prefix}{delim}{escaped}{delim}"
            consumed = match.end()  # number of original characters replaced
            out[row] = text[:pos] + replacement + text[pos + consumed:]
            # remember the shift for any later literals on this same line
            shifts[row] += len(replacement) - consumed
        return out
# Rules applied in order to every file; format_file re-parses the result after each rule.
FORMATTER_RULES:List[FormatterRule] = [
    NoSpaceAfterColonInTypeAnnotationRule(),
    EqualSignSpacingInDefaultsAndNamedArgsRule(),
    PreferDoubleQuotesRule(),
]
def format_file(path:Path) -> None:
    """
    Apply all FORMATTER_RULES to a single Python file, rewriting it in place.

    The file is written back only when at least one rule changed its content
    and every intermediate result still parses as valid Python; any syntax
    error is logged and aborts processing of this file.
    """
    # read raw text without newline translation so line endings survive round-tripping
    with path.open("r", encoding = "utf-8", newline = "") as fh:
        source_text = fh.read()

    # initial parse; a file that does not parse is skipped entirely
    try:
        tree = ast.parse(source_text)
    except SyntaxError as ex:
        LOG.error(
            "Syntax error parsing %s[%d:%d]: %r -> %s",
            path, ex.lineno, ex.offset, (ex.text or "").rstrip(), ex.msg
        )
        return

    source_lines = source_text.splitlines(keepends = True)
    result_text = source_text
    for rule in FORMATTER_RULES:
        source_lines = rule.apply(tree, source_lines, path)
        result_text = "".join(source_lines)
        # re-parse so the next rule sees positions matching the edited text,
        # and so a rule that breaks the file is caught immediately
        try:
            tree = ast.parse(result_text)
        except SyntaxError as ex:
            LOG.error(
                "Syntax error after %s at %s[%d:%d]: %r -> %s",
                rule.__class__.__name__, path, ex.lineno, ex.offset, (ex.text or "").rstrip(), ex.msg
            )
            return

    if result_text != source_text:
        with path.open("w", encoding = "utf-8", newline = "") as fh:
            fh.write(result_text)
        LOG.info("Formatted [%s].", path)
if __name__ == "__main__":
    # at least one directory argument is required
    if len(sys.argv) < 2:  # noqa: PLR2004 Magic value used in comparison
        script_path = Path(sys.argv[0])
        print(f"Usage: python {script_path} <directory1> [<directory2> ...]")
        sys.exit(1)
    for raw_dir in sys.argv[1:]:
        directory = Path(raw_dir)
        if directory.exists():
            # recursively format every Python file below this directory
            for py_file in directory.rglob("*.py"):
                format_file(py_file)
        else:
            LOG.warning("Directory [%s] does not exist, skipping...", directory)

View File

@@ -83,11 +83,11 @@ class KleinanzeigenBot(WebScrapingMixin):
self.configure_file_logging()
self.load_config()
if not (self.ads_selector in {'all', 'new', 'due', 'changed'} or
any(selector in self.ads_selector.split(',') for selector in ('all', 'new', 'due', 'changed')) or
re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
if not (self.ads_selector in {"all", "new", "due", "changed"} or
any(selector in self.ads_selector.split(",") for selector in ("all", "new", "due", "changed")) or
re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
LOG.warning('You provided no ads selector. Defaulting to "due".')
self.ads_selector = 'due'
self.ads_selector = "due"
if ads := self.load_ads():
await self.create_browser_session()
@@ -111,9 +111,9 @@ class KleinanzeigenBot(WebScrapingMixin):
case "download":
self.configure_file_logging()
# ad IDs depends on selector
if not (self.ads_selector in {'all', 'new'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
if not (self.ads_selector in {"all", "new"} or re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
LOG.warning('You provided no ads selector. Defaulting to "new".')
self.ads_selector = 'new'
self.ads_selector = "new"
self.load_config()
await self.create_browser_session()
await self.login()
@@ -265,7 +265,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("App version: %s", self.get_version())
LOG.info("Python version: %s", sys.version)
def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_file_relative: str) -> bool:
def __check_ad_republication(self, ad_cfg:dict[str, Any], ad_file_relative:str) -> bool:
"""
Check if an ad needs to be republished based on republication interval.
Returns True if the ad should be republished based on the interval.
@@ -295,7 +295,7 @@ class KleinanzeigenBot(WebScrapingMixin):
return True
def __check_ad_changed(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool:
def __check_ad_changed(self, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool:
"""
Check if an ad has been changed since last publication.
Returns True if the ad has been changed.
@@ -327,7 +327,7 @@ class KleinanzeigenBot(WebScrapingMixin):
data_root_dir = os.path.dirname(self.config_file_path)
for file_pattern in self.config["ad_files"]:
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
if not str(ad_file).endswith('ad_fields.yaml'):
if not str(ad_file).endswith("ad_fields.yaml"):
ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file
LOG.info(" -> found %s", pluralize("ad config file", ad_files))
if not ad_files:
@@ -335,13 +335,13 @@ class KleinanzeigenBot(WebScrapingMixin):
ids = []
use_specific_ads = False
selectors = self.ads_selector.split(',')
selectors = self.ads_selector.split(",")
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
ids = [int(n) for n in self.ads_selector.split(',')]
if re.compile(r"\d+[,\d+]*").search(self.ads_selector):
ids = [int(n) for n in self.ads_selector.split(",")]
use_specific_ads = True
LOG.info('Start fetch task for the ad(s) with id(s):')
LOG.info(' | '.join([str(id_) for id_ in ids]))
LOG.info("Start fetch task for the ad(s) with id(s):")
LOG.info(" | ".join([str(id_) for id_ in ids]))
ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml")
ads = []
@@ -548,7 +548,7 @@ class KleinanzeigenBot(WebScrapingMixin):
async def is_logged_in(self) -> bool:
try:
user_info = await self.web_text(By.CLASS_NAME, "mr-medium")
if self.config['login']['username'].lower() in user_info.lower():
if self.config["login"]["username"].lower() in user_info.lower():
return True
except TimeoutError:
return False
@@ -570,7 +570,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("DONE: Deleted %s", pluralize("ad", count))
LOG.info("############################################")
async def delete_ad(self, ad_cfg: dict[str, Any], published_ads: list[dict[str, Any]], *, delete_old_ads_by_title: bool) -> bool:
async def delete_ad(self, ad_cfg:dict[str, Any], published_ads:list[dict[str, Any]], *, delete_old_ads_by_title:bool) -> bool:
LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
@@ -627,7 +627,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("DONE: (Re-)published %s", pluralize("ad", count))
LOG.info("############################################")
async def publish_ad(self, ad_file:str, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], published_ads: list[dict[str, Any]]) -> None:
async def publish_ad(self, ad_file:str, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], published_ads:list[dict[str, Any]]) -> None:
"""
@param ad_cfg: the effective ad config (i.e. with default values applied etc.)
@param ad_cfg_orig: the ad config as present in the YAML file
@@ -657,7 +657,7 @@ class KleinanzeigenBot(WebScrapingMixin):
#############################
# set category
#############################
await self.__set_category(ad_cfg['category'], ad_file)
await self.__set_category(ad_cfg["category"], ad_file)
#############################
# set special attributes
@@ -674,7 +674,7 @@ class KleinanzeigenBot(WebScrapingMixin):
try:
await self.web_select(By.XPATH, "//select[contains(@id, '.versand_s')]", shipping_value)
except TimeoutError:
LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg['shipping_type'])
LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg["shipping_type"])
else:
await self.__set_shipping(ad_cfg)
@@ -698,9 +698,9 @@ class KleinanzeigenBot(WebScrapingMixin):
if ad_cfg["shipping_type"] == "SHIPPING":
if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}:
if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED):
await self.web_click(By.ID, 'radio-buy-now-yes')
await self.web_click(By.ID, "radio-buy-now-yes")
elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED):
await self.web_click(By.ID, 'radio-buy-now-no')
await self.web_click(By.ID, "radio-buy-now-no")
except TimeoutError as ex:
LOG.debug(ex, exc_info = True)
@@ -832,7 +832,7 @@ class KleinanzeigenBot(WebScrapingMixin):
dicts.save_dict(ad_file, ad_cfg_orig)
async def __set_condition(self, condition_value: str) -> None:
async def __set_condition(self, condition_value:str) -> None:
condition_mapping = {
"new_with_tag": "Neu mit Etikett",
"new": "Neu",
@@ -862,7 +862,7 @@ class KleinanzeigenBot(WebScrapingMixin):
except TimeoutError as ex:
raise TimeoutError(_("Unable to close condition dialog!")) from ex
async def __set_category(self, category: str | None, ad_file:str) -> None:
async def __set_category(self, category:str | None, ad_file:str) -> None:
# click on something to trigger automatic category detection
await self.web_click(By.ID, "pstad-descrptn")
@@ -884,9 +884,9 @@ class KleinanzeigenBot(WebScrapingMixin):
else:
ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed")
async def __set_special_attributes(self, ad_cfg: dict[str, Any]) -> None:
async def __set_special_attributes(self, ad_cfg:dict[str, Any]) -> None:
if ad_cfg["special_attributes"]:
LOG.debug('Found %i special attributes', len(ad_cfg["special_attributes"]))
LOG.debug("Found %i special attributes", len(ad_cfg["special_attributes"]))
for special_attribute_key, special_attribute_value in ad_cfg["special_attributes"].items():
if special_attribute_key == "condition_s":
@@ -911,10 +911,10 @@ class KleinanzeigenBot(WebScrapingMixin):
try:
elem_id = special_attr_elem.attrs.id
if special_attr_elem.local_name == 'select':
if special_attr_elem.local_name == "select":
LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key)
await self.web_select(By.ID, elem_id, special_attribute_value)
elif special_attr_elem.attrs.type == 'checkbox':
elif special_attr_elem.attrs.type == "checkbox":
LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key)
await self.web_click(By.ID, elem_id)
else:
@@ -925,7 +925,7 @@ class KleinanzeigenBot(WebScrapingMixin):
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value)
async def __set_shipping(self, ad_cfg: dict[str, Any]) -> None:
async def __set_shipping(self, ad_cfg:dict[str, Any]) -> None:
if ad_cfg["shipping_type"] == "PICKUP":
try:
await self.web_click(By.XPATH,
@@ -960,7 +960,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.debug(ex, exc_info = True)
raise TimeoutError(_("Unable to close shipping dialog!")) from ex
async def __set_shipping_options(self, ad_cfg: dict[str, Any]) -> None:
async def __set_shipping_options(self, ad_cfg:dict[str, Any]) -> None:
shipping_options_mapping = {
"DHL_2": ("Klein", "Paket 2 kg"),
"Hermes_Päckchen": ("Klein", "Päckchen"),
@@ -980,7 +980,7 @@ class KleinanzeigenBot(WebScrapingMixin):
except KeyError as ex:
raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg['shipping_options']}") from ex
shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict=False)
shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict = False)
try:
shipping_size, = set(shipping_sizes)
@@ -1025,7 +1025,7 @@ class KleinanzeigenBot(WebScrapingMixin):
except TimeoutError as ex:
raise TimeoutError(_("Unable to close shipping dialog!")) from ex
async def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
async def __upload_images(self, ad_cfg:dict[str, Any]) -> None:
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]")
@@ -1036,7 +1036,7 @@ class KleinanzeigenBot(WebScrapingMixin):
async def assert_free_ad_limit_not_reached(self) -> None:
try:
await self.web_find(By.XPATH, '/html/body/div[1]/form/fieldset[6]/div[1]/header', timeout = 2)
await self.web_find(By.XPATH, "/html/body/div[1]/form/fieldset[6]/div[1]/header", timeout = 2)
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
except TimeoutError:
pass
@@ -1050,13 +1050,13 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_extractor = extract.AdExtractor(self.browser, self.config)
# use relevant download routine
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
LOG.info('Scanning your ad overview...')
if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes
LOG.info("Scanning your ad overview...")
own_ad_urls = await ad_extractor.extract_own_ads_urls()
LOG.info('%s found.', pluralize("ad", len(own_ad_urls)))
LOG.info("%s found.", pluralize("ad", len(own_ad_urls)))
if self.ads_selector == 'all': # download all of your adds
LOG.info('Starting download of all ads...')
if self.ads_selector == "all": # download all of your adds
LOG.info("Starting download of all ads...")
success_count = 0
# call download function for each ad page
@@ -1067,12 +1067,12 @@ class KleinanzeigenBot(WebScrapingMixin):
success_count += 1
LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls))
elif self.ads_selector == 'new': # download only unsaved ads
elif self.ads_selector == "new": # download only unsaved ads
# check which ads already saved
saved_ad_ids = []
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
for ad in ads:
ad_id = int(ad[2]['id'])
ad_id = int(ad[2]["id"])
saved_ad_ids.append(ad_id)
# determine ad IDs from links
@@ -1083,28 +1083,28 @@ class KleinanzeigenBot(WebScrapingMixin):
for ad_url, ad_id in ad_id_by_url.items():
# check if ad with ID already saved
if ad_id in saved_ad_ids:
LOG.info('The ad with id %d has already been saved.', ad_id)
LOG.info("The ad with id %d has already been saved.", ad_id)
continue
if await ad_extractor.naviagte_to_ad_page(ad_url):
await ad_extractor.download_ad(ad_id)
new_count += 1
LOG.info('%s were downloaded from your profile.', pluralize("new ad", new_count))
LOG.info("%s were downloaded from your profile.", pluralize("new ad", new_count))
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
ids = [int(n) for n in self.ads_selector.split(',')]
LOG.info('Starting download of ad(s) with the id(s):')
LOG.info(' | '.join([str(ad_id) for ad_id in ids]))
elif re.compile(r"\d+[,\d+]*").search(self.ads_selector): # download ad(s) with specific id(s)
ids = [int(n) for n in self.ads_selector.split(",")]
LOG.info("Starting download of ad(s) with the id(s):")
LOG.info(" | ".join([str(ad_id) for ad_id in ids]))
for ad_id in ids: # call download routine for every id
exists = await ad_extractor.naviagte_to_ad_page(ad_id)
if exists:
await ad_extractor.download_ad(ad_id)
LOG.info('Downloaded ad with id %d', ad_id)
LOG.info("Downloaded ad with id %d", ad_id)
else:
LOG.error('The page with the id %d does not exist!', ad_id)
LOG.error("The page with the id %d does not exist!", ad_id)
def __get_description_with_affixes(self, ad_cfg: dict[str, Any]) -> str:
def __get_description_with_affixes(self, ad_cfg:dict[str, Any]) -> str:
"""Get the complete description with prefix and suffix applied.
Precedence (highest to lowest):

View File

@@ -9,7 +9,7 @@ from .utils import dicts
MAX_DESCRIPTION_LENGTH:Final[int] = 4000
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
def calculate_content_hash(ad_cfg:dict[str, Any]) -> str:
"""Calculate a hash for user-modifiable fields of the ad."""
# Relevant fields for the hash
@@ -40,7 +40,7 @@ def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
return hashlib.sha256(content_str.encode()).hexdigest()
def get_description_affixes(config: dict[str, Any], *, prefix: bool = True) -> str:
def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str:
"""Get prefix or suffix for description with proper precedence.
This function handles both the new flattened format and legacy nested format:

View File

@@ -36,22 +36,22 @@ class AdExtractor(WebScrapingMixin):
"""
# create sub-directory for ad(s) to download (if necessary):
relative_directory = 'downloaded-ads'
relative_directory = "downloaded-ads"
# make sure configured base directory exists
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
os.mkdir(relative_directory)
LOG.info('Created ads directory at ./%s.', relative_directory)
LOG.info("Created ads directory at ./%s.", relative_directory)
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
if os.path.exists(new_base_dir):
LOG.info('Deleting current folder of ad %s...', ad_id)
LOG.info("Deleting current folder of ad %s...", ad_id)
shutil.rmtree(new_base_dir)
os.mkdir(new_base_dir)
LOG.info('New directory for ad created at %s.', new_base_dir)
LOG.info("New directory for ad created at %s.", new_base_dir)
# call extraction function
info = await self._extract_ad_page_info(new_base_dir, ad_id)
ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml'
ad_file_path = new_base_dir + "/" + f'ad_{ad_id}.yaml'
dicts.save_dict(ad_file_path, info)
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
@@ -67,18 +67,18 @@ class AdExtractor(WebScrapingMixin):
img_paths = []
try:
# download all images from box
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
image_box = await self.web_find(By.CLASS_NAME, "galleryimage-large")
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
LOG.info('Found %s.', i18n.pluralize("image", n_images))
n_images = len(await self.web_find_all(By.CSS_SELECTOR, ".galleryimage-element[data-ix]", parent = image_box))
LOG.info("Found %s.", i18n.pluralize("image", n_images))
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
img_element:Element = await self.web_find(By.CSS_SELECTOR, "div:nth-child(1) > img", parent = image_box)
img_fn_prefix = "ad_" + str(ad_id) + "__img"
img_nr = 1
dl_counter = 0
while img_nr <= n_images: # scrolling + downloading
current_img_url = img_element.attrs['src'] # URL of the image
current_img_url = img_element.attrs["src"] # URL of the image
if current_img_url is None:
continue
@@ -86,43 +86,43 @@ class AdExtractor(WebScrapingMixin):
content_type = response.info().get_content_type()
file_ending = mimetypes.guess_extension(content_type)
img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}"
with open(img_path, 'wb') as f:
with open(img_path, "wb") as f:
shutil.copyfileobj(response, f)
dl_counter += 1
img_paths.append(img_path.rsplit('/', maxsplit = 1)[-1])
img_paths.append(img_path.rsplit("/", maxsplit = 1)[-1])
# navigate to next image (if exists)
if img_nr < n_images:
try:
# click next button, wait, and re-establish reference
await (await self.web_find(By.CLASS_NAME, 'galleryimage--navigation--next')).click()
await (await self.web_find(By.CLASS_NAME, "galleryimage--navigation--next")).click()
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
img_element = await self.web_find(By.TAG_NAME, "img", parent = new_div)
except TimeoutError:
LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.')
LOG.error("NEXT button in image gallery somehow missing, aborting image fetching.")
break
img_nr += 1
LOG.info('Downloaded %s.', i18n.pluralize("image", dl_counter))
LOG.info("Downloaded %s.", i18n.pluralize("image", dl_counter))
except TimeoutError: # some ads do not require images
LOG.warning('No image area found. Continuing without downloading images.')
LOG.warning("No image area found. Continuing without downloading images.")
return img_paths
def extract_ad_id_from_ad_url(self, url: str) -> int:
def extract_ad_id_from_ad_url(self, url:str) -> int:
"""
Extracts the ID of an ad, given by its reference link.
:param url: the URL to the ad page
:return: the ad ID, a (ten-digit) integer number
"""
num_part = url.split('/')[-1] # suffix
id_part = num_part.split('-')[0]
num_part = url.split("/")[-1] # suffix
id_part = num_part.split("-")[0]
try:
path = url.split('?', 1)[0] # Remove query string if present
last_segment = path.rstrip('/').split('/')[-1] # Get last path component
id_part = last_segment.split('-')[0] # Extract part before first hyphen
path = url.split("?", 1)[0] # Remove query string if present
last_segment = path.rstrip("/").split("/")[-1] # Get last path component
id_part = last_segment.split("-")[0] # Extract part before first hyphen
return int(id_part)
except (IndexError, ValueError) as ex:
LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex)
@@ -135,41 +135,41 @@ class AdExtractor(WebScrapingMixin):
:return: the links to your ad pages
"""
# navigate to "your ads" page
await self.web_open('https://www.kleinanzeigen.de/m-meine-anzeigen.html')
await self.web_open("https://www.kleinanzeigen.de/m-meine-anzeigen.html")
await self.web_sleep(2000, 3000) # Consider replacing with explicit waits later
# Try to find the main ad list container first
try:
ad_list_container = await self.web_find(By.ID, 'my-manageitems-adlist')
ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
except TimeoutError:
LOG.warning('Ad list container #my-manageitems-adlist not found. Maybe no ads present?')
LOG.warning("Ad list container #my-manageitems-adlist not found. Maybe no ads present?")
return []
# --- Pagination handling ---
multi_page = False
try:
# Correct selector: Use uppercase '.Pagination'
pagination_section = await self.web_find(By.CSS_SELECTOR, '.Pagination', timeout=10) # Increased timeout slightly
pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = 10) # Increased timeout slightly
# Correct selector: Use 'aria-label'
# Also check if the button is actually present AND potentially enabled (though enabled check isn't strictly necessary here, only for clicking later)
next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section)
next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
if next_buttons:
# Check if at least one 'Nächste' button is not disabled (optional but good practice)
enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get('disabled')]
enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get("disabled")]
if enabled_next_buttons:
multi_page = True
LOG.info('Multiple ad pages detected.')
LOG.info("Multiple ad pages detected.")
else:
LOG.info('Next button found but is disabled. Assuming single effective page.')
LOG.info("Next button found but is disabled. Assuming single effective page.")
else:
LOG.info('No "Naechste" button found within pagination. Assuming single page.')
except TimeoutError:
# This will now correctly trigger only if the '.Pagination' div itself is not found
LOG.info('No pagination controls found. Assuming single page.')
LOG.info("No pagination controls found. Assuming single page.")
except Exception as e:
LOG.exception("Error during pagination detection: %s", e)
LOG.info('Assuming single page due to error during pagination check.')
LOG.info("Assuming single page due to error during pagination check.")
# --- End Pagination Handling ---
refs:list[str] = []
@@ -182,8 +182,8 @@ class AdExtractor(WebScrapingMixin):
# Re-find the ad list container on the current page/state
try:
ad_list_container = await self.web_find(By.ID, 'my-manageitems-adlist')
list_items = await self.web_find_all(By.CLASS_NAME, 'cardbox', parent=ad_list_container)
ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
list_items = await self.web_find_all(By.CLASS_NAME, "cardbox", parent = ad_list_container)
LOG.info("Found %s ad items on page %s.", len(list_items), current_page)
except TimeoutError:
LOG.warning("Could not find ad list container or items on page %s.", current_page)
@@ -192,7 +192,7 @@ class AdExtractor(WebScrapingMixin):
# Extract references using the CORRECTED selector
try:
page_refs = [
(await self.web_find(By.CSS_SELECTOR, 'div.manageitems-item-ad h3 a.text-onSurface', parent=li)).attrs['href']
(await self.web_find(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = li)).attrs["href"]
for li in list_items
]
refs.extend(page_refs)
@@ -207,12 +207,12 @@ class AdExtractor(WebScrapingMixin):
# --- Navigate to next page ---
try:
# Find the pagination section again (scope might have changed after scroll/wait)
pagination_section = await self.web_find(By.CSS_SELECTOR, '.Pagination', timeout=5)
pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = 5)
# Find the "Next" button using the correct aria-label selector and ensure it's not disabled
next_button_element = None
possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section)
possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
for btn in possible_next_buttons:
if not btn.attrs.get('disabled'): # Check if the button is enabled
if not btn.attrs.get("disabled"): # Check if the button is enabled
next_button_element = btn
break # Found an enabled next button
@@ -235,7 +235,7 @@ class AdExtractor(WebScrapingMixin):
# --- End Navigation ---
if not refs:
LOG.warning('No ad URLs were extracted.')
LOG.warning("No ad URLs were extracted.")
return refs
@@ -246,27 +246,27 @@ class AdExtractor(WebScrapingMixin):
"""
if reflect.is_integer(id_or_url):
# navigate to start page, otherwise page can be None!
await self.web_open('https://www.kleinanzeigen.de/')
await self.web_open("https://www.kleinanzeigen.de/")
# enter the ad ID into the search bar
await self.web_input(By.ID, "site-search-query", id_or_url)
# navigate to ad page and wait
await self.web_check(By.ID, 'site-search-submit', Is.CLICKABLE)
submit_button = await self.web_find(By.ID, 'site-search-submit')
await self.web_check(By.ID, "site-search-submit", Is.CLICKABLE)
submit_button = await self.web_find(By.ID, "site-search-submit")
await submit_button.click()
else:
await self.web_open(str(id_or_url)) # navigate to URL directly given
await self.web_sleep()
# handle the case that invalid ad ID given
if self.page.url.endswith('k0'):
LOG.error('There is no ad under the given ID.')
if self.page.url.endswith("k0"):
LOG.error("There is no ad under the given ID.")
return False
# close (warning) popup, if given
try:
await self.web_find(By.ID, 'vap-ovrly-secure')
LOG.warning('A popup appeared!')
await self.web_click(By.CLASS_NAME, 'mfp-close')
await self.web_find(By.ID, "vap-ovrly-secure")
LOG.warning("A popup appeared!")
await self.web_click(By.CLASS_NAME, "mfp-close")
await self.web_sleep()
except TimeoutError:
pass
@@ -280,22 +280,22 @@ class AdExtractor(WebScrapingMixin):
:param ad_id: the ad ID, already extracted by a calling function
:return: a dictionary with the keys as given in an ad YAML, and their respective values
"""
info:dict[str, Any] = {'active': True}
info:dict[str, Any] = {"active": True}
# extract basic info
info['type'] = 'OFFER' if 's-anzeige' in self.page.url else 'WANTED'
title:str = await self.web_text(By.ID, 'viewad-title')
info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
title:str = await self.web_text(By.ID, "viewad-title")
LOG.info('Extracting information from ad with title "%s"', title)
info['category'] = await self._extract_category_from_ad_page()
info['title'] = title
info["category"] = await self._extract_category_from_ad_page()
info["title"] = title
# Get raw description text
raw_description = (await self.web_text(By.ID, 'viewad-description-text')).strip()
raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip()
# Get prefix and suffix from config
prefix = get_description_affixes(self.config, prefix=True)
suffix = get_description_affixes(self.config, prefix=False)
prefix = get_description_affixes(self.config, prefix = True)
suffix = get_description_affixes(self.config, prefix = False)
# Remove prefix and suffix if present
description_text = raw_description
@@ -304,38 +304,38 @@ class AdExtractor(WebScrapingMixin):
if suffix and description_text.endswith(suffix.strip()):
description_text = description_text[:-len(suffix.strip())]
info['description'] = description_text.strip()
info["description"] = description_text.strip()
info['special_attributes'] = await self._extract_special_attributes_from_ad_page()
if "art_s" in info['special_attributes']:
info["special_attributes"] = await self._extract_special_attributes_from_ad_page()
if "art_s" in info["special_attributes"]:
# change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
info['category'] = f"{info['category']}/{info['special_attributes']['art_s']}"
del info['special_attributes']['art_s']
if "schaden_s" in info['special_attributes']:
info["category"] = f"{info['category']}/{info['special_attributes']['art_s']}"
del info["special_attributes"]["art_s"]
if "schaden_s" in info["special_attributes"]:
# change f to 'nein' and 't' to 'ja'
info['special_attributes']['schaden_s'] = info['special_attributes']['schaden_s'].translate(str.maketrans({'t': 'ja', 'f': 'nein'}))
info['price'], info['price_type'] = await self._extract_pricing_info_from_ad_page()
info['shipping_type'], info['shipping_costs'], info['shipping_options'] = await self._extract_shipping_info_from_ad_page()
info['sell_directly'] = await self._extract_sell_directly_from_ad_page()
info['images'] = await self._download_images_from_ad_page(directory, ad_id)
info['contact'] = await self._extract_contact_from_ad_page()
info['id'] = ad_id
info["special_attributes"]["schaden_s"] = info["special_attributes"]["schaden_s"].translate(str.maketrans({"t": "ja", "f": "nein"}))
info["price"], info["price_type"] = await self._extract_pricing_info_from_ad_page()
info["shipping_type"], info["shipping_costs"], info["shipping_options"] = await self._extract_shipping_info_from_ad_page()
info["sell_directly"] = await self._extract_sell_directly_from_ad_page()
info["images"] = await self._download_images_from_ad_page(directory, ad_id)
info["contact"] = await self._extract_contact_from_ad_page()
info["id"] = ad_id
try: # try different locations known for creation date element
creation_date = await self.web_text(By.XPATH,
'/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span')
"/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
except TimeoutError:
creation_date = await self.web_text(By.CSS_SELECTOR, '#viewad-extra-info > div:nth-child(1) > span:nth-child(2)')
creation_date = await self.web_text(By.CSS_SELECTOR, "#viewad-extra-info > div:nth-child(1) > span:nth-child(2)")
# convert creation date to ISO format
created_parts = creation_date.split('.')
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00'
created_parts = creation_date.split(".")
creation_date = created_parts[2] + "-" + created_parts[1] + "-" + created_parts[0] + " 00:00:00"
creation_date = datetime.fromisoformat(creation_date).isoformat()
info['created_on'] = creation_date
info['updated_on'] = None # will be set later on
info["created_on"] = creation_date
info["updated_on"] = None # will be set later on
# Calculate the initial hash for the downloaded ad
info['content_hash'] = calculate_content_hash(info)
info["content_hash"] = calculate_content_hash(info)
return info
@@ -346,12 +346,12 @@ class AdExtractor(WebScrapingMixin):
:return: a category string of form abc/def, where a-f are digits
"""
category_line = await self.web_find(By.ID, 'vap-brdcrmb')
category_first_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line)
category_second_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line)
cat_num_first = category_first_part.attrs['href'].split('/')[-1][1:]
cat_num_second = category_second_part.attrs['href'].split('/')[-1][1:]
category:str = cat_num_first + '/' + cat_num_second
category_line = await self.web_find(By.ID, "vap-brdcrmb")
category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
cat_num_first = category_first_part.attrs["href"].split("/")[-1][1:]
cat_num_second = category_second_part.attrs["href"].split("/")[-1][1:]
category:str = cat_num_first + "/" + cat_num_second
return category
@@ -368,7 +368,7 @@ class AdExtractor(WebScrapingMixin):
special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"]
special_attributes = dict(item.split(":") for item in special_attributes_str.split("|") if ":" in item)
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s') and k != "versand_s"}
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith(".versand_s") and k != "versand_s"}
return special_attributes
async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
@@ -378,24 +378,24 @@ class AdExtractor(WebScrapingMixin):
:return: the price of the offer (optional); and the pricing type
"""
try:
price_str:str = await self.web_text(By.ID, 'viewad-price')
price_str:str = await self.web_text(By.ID, "viewad-price")
price:int | None = None
match price_str.split()[-1]:
case '':
price_type = 'FIXED'
case "":
price_type = "FIXED"
# replace('.', '') is to remove the thousands separator before parsing as int
price = int(price_str.replace('.', '').split()[0])
case 'VB':
price_type = 'NEGOTIABLE'
price = int(price_str.replace(".", "").split()[0])
case "VB":
price_type = "NEGOTIABLE"
if price_str != "VB": # can be either 'X € VB', or just 'VB'
price = int(price_str.replace('.', '').split()[0])
case 'verschenken':
price_type = 'GIVE_AWAY'
price = int(price_str.replace(".", "").split()[0])
case "verschenken":
price_type = "GIVE_AWAY"
case _:
price_type = 'NOT_APPLICABLE'
price_type = "NOT_APPLICABLE"
return price, price_type
except TimeoutError: # no 'commercial' ad, has no pricing box etc.
return None, 'NOT_APPLICABLE'
return None, "NOT_APPLICABLE"
async def _extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
"""
@@ -403,17 +403,17 @@ class AdExtractor(WebScrapingMixin):
:return: the shipping type, and the shipping price (optional)
"""
ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None
ship_type, ship_costs, shipping_options = "NOT_APPLICABLE", None, None
try:
shipping_text = await self.web_text(By.CLASS_NAME, 'boxedarticle--details--shipping')
shipping_text = await self.web_text(By.CLASS_NAME, "boxedarticle--details--shipping")
# e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung'
if shipping_text == 'Nur Abholung':
ship_type = 'PICKUP'
elif shipping_text == 'Versand möglich':
ship_type = 'SHIPPING'
elif '' in shipping_text:
shipping_price_parts = shipping_text.split(' ')
ship_type = 'SHIPPING'
if shipping_text == "Nur Abholung":
ship_type = "PICKUP"
elif shipping_text == "Versand möglich":
ship_type = "SHIPPING"
elif "" in shipping_text:
shipping_price_parts = shipping_text.split(" ")
ship_type = "SHIPPING"
ship_costs = float(misc.parse_decimal(shipping_price_parts[-2]))
# reading shipping option from kleinanzeigen
@@ -425,7 +425,7 @@ class AdExtractor(WebScrapingMixin):
internal_shipping_opt = [x for x in shipping_costs if x["priceInEuroCent"] == ship_costs * 100]
if not internal_shipping_opt:
return 'NOT_APPLICABLE', ship_costs, shipping_options
return "NOT_APPLICABLE", ship_costs, shipping_options
# map to internal shipping identifiers used by kleinanzeigen-bot
shipping_option_mapping = {
@@ -440,13 +440,13 @@ class AdExtractor(WebScrapingMixin):
"HERMES_004": "Hermes_L"
}
shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]['id'])
shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]["id"])
if not shipping_option:
return 'NOT_APPLICABLE', ship_costs, shipping_options
return "NOT_APPLICABLE", ship_costs, shipping_options
shipping_options = [shipping_option]
except TimeoutError: # no pricing box -> no shipping given
ship_type = 'NOT_APPLICABLE'
ship_type = "NOT_APPLICABLE"
return ship_type, ship_costs, shipping_options
@@ -457,7 +457,7 @@ class AdExtractor(WebScrapingMixin):
:return: a boolean indicating whether the sell directly option is active (optional)
"""
try:
buy_now_is_active:bool = 'Direkt kaufen' in (await self.web_text(By.ID, 'payment-buttons-sidebar'))
buy_now_is_active:bool = "Direkt kaufen" in (await self.web_text(By.ID, "payment-buttons-sidebar"))
return buy_now_is_active
except TimeoutError:
return None
@@ -469,34 +469,34 @@ class AdExtractor(WebScrapingMixin):
:return: a dictionary containing the address parts with their corresponding values
"""
contact:dict[str, (str | None)] = {}
address_text = await self.web_text(By.ID, 'viewad-locality')
address_text = await self.web_text(By.ID, "viewad-locality")
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
try:
street = (await self.web_text(By.ID, 'street-address'))[:-1] # trailing comma
contact['street'] = street
street = (await self.web_text(By.ID, "street-address"))[:-1] # trailing comma
contact["street"] = street
except TimeoutError:
LOG.info('No street given in the contact.')
LOG.info("No street given in the contact.")
(zipcode, location) = address_text.split(" ", 1)
contact['zipcode'] = zipcode # e.g. 19372
contact['location'] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
contact["zipcode"] = zipcode # e.g. 19372
contact["location"] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
contact_person_element:Element = await self.web_find(By.ID, 'viewad-contact')
name_element = await self.web_find(By.CLASS_NAME, 'iconlist-text', parent = contact_person_element)
contact_person_element:Element = await self.web_find(By.ID, "viewad-contact")
name_element = await self.web_find(By.CLASS_NAME, "iconlist-text", parent = contact_person_element)
try:
name = await self.web_text(By.TAG_NAME, 'a', parent = name_element)
name = await self.web_text(By.TAG_NAME, "a", parent = name_element)
except TimeoutError: # edge case: name without link
name = await self.web_text(By.TAG_NAME, 'span', parent = name_element)
contact['name'] = name
name = await self.web_text(By.TAG_NAME, "span", parent = name_element)
contact["name"] = name
if 'street' not in contact:
contact['street'] = None
if "street" not in contact:
contact["street"] = None
try: # phone number is unusual for non-professional sellers today
phone_element = await self.web_find(By.ID, 'viewad-contact-phone')
phone_number = await self.web_text(By.TAG_NAME, 'a', parent = phone_element)
contact['phone'] = ''.join(phone_number.replace('-', ' ').split(' ')).replace('+49(0)', '0')
phone_element = await self.web_find(By.ID, "viewad-contact-phone")
phone_number = await self.web_text(By.TAG_NAME, "a", parent = phone_element)
contact["phone"] = "".join(phone_number.replace("-", " ").split(" ")).replace("+49(0)", "0")
except TimeoutError:
contact['phone'] = None # phone seems to be a deprecated feature (for non-professional users)
contact["phone"] = None # phone seems to be a deprecated feature (for non-professional users)
# also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/
return contact

View File

@@ -96,7 +96,7 @@ def save_dict(filepath:str, content:dict[str, Any]) -> None:
yaml.indent(mapping = 2, sequence = 4, offset = 2)
yaml.representer.add_representer(str, # use YAML | block style for multi-line strings
lambda dumper, data:
dumper.represent_scalar('tag:yaml.org,2002:str', data, style = '|' if '\n' in data else None)
dumper.represent_scalar("tag:yaml.org,2002:str", data, style = "|" if "\n" in data else None)
)
yaml.allow_duplicate_keys = False
yaml.explicit_start = False

View File

@@ -3,14 +3,14 @@
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import sys, traceback # isort: skip
from types import FrameType, TracebackType
from typing import Any, Final
from typing import Final
from . import loggers
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
def on_exception(ex_type: type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> None:
def on_exception(ex_type:type[BaseException] | None, ex_value:BaseException | None, ex_traceback:TracebackType | None) -> None:
if ex_type is None or ex_value is None:
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s", ex_type, ex_value)
return

View File

@@ -11,6 +11,6 @@ class KleinanzeigenBotError(RuntimeError):
class CaptchaEncountered(KleinanzeigenBotError):
"""Raised when a Captcha was detected and auto-restart is enabled."""
def __init__(self, restart_delay: timedelta) -> None:
def __init__(self, restart_delay:timedelta) -> None:
super().__init__()
self.restart_delay = restart_delay

View File

@@ -42,7 +42,7 @@ class Locale(NamedTuple):
return f"{self.language}{region_part}{encoding_part}"
@staticmethod
def of(locale_string: str) -> 'Locale':
def of(locale_string:str) -> "Locale":
"""
>>> Locale.of("en_US.UTF-8")
Locale(language='en', region='US', encoding='UTF-8')
@@ -86,11 +86,11 @@ def _detect_locale() -> Locale:
return Locale.of(lang) if lang else Locale("en", "US", "UTF-8")
_CURRENT_LOCALE: Locale = _detect_locale()
_TRANSLATIONS: dict[str, Any] | None = None
_CURRENT_LOCALE:Locale = _detect_locale()
_TRANSLATIONS:dict[str, Any] | None = None
def translate(text:object, caller: inspect.FrameInfo | None) -> str:
def translate(text:object, caller:inspect.FrameInfo | None) -> str:
text = str(text)
if not caller:
return text
@@ -105,7 +105,7 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
if not _TRANSLATIONS:
return text
module_name = caller.frame.f_globals.get('__name__') # pylint: disable=redefined-outer-name
module_name = caller.frame.f_globals.get("__name__") # pylint: disable=redefined-outer-name
file_basename = os.path.splitext(os.path.basename(caller.filename))[0]
if module_name and module_name.endswith(f".{file_basename}"):
module_name = module_name[:-(len(file_basename) + 1)]
@@ -124,9 +124,9 @@ gettext.gettext = lambda message: translate(_original_gettext(message), reflect.
for module_name, module in sys.modules.items():
if module is None or module_name in sys.builtin_module_names:
continue
if hasattr(module, '_') and module._ is _original_gettext:
if hasattr(module, "_") and module._ is _original_gettext:
module._ = gettext.gettext # type: ignore[attr-defined]
if hasattr(module, 'gettext') and module.gettext is _original_gettext:
if hasattr(module, "gettext") and module.gettext is _original_gettext:
module.gettext = gettext.gettext # type: ignore[attr-defined]
@@ -190,8 +190,8 @@ def pluralize(noun:str, count:int | Sized, *, prefix_with_count:bool = True) ->
# English
if len(noun) < 2: # noqa: PLR2004 Magic value used in comparison
return f"{prefix}{noun}s"
if noun.endswith(('s', 'sh', 'ch', 'x', 'z')):
if noun.endswith(("s", "sh", "ch", "x", "z")):
return f"{prefix}{noun}es"
if noun.endswith('y') and noun[-2].lower() not in "aeiou":
if noun.endswith("y") and noun[-2].lower() not in "aeiou":
return f"{prefix}{noun[:-1]}ies"
return f"{prefix}{noun}s"

View File

@@ -28,11 +28,11 @@ LOG_ROOT:Final[logging.Logger] = logging.getLogger()
class _MaxLevelFilter(logging.Filter):
def __init__(self, level: int) -> None:
def __init__(self, level:int) -> None:
super().__init__()
self.level = level
def filter(self, record: logging.LogRecord) -> bool:
def filter(self, record:logging.LogRecord) -> bool:
return record.levelno <= self.level
@@ -104,7 +104,7 @@ def configure_console_logging() -> None:
class LogFileHandle:
"""Encapsulates a log file handler with close and status methods."""
def __init__(self, file_path: str, handler: RotatingFileHandler, logger: logging.Logger) -> None:
def __init__(self, file_path:str, handler:RotatingFileHandler, logger:logging.Logger) -> None:
self.file_path = file_path
self._handler:RotatingFileHandler | None = handler
self._logger = logger
@@ -146,14 +146,14 @@ def flush_all_handlers() -> None:
handler.flush()
def get_logger(name: str | None = None) -> logging.Logger:
def get_logger(name:str | None = None) -> logging.Logger:
"""
Returns a localized logger
"""
class TranslatingLogger(logging.Logger):
def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
def _log(self, level:int, msg:object, *args:Any, **kwargs:Any) -> None:
if level != DEBUG: # debug messages should not be translated
msg = i18n.translate(msg, reflect.get_caller(2))
super()._log(level, msg, *args, **kwargs)

View File

@@ -10,7 +10,7 @@ from typing import Any, TypeVar
from . import i18n
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
T = TypeVar('T')
T = TypeVar("T")
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
@@ -49,7 +49,7 @@ def is_frozen() -> bool:
return getattr(sys, "frozen", False)
async def ainput(prompt: str) -> str:
async def ainput(prompt:str) -> str:
return await asyncio.to_thread(input, f'{prompt} ')
@@ -84,10 +84,10 @@ def parse_decimal(number:float | int | str) -> decimal.Decimal:
def parse_datetime(
date: datetime | str | None,
date:datetime | str | None,
*,
add_timezone_if_missing: bool = True,
use_local_timezone: bool = True
add_timezone_if_missing:bool = True,
use_local_timezone:bool = True
) -> datetime | None:
"""
Parses a datetime object or ISO-formatted string.
@@ -152,22 +152,22 @@ def parse_duration(text:str) -> timedelta:
>>> parse_duration("invalid input")
datetime.timedelta(0)
"""
pattern = re.compile(r'(\d+)\s*([dhms])')
pattern = re.compile(r"(\d+)\s*([dhms])")
parts = pattern.findall(text.lower())
kwargs: dict[str, int] = {}
kwargs:dict[str, int] = {}
for value, unit in parts:
if unit == 'd':
kwargs['days'] = kwargs.get('days', 0) + int(value)
elif unit == 'h':
kwargs['hours'] = kwargs.get('hours', 0) + int(value)
elif unit == 'm':
kwargs['minutes'] = kwargs.get('minutes', 0) + int(value)
elif unit == 's':
kwargs['seconds'] = kwargs.get('seconds', 0) + int(value)
if unit == "d":
kwargs["days"] = kwargs.get("days", 0) + int(value)
elif unit == "h":
kwargs["hours"] = kwargs.get("hours", 0) + int(value)
elif unit == "m":
kwargs["minutes"] = kwargs.get("minutes", 0) + int(value)
elif unit == "s":
kwargs["seconds"] = kwargs.get("seconds", 0) + int(value)
return timedelta(**kwargs)
def format_timedelta(td: timedelta) -> str:
def format_timedelta(td:timedelta) -> str:
"""
Formats a timedelta into a human-readable string using the pluralize utility.

View File

@@ -5,7 +5,7 @@ import inspect
from typing import Any
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
def get_caller(depth:int = 1) -> inspect.FrameInfo | None:
stack = inspect.stack()
try:
for frame in stack[depth + 1:]:

View File

@@ -165,7 +165,7 @@ class WebScrapingMixin:
prefs_file = os.path.join(profile_dir, "Preferences")
if not os.path.exists(prefs_file):
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
with open(prefs_file, "w", encoding = 'UTF-8') as fd:
with open(prefs_file, "w", encoding = "UTF-8") as fd:
json.dump({
"credentials_enable_service": False,
"enable_do_not_track": True,
@@ -234,16 +234,16 @@ class WebScrapingMixin:
case "Windows":
browser_paths = [
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r"\Microsoft\Edge\Application\msedge.exe",
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r"\Microsoft\Edge\Application\msedge.exe",
os.environ["PROGRAMFILES"] + r'\Chromium\Application\chrome.exe',
os.environ["PROGRAMFILES(X86)"] + r'\Chromium\Application\chrome.exe',
os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe',
os.environ["PROGRAMFILES"] + r"\Chromium\Application\chrome.exe",
os.environ["PROGRAMFILES(X86)"] + r"\Chromium\Application\chrome.exe",
os.environ["LOCALAPPDATA"] + r"\Chromium\Application\chrome.exe",
os.environ["PROGRAMFILES"] + r'\Chrome\Application\chrome.exe',
os.environ["PROGRAMFILES(X86)"] + r'\Chrome\Application\chrome.exe',
os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe',
os.environ["PROGRAMFILES"] + r"\Chrome\Application\chrome.exe",
os.environ["PROGRAMFILES(X86)"] + r"\Chrome\Application\chrome.exe",
os.environ["LOCALAPPDATA"] + r"\Chrome\Application\chrome.exe",
shutil.which("msedge.exe"),
shutil.which("chromium.exe"),
@@ -259,8 +259,8 @@ class WebScrapingMixin:
raise AssertionError(_("Installed browser could not be detected"))
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
timeout:int | float = 5, timeout_error_message: str = "") -> T:
async def web_await(self, condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
timeout:int | float = 5, timeout_error_message:str = "") -> T:
"""
Blocks/waits until the given condition is met.
@@ -523,7 +523,7 @@ class WebScrapingMixin:
return response
# pylint: enable=dangerous-default-value
async def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10_000, *, scroll_back_top: bool = False) -> None:
async def web_scroll_page_down(self, scroll_length:int = 10, scroll_speed:int = 10_000, *, scroll_back_top:bool = False) -> None:
"""
Smoothly scrolls the current web page down.
@@ -532,7 +532,7 @@ class WebScrapingMixin:
:param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
"""
current_y_pos = 0
bottom_y_pos: int = await self.web_execute('document.body.scrollHeight') # get bottom position
bottom_y_pos:int = await self.web_execute("document.body.scrollHeight") # get bottom position
while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached
current_y_pos += scroll_length
await self.web_execute(f'window.scrollTo(0, {current_y_pos})') # scroll one step

View File

@@ -1,8 +1,6 @@
"""
SPDX-FileCopyrightText: © Jens Bergmann and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import os
from typing import Any, Final
from unittest.mock import MagicMock
@@ -21,7 +19,7 @@ LOG.setLevel(loggers.DEBUG)
@pytest.fixture
def test_data_dir(tmp_path: str) -> str:
def test_data_dir(tmp_path:str) -> str:
"""Provides a temporary directory for test data.
This fixture uses pytest's built-in tmp_path fixture to create a temporary
@@ -41,33 +39,33 @@ def sample_config() -> dict[str, Any]:
- Publishing settings
"""
return {
'login': {
'username': 'testuser',
'password': 'testpass'
"login": {
"username": "testuser",
"password": "testpass"
},
'browser': {
'arguments': [],
'binary_location': None,
'extensions': [],
'use_private_window': True,
'user_data_dir': None,
'profile_name': None
"browser": {
"arguments": [],
"binary_location": None,
"extensions": [],
"use_private_window": True,
"user_data_dir": None,
"profile_name": None
},
'ad_defaults': {
'description': {
'prefix': 'Test Prefix',
'suffix': 'Test Suffix'
"ad_defaults": {
"description": {
"prefix": "Test Prefix",
"suffix": "Test Suffix"
}
},
'publishing': {
'delete_old_ads': 'BEFORE_PUBLISH',
'delete_old_ads_by_title': False
"publishing": {
"delete_old_ads": "BEFORE_PUBLISH",
"delete_old_ads_by_title": False
}
}
@pytest.fixture
def test_bot(sample_config: dict[str, Any]) -> KleinanzeigenBot:
def test_bot(sample_config:dict[str, Any]) -> KleinanzeigenBot:
"""Provides a fresh KleinanzeigenBot instance for all test classes.
Dependencies:
@@ -89,7 +87,7 @@ def browser_mock() -> MagicMock:
@pytest.fixture
def log_file_path(test_data_dir: str) -> str:
def log_file_path(test_data_dir:str) -> str:
"""Provides a temporary path for log files.
Dependencies:
@@ -99,7 +97,7 @@ def log_file_path(test_data_dir: str) -> str:
@pytest.fixture
def test_extractor(browser_mock: MagicMock, sample_config: dict[str, Any]) -> AdExtractor:
def test_extractor(browser_mock:MagicMock, sample_config:dict[str, Any]) -> AdExtractor:
"""Provides a fresh AdExtractor instance for testing.
Dependencies:

View File

@@ -122,9 +122,9 @@ def test_calculate_content_hash_with_none_values() -> None:
)
])
def test_get_description_affixes(
config: dict[str, Any],
prefix: bool,
expected: str
config:dict[str, Any],
prefix:bool,
expected:str
) -> None:
"""Test get_description_affixes function with various inputs."""
result = ads.get_description_affixes(config, prefix = prefix)
@@ -157,7 +157,7 @@ def test_get_description_affixes(
""
)
])
def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool, expected: str) -> None:
def test_get_description_affixes_edge_cases(config:dict[str, Any], prefix:bool, expected:str) -> None:
"""Test edge cases for description affix handling."""
assert ads.get_description_affixes(config, prefix = prefix) == expected
@@ -170,7 +170,7 @@ def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool
(3.14, ""), # Test with a float
(set(), ""), # Test with an empty set
])
def test_get_description_affixes_edge_cases_non_dict(config: Any, expected: str) -> None:
def test_get_description_affixes_edge_cases_non_dict(config:Any, expected:str) -> None:
"""Test get_description_affixes function with non-dict inputs."""
result = ads.get_description_affixes(config, prefix=True)
result = ads.get_description_affixes(config, prefix = True)
assert result == expected

View File

@@ -12,21 +12,21 @@ class TestKleinanzeigenBot:
def bot(self) -> KleinanzeigenBot:
return KleinanzeigenBot()
def test_parse_args_help(self, bot: KleinanzeigenBot) -> None:
def test_parse_args_help(self, bot:KleinanzeigenBot) -> None:
"""Test parsing of help command"""
bot.parse_args(["app", "help"])
assert bot.command == "help"
assert bot.ads_selector == "due"
assert not bot.keep_old_ads
def test_parse_args_publish(self, bot: KleinanzeigenBot) -> None:
def test_parse_args_publish(self, bot:KleinanzeigenBot) -> None:
"""Test parsing of publish command with options"""
bot.parse_args(["app", "publish", "--ads=all", "--keep-old"])
assert bot.command == "publish"
assert bot.ads_selector == "all"
assert bot.keep_old_ads
def test_get_version(self, bot: KleinanzeigenBot) -> None:
def test_get_version(self, bot:KleinanzeigenBot) -> None:
"""Test version retrieval"""
version = bot.get_version()
assert isinstance(version, str)

View File

@@ -12,31 +12,31 @@ from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
class _DimensionsDict(TypedDict):
dimension108: str
dimension108:str
class _UniversalAnalyticsOptsDict(TypedDict):
dimensions: _DimensionsDict
dimensions:_DimensionsDict
class _BelenConfDict(TypedDict):
universalAnalyticsOpts: _UniversalAnalyticsOptsDict
universalAnalyticsOpts:_UniversalAnalyticsOptsDict
class _SpecialAttributesDict(TypedDict, total = False):
art_s: str
condition_s: str
art_s:str
condition_s:str
class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never used
belen_conf: _BelenConfDict
expected: _SpecialAttributesDict
belen_conf:_BelenConfDict
expected:_SpecialAttributesDict
class TestAdExtractorBasics:
"""Basic synchronous tests for AdExtractor."""
def test_constructor(self, browser_mock: MagicMock, sample_config: dict[str, Any]) -> None:
def test_constructor(self, browser_mock:MagicMock, sample_config:dict[str, Any]) -> None:
"""Test the constructor of AdExtractor"""
extractor = AdExtractor(browser_mock, sample_config)
assert extractor.browser == browser_mock
@@ -48,7 +48,7 @@ class TestAdExtractorBasics:
("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1),
("https://www.kleinanzeigen.de/invalid-url", -1),
])
def test_extract_ad_id_from_ad_url(self, test_extractor: AdExtractor, url: str, expected_id: int) -> None:
def test_extract_ad_id_from_ad_url(self, test_extractor:AdExtractor, url:str, expected_id:int) -> None:
"""Test extraction of ad ID from different URL formats."""
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
@@ -66,19 +66,19 @@ class TestAdExtractorPricing:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_pricing_info(
self, test_extractor: AdExtractor, price_text: str, expected_price: int | None, expected_type: str
self, test_extractor:AdExtractor, price_text:str, expected_price:int | None, expected_type:str
) -> None:
"""Test price extraction with different formats"""
with patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = price_text):
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = price_text):
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
assert price == expected_price
assert price_type == expected_type
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_pricing_info_timeout(self, test_extractor: AdExtractor) -> None:
async def test_extract_pricing_info_timeout(self, test_extractor:AdExtractor) -> None:
"""Test price extraction when element is not found"""
with patch.object(test_extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError):
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
assert price is None
assert price_type == "NOT_APPLICABLE"
@@ -95,15 +95,15 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info(
self, test_extractor: AdExtractor, shipping_text: str, expected_type: str, expected_cost: float | None
self, test_extractor:AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None
) -> None:
"""Test shipping info extraction with different text formats."""
with patch.object(test_extractor, 'page', MagicMock()), \
patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = shipping_text), \
patch.object(test_extractor, 'web_request', new_callable = AsyncMock) as mock_web_request:
with patch.object(test_extractor, "page", MagicMock()), \
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = shipping_text), \
patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
if expected_cost:
shipping_response: dict[str, Any] = {
shipping_response:dict[str, Any] = {
"data": {
"shippingOptionsResponse": {
"options": [
@@ -125,7 +125,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_options(self, test_extractor: AdExtractor) -> None:
async def test_extract_shipping_info_with_options(self, test_extractor:AdExtractor) -> None:
"""Test shipping info extraction with shipping options."""
shipping_response = {
"content": json.dumps({
@@ -139,9 +139,9 @@ class TestAdExtractorShipping:
})
}
with patch.object(test_extractor, 'page', MagicMock()), \
patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \
patch.object(test_extractor, 'web_request', new_callable = AsyncMock, return_value = shipping_response):
with patch.object(test_extractor, "page", MagicMock()), \
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \
patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response):
shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page()
@@ -154,21 +154,21 @@ class TestAdExtractorNavigation:
"""Tests for navigation related functionality."""
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_url(self, test_extractor: AdExtractor) -> None:
async def test_navigate_to_ad_page_with_url(self, test_extractor:AdExtractor) -> None:
"""Test navigation to ad page using a URL."""
page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
with patch.object(test_extractor, 'page', page_mock), \
patch.object(test_extractor, 'web_open', new_callable = AsyncMock) as mock_web_open, \
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError):
with patch.object(test_extractor, "page", page_mock), \
patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \
patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
result = await test_extractor.naviagte_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345")
assert result is True
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345")
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_id(self, test_extractor: AdExtractor) -> None:
async def test_navigate_to_ad_page_with_id(self, test_extractor:AdExtractor) -> None:
"""Test navigation to ad page using an ID."""
page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
@@ -186,7 +186,7 @@ class TestAdExtractorNavigation:
popup_close_mock.click = AsyncMock()
popup_close_mock.apply = AsyncMock(return_value = True)
def find_mock(selector_type: By, selector_value: str, **_: Any) -> Element | None:
def find_mock(selector_type:By, selector_value:str, **_:Any) -> Element | None:
if selector_type == By.ID and selector_value == "site-search-query":
return input_mock
if selector_type == By.ID and selector_value == "site-search-submit":
@@ -195,20 +195,20 @@ class TestAdExtractorNavigation:
return popup_close_mock
return None
with patch.object(test_extractor, 'page', page_mock), \
patch.object(test_extractor, 'web_open', new_callable = AsyncMock) as mock_web_open, \
patch.object(test_extractor, 'web_input', new_callable = AsyncMock), \
patch.object(test_extractor, 'web_check', new_callable = AsyncMock, return_value = True), \
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, side_effect = find_mock):
with patch.object(test_extractor, "page", page_mock), \
patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \
patch.object(test_extractor, "web_input", new_callable = AsyncMock), \
patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True), \
patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = find_mock):
result = await test_extractor.naviagte_to_ad_page(12345)
assert result is True
mock_web_open.assert_called_with('https://www.kleinanzeigen.de/')
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/")
submit_button_mock.click.assert_awaited_once()
popup_close_mock.click.assert_awaited_once()
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_popup(self, test_extractor: AdExtractor) -> None:
async def test_navigate_to_ad_page_with_popup(self, test_extractor:AdExtractor) -> None:
"""Test navigation to ad page with popup handling."""
page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
@@ -218,18 +218,18 @@ class TestAdExtractorNavigation:
input_mock.send_keys = AsyncMock()
input_mock.apply = AsyncMock(return_value = True)
with patch.object(test_extractor, 'page', page_mock), \
patch.object(test_extractor, 'web_open', new_callable = AsyncMock), \
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, return_value = input_mock), \
patch.object(test_extractor, 'web_click', new_callable = AsyncMock) as mock_web_click, \
patch.object(test_extractor, 'web_check', new_callable = AsyncMock, return_value = True):
with patch.object(test_extractor, "page", page_mock), \
patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock), \
patch.object(test_extractor, "web_click", new_callable = AsyncMock) as mock_web_click, \
patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True):
result = await test_extractor.naviagte_to_ad_page(12345)
assert result is True
mock_web_click.assert_called_with(By.CLASS_NAME, 'mfp-close')
mock_web_click.assert_called_with(By.CLASS_NAME, "mfp-close")
@pytest.mark.asyncio
async def test_navigate_to_ad_page_invalid_id(self, test_extractor: AdExtractor) -> None:
async def test_navigate_to_ad_page_invalid_id(self, test_extractor:AdExtractor) -> None:
"""Test navigation to ad page with invalid ID."""
page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0"
@@ -240,22 +240,22 @@ class TestAdExtractorNavigation:
input_mock.apply = AsyncMock(return_value = True)
input_mock.attrs = {}
with patch.object(test_extractor, 'page', page_mock), \
patch.object(test_extractor, 'web_open', new_callable = AsyncMock), \
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, return_value = input_mock):
with patch.object(test_extractor, "page", page_mock), \
patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock):
result = await test_extractor.naviagte_to_ad_page(99999)
assert result is False
@pytest.mark.asyncio
async def test_extract_own_ads_urls(self, test_extractor: AdExtractor) -> None:
async def test_extract_own_ads_urls(self, test_extractor:AdExtractor) -> None:
"""Test extraction of own ads URLs - basic test."""
with patch.object(test_extractor, 'web_open', new_callable=AsyncMock), \
patch.object(test_extractor, 'web_sleep', new_callable=AsyncMock), \
patch.object(test_extractor, 'web_find', new_callable=AsyncMock) as mock_web_find, \
patch.object(test_extractor, 'web_find_all', new_callable=AsyncMock) as mock_web_find_all, \
patch.object(test_extractor, 'web_scroll_page_down', new_callable=AsyncMock), \
patch.object(test_extractor, 'web_execute', new_callable=AsyncMock):
with patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
patch.object(test_extractor, "web_sleep", new_callable = AsyncMock), \
patch.object(test_extractor, "web_find", new_callable = AsyncMock) as mock_web_find, \
patch.object(test_extractor, "web_find_all", new_callable = AsyncMock) as mock_web_find_all, \
patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock), \
patch.object(test_extractor, "web_execute", new_callable = AsyncMock):
# --- Setup mock objects for DOM elements ---
# Mocks needed for the actual execution flow
@@ -263,7 +263,7 @@ class TestAdExtractorNavigation:
pagination_section_mock = MagicMock()
cardbox_mock = MagicMock() # Represents the <li> element
link_mock = MagicMock() # Represents the <a> element
link_mock.attrs = {'href': '/s-anzeige/test/12345'} # Configure the desired output
link_mock.attrs = {"href": "/s-anzeige/test/12345"} # Configure the desired output
# Mocks for elements potentially checked but maybe not strictly needed for output
# (depending on how robust the mocking is)
@@ -295,20 +295,20 @@ class TestAdExtractorNavigation:
refs = await test_extractor.extract_own_ads_urls()
# --- Assertions ---
assert refs == ['/s-anzeige/test/12345'] # Now it should match
assert refs == ["/s-anzeige/test/12345"] # Now it should match
# Optional: Verify calls were made as expected
mock_web_find.assert_has_calls([
call(By.ID, 'my-manageitems-adlist'),
call(By.CSS_SELECTOR, '.Pagination', timeout=10),
call(By.ID, 'my-manageitems-adlist'),
call(By.CSS_SELECTOR, 'div.manageitems-item-ad h3 a.text-onSurface', parent=cardbox_mock),
], any_order=False) # Check order if important
call(By.ID, "my-manageitems-adlist"),
call(By.CSS_SELECTOR, ".Pagination", timeout = 10),
call(By.ID, "my-manageitems-adlist"),
call(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = cardbox_mock),
], any_order = False) # Check order if important
mock_web_find_all.assert_has_calls([
call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section_mock),
call(By.CLASS_NAME, 'cardbox', parent=ad_list_container_mock),
], any_order=False)
call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section_mock),
call(By.CLASS_NAME, "cardbox", parent = ad_list_container_mock),
], any_order = False)
class TestAdExtractorContent:
@@ -318,14 +318,14 @@ class TestAdExtractorContent:
@pytest.fixture
def extractor_with_config(self) -> AdExtractor:
"""Create extractor with specific config for testing prefix/suffix handling."""
browser_mock = MagicMock(spec=Browser)
browser_mock = MagicMock(spec = Browser)
return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests
@pytest.mark.asyncio
async def test_extract_description_with_affixes(
self,
test_extractor: AdExtractor,
description_test_cases: list[tuple[dict[str, Any], str, str]]
test_extractor:AdExtractor,
description_test_cases:list[tuple[dict[str, Any], str, str]]
) -> None:
"""Test extraction of description with various prefix/suffix configurations."""
# Mock the page
@@ -337,18 +337,18 @@ class TestAdExtractorContent:
test_extractor.config = config
with patch.multiple(test_extractor,
web_text=AsyncMock(side_effect=[
web_text = AsyncMock(side_effect = [
"Test Title", # Title
raw_description, # Raw description (without affixes)
"03.02.2025" # Creation date
]),
_extract_category_from_ad_page=AsyncMock(return_value="160"),
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
_download_images_from_ad_page=AsyncMock(return_value=[]),
_extract_contact_from_ad_page=AsyncMock(return_value={})
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
_download_images_from_ad_page = AsyncMock(return_value = []),
_extract_contact_from_ad_page = AsyncMock(return_value = {})
):
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
assert info["description"] == raw_description
@@ -356,7 +356,7 @@ class TestAdExtractorContent:
@pytest.mark.asyncio
async def test_extract_description_with_affixes_timeout(
self,
test_extractor: AdExtractor
test_extractor:AdExtractor
) -> None:
"""Test handling of timeout when extracting description."""
# Mock the page
@@ -365,18 +365,18 @@ class TestAdExtractorContent:
test_extractor.page = page_mock
with patch.multiple(test_extractor,
web_text=AsyncMock(side_effect=[
web_text = AsyncMock(side_effect = [
"Test Title", # Title succeeds
TimeoutError("Timeout"), # Description times out
"03.02.2025" # Date succeeds
]),
_extract_category_from_ad_page=AsyncMock(return_value="160"),
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
_download_images_from_ad_page=AsyncMock(return_value=[]),
_extract_contact_from_ad_page=AsyncMock(return_value={})
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
_download_images_from_ad_page = AsyncMock(return_value = []),
_extract_contact_from_ad_page = AsyncMock(return_value = {})
):
try:
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
@@ -388,7 +388,7 @@ class TestAdExtractorContent:
@pytest.mark.asyncio
async def test_extract_description_with_affixes_no_affixes(
self,
test_extractor: AdExtractor
test_extractor:AdExtractor
) -> None:
"""Test extraction of description without any affixes in config."""
# Mock the page
@@ -399,24 +399,24 @@ class TestAdExtractorContent:
raw_description = "Original Description"
with patch.multiple(test_extractor,
web_text=AsyncMock(side_effect=[
web_text = AsyncMock(side_effect = [
"Test Title", # Title
raw_description, # Description without affixes
"03.02.2025" # Creation date
]),
_extract_category_from_ad_page=AsyncMock(return_value="160"),
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
_download_images_from_ad_page=AsyncMock(return_value=[]),
_extract_contact_from_ad_page=AsyncMock(return_value={})
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
_download_images_from_ad_page = AsyncMock(return_value = []),
_extract_contact_from_ad_page = AsyncMock(return_value = {})
):
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
assert info["description"] == raw_description
@pytest.mark.asyncio
async def test_extract_sell_directly(self, test_extractor: AdExtractor) -> None:
async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None:
"""Test extraction of sell directly option."""
test_cases = [
("Direkt kaufen", True),
@@ -424,11 +424,11 @@ class TestAdExtractorContent:
]
for text, expected in test_cases:
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, return_value=text):
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = text):
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is expected
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError):
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
@@ -451,15 +451,15 @@ class TestAdExtractorCategory:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_category(self, extractor: AdExtractor) -> None:
async def test_extract_category(self, extractor:AdExtractor) -> None:
"""Test category extraction from breadcrumb."""
category_line = MagicMock()
first_part = MagicMock()
first_part.attrs = {'href': '/s-familie-kind-baby/c17'}
first_part.attrs = {"href": "/s-familie-kind-baby/c17"}
second_part = MagicMock()
second_part.attrs = {'href': '/s-spielzeug/c23'}
second_part.attrs = {"href": "/s-spielzeug/c23"}
with patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
with patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
mock_web_find.side_effect = [
category_line,
first_part,
@@ -469,15 +469,15 @@ class TestAdExtractorCategory:
result = await extractor._extract_category_from_ad_page()
assert result == "17/23"
mock_web_find.assert_any_call(By.ID, 'vap-brdcrmb')
mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line)
mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line)
mock_web_find.assert_any_call(By.ID, "vap-brdcrmb")
mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_special_attributes_empty(self, extractor: AdExtractor) -> None:
async def test_extract_special_attributes_empty(self, extractor:AdExtractor) -> None:
"""Test extraction of special attributes when empty."""
with patch.object(extractor, 'web_execute', new_callable = AsyncMock) as mock_web_execute:
with patch.object(extractor, "web_execute", new_callable = AsyncMock) as mock_web_execute:
mock_web_execute.return_value = {
"universalAnalyticsOpts": {
"dimensions": {
@@ -507,11 +507,11 @@ class TestAdExtractorContact:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_contact_info(self, extractor: AdExtractor) -> None:
async def test_extract_contact_info(self, extractor:AdExtractor) -> None:
"""Test extraction of contact information."""
with patch.object(extractor, 'page', MagicMock()), \
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \
patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
with patch.object(extractor, "page", MagicMock()), \
patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \
patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
mock_web_text.side_effect = [
"12345 Berlin - Mitte",
@@ -535,22 +535,22 @@ class TestAdExtractorContact:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_contact_info_timeout(self, extractor: AdExtractor) -> None:
async def test_extract_contact_info_timeout(self, extractor:AdExtractor) -> None:
"""Test contact info extraction when elements are not found."""
with patch.object(extractor, 'page', MagicMock()), \
patch.object(extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError()), \
patch.object(extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError()), \
with patch.object(extractor, "page", MagicMock()), \
patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError()), \
patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError()), \
pytest.raises(TimeoutError):
await extractor._extract_contact_from_ad_page()
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_contact_info_with_phone(self, extractor: AdExtractor) -> None:
async def test_extract_contact_info_with_phone(self, extractor:AdExtractor) -> None:
"""Test extraction of contact information including phone number."""
with patch.object(extractor, 'page', MagicMock()), \
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \
patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
with patch.object(extractor, "page", MagicMock()), \
patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \
patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
mock_web_text.side_effect = [
"12345 Berlin - Mitte",
@@ -588,19 +588,19 @@ class TestAdExtractorDownload:
return AdExtractor(browser_mock, config_mock)
@pytest.mark.asyncio
async def test_download_ad_existing_directory(self, extractor: AdExtractor) -> None:
async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None:
"""Test downloading an ad when the directory already exists."""
with patch('os.path.exists') as mock_exists, \
patch('os.path.isdir') as mock_isdir, \
patch('os.makedirs') as mock_makedirs, \
patch('os.mkdir') as mock_mkdir, \
patch('shutil.rmtree') as mock_rmtree, \
patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract:
with patch("os.path.exists") as mock_exists, \
patch("os.path.isdir") as mock_isdir, \
patch("os.makedirs") as mock_makedirs, \
patch("os.mkdir") as mock_mkdir, \
patch("shutil.rmtree") as mock_rmtree, \
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
base_dir = 'downloaded-ads'
ad_dir = os.path.join(base_dir, 'ad_12345')
yaml_path = os.path.join(ad_dir, 'ad_12345.yaml')
base_dir = "downloaded-ads"
ad_dir = os.path.join(base_dir, "ad_12345")
yaml_path = os.path.join(ad_dir, "ad_12345.yaml")
# Configure mocks for directory checks
existing_paths = {base_dir, ad_dir}
@@ -632,32 +632,32 @@ class TestAdExtractorDownload:
# Workaround for hard-coded path in download_ad
actual_call = mock_save_dict.call_args
assert actual_call is not None
actual_path = actual_call[0][0].replace('/', os.path.sep)
actual_path = actual_call[0][0].replace("/", os.path.sep)
assert actual_path == yaml_path
assert actual_call[0][1] == mock_extract.return_value
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_download_images_no_images(self, extractor: AdExtractor) -> None:
async def test_download_images_no_images(self, extractor:AdExtractor) -> None:
"""Test image download when no images are found."""
with patch.object(extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError):
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
assert len(image_paths) == 0
@pytest.mark.asyncio
async def test_download_ad(self, extractor: AdExtractor) -> None:
async def test_download_ad(self, extractor:AdExtractor) -> None:
"""Test downloading an entire ad."""
with patch('os.path.exists') as mock_exists, \
patch('os.path.isdir') as mock_isdir, \
patch('os.makedirs') as mock_makedirs, \
patch('os.mkdir') as mock_mkdir, \
patch('shutil.rmtree') as mock_rmtree, \
patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract:
with patch("os.path.exists") as mock_exists, \
patch("os.path.isdir") as mock_isdir, \
patch("os.makedirs") as mock_makedirs, \
patch("os.mkdir") as mock_mkdir, \
patch("shutil.rmtree") as mock_rmtree, \
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
base_dir = 'downloaded-ads'
ad_dir = os.path.join(base_dir, 'ad_12345')
yaml_path = os.path.join(ad_dir, 'ad_12345.yaml')
base_dir = "downloaded-ads"
ad_dir = os.path.join(base_dir, "ad_12345")
yaml_path = os.path.join(ad_dir, "ad_12345.yaml")
# Configure mocks for directory checks
mock_exists.return_value = False
@@ -690,6 +690,6 @@ class TestAdExtractorDownload:
# Get the actual call arguments
actual_call = mock_save_dict.call_args
assert actual_call is not None
actual_path = actual_call[0][0].replace('/', os.path.sep)
actual_path = actual_call[0][0].replace("/", os.path.sep)
assert actual_path == yaml_path
assert actual_call[0][1] == mock_extract.return_value

View File

@@ -13,7 +13,7 @@ from kleinanzeigen_bot.utils import i18n
("fr_CA", ("fr", "CA", "UTF-8")), # Test with language + region, no encoding
("pt_BR.iso8859-1", ("pt", "BR", "ISO8859-1")), # Test with language + region + encoding
])
def test_detect_locale(monkeypatch: MonkeyPatch, lang: str | None, expected: i18n.Locale) -> None:
def test_detect_locale(monkeypatch:MonkeyPatch, lang:str | None, expected:i18n.Locale) -> None:
"""
Pytest test case to verify detect_system_language() behavior under various LANG values.
"""
@@ -49,7 +49,7 @@ def test_pluralize(
noun:str,
count:int,
prefix_with_count:bool,
expected: str
expected:str
) -> None:
i18n.set_current_locale(i18n.Locale(lang, "US", "UTF_8"))

File diff suppressed because it is too large Load Diff

View File

@@ -26,12 +26,12 @@ from ruamel.yaml import YAML
from kleinanzeigen_bot import resources
# Messages that are intentionally not translated (internal/debug messages)
EXCLUDED_MESSAGES: dict[str, set[str]] = {
EXCLUDED_MESSAGES:dict[str, set[str]] = {
"kleinanzeigen_bot/__init__.py": {"############################################"}
}
# Special modules that are known to be needed even if not in messages_by_file
KNOWN_NEEDED_MODULES = {'getopt.py'}
KNOWN_NEEDED_MODULES = {"getopt.py"}
# Type aliases for better readability
ModulePath = str
@@ -45,12 +45,12 @@ MissingDict = dict[FunctionName, dict[Message, set[Message]]]
@dataclass
class MessageLocation:
"""Represents the location of a message in the codebase."""
module: str
function: str
message: str
module:str
function:str
message:str
def _get_function_name(node: ast.AST) -> str:
def _get_function_name(node:ast.AST) -> str:
"""
Get the name of the function containing this AST node.
This matches i18n.py's behavior which only uses the function name for translation lookups.
@@ -63,14 +63,14 @@ def _get_function_name(node: ast.AST) -> str:
The function name or "module" for module-level code
"""
def find_parent_context(n: ast.AST) -> tuple[str | None, str | None]:
def find_parent_context(n:ast.AST) -> tuple[str | None, str | None]:
"""Find the containing class and function names."""
class_name = None
function_name = None
current = n
while hasattr(current, '_parent'):
current = getattr(current, '_parent')
while hasattr(current, "_parent"):
current = getattr(current, "_parent")
if isinstance(current, ast.ClassDef) and not class_name:
class_name = current.name
elif isinstance(current, ast.FunctionDef) or isinstance(current, ast.AsyncFunctionDef) and not function_name:
@@ -84,7 +84,7 @@ def _get_function_name(node: ast.AST) -> str:
return "module" # For module-level code
def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> MessageDict:
def _extract_log_messages(file_path:str, exclude_debug:bool = False) -> MessageDict:
"""
Extract all translatable messages from a Python file with their function context.
@@ -94,27 +94,27 @@ def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> Message
Returns:
Dictionary mapping function names to their messages
"""
with open(file_path, 'r', encoding = 'utf-8') as file:
with open(file_path, "r", encoding = "utf-8") as file:
tree = ast.parse(file.read(), filename = file_path)
# Add parent references for context tracking
for parent in ast.walk(tree):
for child in ast.iter_child_nodes(parent):
setattr(child, '_parent', parent)
setattr(child, "_parent", parent)
messages: MessageDict = defaultdict(lambda: defaultdict(set))
messages:MessageDict = defaultdict(lambda: defaultdict(set))
def add_message(function: str, msg: str) -> None:
def add_message(function:str, msg:str) -> None:
"""Add a message to the messages dictionary."""
if function not in messages:
messages[function] = defaultdict(set)
if msg not in messages[function]:
messages[function][msg] = {msg}
def extract_string_value(node: ast.AST) -> str | None:
def extract_string_value(node:ast.AST) -> str | None:
"""Safely extract string value from an AST node."""
if isinstance(node, ast.Constant):
value = getattr(node, 'value', None)
value = getattr(node, "value", None)
return value if isinstance(value, str) else None
return None
@@ -127,24 +127,24 @@ def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> Message
# Extract messages from various call types
if (isinstance(node.func, ast.Attribute) and
isinstance(node.func.value, ast.Name) and
node.func.value.id in {'LOG', 'logger', 'logging'} and
node.func.attr in {None if exclude_debug else 'debug', 'info', 'warning', 'error', 'exception', 'critical'}):
node.func.value.id in {"LOG", "logger", "logging"} and
node.func.attr in {None if exclude_debug else "debug", "info", "warning", "error", "exception", "critical"}):
if node.args:
msg = extract_string_value(node.args[0])
if msg:
add_message(function_name, msg)
# Handle gettext calls
elif ((isinstance(node.func, ast.Name) and node.func.id == '_') or
(isinstance(node.func, ast.Attribute) and node.func.attr == 'gettext')):
elif ((isinstance(node.func, ast.Name) and node.func.id == "_") or
(isinstance(node.func, ast.Attribute) and node.func.attr == "gettext")):
if node.args:
msg = extract_string_value(node.args[0])
if msg:
add_message(function_name, msg)
# Handle other translatable function calls
elif isinstance(node.func, ast.Name) and node.func.id in {'ainput', 'pluralize', 'ensure'}:
arg_index = 0 if node.func.id == 'ainput' else 1
elif isinstance(node.func, ast.Name) and node.func.id in {"ainput", "pluralize", "ensure"}:
arg_index = 0 if node.func.id == "ainput" else 1
if len(node.args) > arg_index:
msg = extract_string_value(node.args[arg_index])
if msg:
@@ -162,10 +162,10 @@ def _get_all_log_messages(exclude_debug:bool = False) -> dict[str, MessageDict]:
Returns:
Dictionary mapping module paths to their function messages
"""
src_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src', 'kleinanzeigen_bot')
src_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "kleinanzeigen_bot")
print(f"\nScanning for messages in directory: {src_dir}")
messages_by_file: dict[str, MessageDict] = {
messages_by_file:dict[str, MessageDict] = {
# Special case for getopt.py which is imported
"getopt.py": {
"do_longs": {
@@ -187,15 +187,15 @@ def _get_all_log_messages(exclude_debug:bool = False) -> dict[str, MessageDict]:
for root, _, filenames in os.walk(src_dir):
for filename in filenames:
if filename.endswith('.py'):
if filename.endswith(".py"):
file_path = os.path.join(root, filename)
relative_path = os.path.relpath(file_path, src_dir)
if relative_path.startswith('resources/'):
if relative_path.startswith("resources/"):
continue
messages = _extract_log_messages(file_path, exclude_debug)
if messages:
module_path = os.path.join('kleinanzeigen_bot', relative_path)
module_path = module_path.replace(os.sep, '/')
module_path = os.path.join("kleinanzeigen_bot", relative_path)
module_path = module_path.replace(os.sep, "/")
messages_by_file[module_path] = messages
return messages_by_file
@@ -217,7 +217,7 @@ def _get_available_languages() -> list[str]:
return sorted(languages)
def _get_translations_for_language(lang: str) -> TranslationDict:
def _get_translations_for_language(lang:str) -> TranslationDict:
"""
Get translations for a specific language from its YAML file.
@@ -227,7 +227,7 @@ def _get_translations_for_language(lang: str) -> TranslationDict:
Returns:
Dictionary containing all translations for the language
"""
yaml = YAML(typ = 'safe')
yaml = YAML(typ = "safe")
translation_file = f"translations.{lang}.yaml"
print(f"Loading translations from {translation_file}")
content = files(resources).joinpath(translation_file).read_text()
@@ -235,10 +235,10 @@ def _get_translations_for_language(lang: str) -> TranslationDict:
return translations
def _find_translation(translations: TranslationDict,
module: str,
function: str,
message: str) -> bool:
def _find_translation(translations:TranslationDict,
module:str,
function:str,
message:str) -> bool:
"""
Check if a translation exists for a given message in the exact location where i18n.py will look.
This matches the lookup logic in i18n.py which uses dicts.safe_get().
@@ -253,11 +253,11 @@ def _find_translation(translations: TranslationDict,
True if translation exists in the correct location, False otherwise
"""
# Special case for getopt.py
if module == 'getopt.py':
if module == "getopt.py":
return bool(translations.get(module, {}).get(function, {}).get(message))
# Add kleinanzeigen_bot/ prefix if not present
module_path = f'kleinanzeigen_bot/{module}' if not module.startswith('kleinanzeigen_bot/') else module
module_path = f'kleinanzeigen_bot/{module}' if not module.startswith("kleinanzeigen_bot/") else module
# Check if module exists in translations
module_trans = translations.get(module_path, {})
@@ -277,10 +277,10 @@ def _find_translation(translations: TranslationDict,
return has_translation
def _message_exists_in_code(code_messages: dict[str, MessageDict],
module: str,
function: str,
message: str) -> bool:
def _message_exists_in_code(code_messages:dict[str, MessageDict],
module:str,
function:str,
message:str) -> bool:
"""
Check if a message exists in the code at the given location.
This is the reverse of _find_translation - it checks if a translation's message
@@ -296,11 +296,11 @@ def _message_exists_in_code(code_messages: dict[str, MessageDict],
True if message exists in the code, False otherwise
"""
# Special case for getopt.py
if module == 'getopt.py':
if module == "getopt.py":
return bool(code_messages.get(module, {}).get(function, {}).get(message))
# Remove kleinanzeigen_bot/ prefix if present for code message lookup
module_path = module[len('kleinanzeigen_bot/'):] if module.startswith('kleinanzeigen_bot/') else module
module_path = module[len("kleinanzeigen_bot/"):] if module.startswith("kleinanzeigen_bot/") else module
module_path = f'kleinanzeigen_bot/{module_path}'
# Check if module exists in code messages
@@ -318,7 +318,7 @@ def _message_exists_in_code(code_messages: dict[str, MessageDict],
@pytest.mark.parametrize("lang", _get_available_languages())
def test_all_log_messages_have_translations(lang: str) -> None:
def test_all_log_messages_have_translations(lang:str) -> None:
"""
Test that all translatable messages in the code have translations for each language.
@@ -345,7 +345,7 @@ def test_all_log_messages_have_translations(lang: str) -> None:
def make_inner_dict() -> defaultdict[str, set[str]]:
return defaultdict(set)
by_module: defaultdict[str, defaultdict[str, set[str]]] = defaultdict(make_inner_dict)
by_module:defaultdict[str, defaultdict[str, set[str]]] = defaultdict(make_inner_dict)
for loc in missing_translations:
assert isinstance(loc.module, str), "Module must be a string"
@@ -364,7 +364,7 @@ def test_all_log_messages_have_translations(lang: str) -> None:
@pytest.mark.parametrize("lang", _get_available_languages())
def test_no_obsolete_translations(lang: str) -> None:
def test_no_obsolete_translations(lang:str) -> None:
"""
Test that all translations in each language YAML file are actually used in the code.
@@ -376,7 +376,7 @@ def test_no_obsolete_translations(lang: str) -> None:
"""
messages_by_file = _get_all_log_messages(exclude_debug = False)
translations = _get_translations_for_language(lang)
obsolete_items: list[tuple[str, str, str]] = []
obsolete_items:list[tuple[str, str, str]] = []
for module, module_trans in translations.items():
if not isinstance(module_trans, dict):
@@ -402,7 +402,7 @@ def test_no_obsolete_translations(lang: str) -> None:
obsolete_str = f"\nObsolete translations found for language [{lang}]:\n"
# Group by module and function for better readability
by_module: defaultdict[str, defaultdict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
by_module:defaultdict[str, defaultdict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
for module, function, message in obsolete_items:
by_module[module][function].append(message)