mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
refact: apply consistent formatting
This commit is contained in:
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -20,8 +20,8 @@ Select the type(s) of change(s) included in this pull request:
|
||||
Before requesting a review, confirm the following:
|
||||
- [ ] I have reviewed my changes to ensure they meet the project's standards.
|
||||
- [ ] I have tested my changes and ensured that all tests pass (`pdm run test`).
|
||||
- [ ] I have formatted the code (`pdm run format`).
|
||||
- [ ] I have verified that linting passes (`pdm run lint`).
|
||||
- [ ] I have run security scans and addressed any identified issues (`pdm run audit`).
|
||||
- [ ] I have updated documentation where necessary.
|
||||
|
||||
By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
|
||||
|
||||
@@ -82,7 +82,7 @@ app = "python -m kleinanzeigen_bot"
|
||||
compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean"
|
||||
compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build
|
||||
debug = "python -m pdb -m kleinanzeigen_bot"
|
||||
format = "autopep8 --recursive --in-place src tests --verbose"
|
||||
format = {shell = "autopep8 --recursive --in-place scripts src tests --verbose && python scripts/post_autopep8.py scripts src tests" }
|
||||
lint = {shell = "ruff check && mypy && basedpyright" }
|
||||
fix = {shell = "ruff check --fix" }
|
||||
test = "python -m pytest --capture=tee-sys -v"
|
||||
@@ -113,7 +113,7 @@ aggressive = 3
|
||||
# https://docs.astral.sh/ruff/configuration/
|
||||
#####################
|
||||
[tool.ruff]
|
||||
include = ["pyproject.toml", "src/**/*.py", "tests/**/*.py"]
|
||||
include = ["pyproject.toml", "scripts/**/*.py", "src/**/*.py", "tests/**/*.py"]
|
||||
line-length = 160
|
||||
indent-width = 4
|
||||
target-version = "py310"
|
||||
@@ -208,14 +208,10 @@ ignore = [
|
||||
"TC006", # Add quotes to type expression in `typing.cast()`
|
||||
]
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "double"
|
||||
indent-style = "space"
|
||||
line-ending = "native"
|
||||
docstring-code-format = false
|
||||
skip-magic-trailing-comma = false
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"scripts/**/*.py" = [
|
||||
"INP001", # File `...` is part of an implicit namespace package. Add an `__init__.py`.
|
||||
]
|
||||
"tests/**/*.py" = [
|
||||
"ARG",
|
||||
"B",
|
||||
@@ -247,7 +243,7 @@ max-statements = 150 # max. number of statements in function / method body (R091
|
||||
# https://mypy.readthedocs.io/en/stable/config_file.html
|
||||
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
|
||||
python_version = "3.10"
|
||||
files = "src,tests"
|
||||
files = "scripts,src,tests"
|
||||
strict = true
|
||||
disallow_untyped_calls = false
|
||||
disallow_untyped_defs = true
|
||||
@@ -264,7 +260,7 @@ verbosity = 0
|
||||
#####################
|
||||
[tool.basedpyright]
|
||||
# https://docs.basedpyright.com/latest/configuration/config-files/
|
||||
include = ["src", "tests"]
|
||||
include = ["scripts", "src", "tests"]
|
||||
defineConstant = { DEBUG = false }
|
||||
pythonVersion = "3.10"
|
||||
typeCheckingMode = "standard"
|
||||
|
||||
317
scripts/post_autopep8.py
Normal file
317
scripts/post_autopep8.py
Normal file
@@ -0,0 +1,317 @@
|
||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
import ast, logging, re, sys # isort: skip
|
||||
from pathlib import Path
|
||||
from typing import Final, List, Protocol, Tuple
|
||||
|
||||
from typing_extensions import override
|
||||
|
||||
# Configure basic logging
logging.basicConfig(level = logging.INFO, format = "%(levelname)s: %(message)s")
# module-wide logger shared by all formatter rules and the CLI entry point below
LOG:Final[logging.Logger] = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FormatterRule(Protocol):
    """
    Structural interface for a post-processing step: given the parsed AST and the
    file's source lines, a rule returns a (possibly modified) list of lines.
    """

    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        ...
|
||||
|
||||
|
||||
class NoSpaceAfterColonInTypeAnnotationRule(FormatterRule):
    """
    Strips the whitespace separating a colon from the type annotation that follows it,
    in both function parameters and annotated variable declarations.

    Enforces `a:int` instead of `a: int` - the inverse of autopep8 rule E231.

    Example:
        # Before
        def foo(a: int, b : str) -> None:
            pass

        # After
        def foo(a:int, b:str) -> None:
            pass
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        # (zero-based line, column) of every annotation expression in the module
        annotation_positions:List[Tuple[int, int]] = [
            (node.annotation.lineno - 1, node.annotation.col_offset)
            for node in ast.walk(tree)
            if isinstance(node, (ast.arg, ast.AnnAssign)) and node.annotation is not None
        ]

        if not annotation_positions:
            return lines

        result:List[str] = []
        for row, text in enumerate(lines):
            # never touch comment-only lines
            if text.lstrip().startswith("#"):
                result.append(text)
                continue

            buffer = list(text)
            # walk the columns right-to-left so deletions keep the smaller columns valid
            for column in sorted((c for r, c in annotation_positions if r == row), reverse = True):
                colon_pos = "".join(buffer[:column]).rfind(":")
                if colon_pos == -1:
                    # annotation wrapped to a line that holds no colon before it -> leave as-is
                    continue
                cursor = colon_pos + 1
                while cursor < len(buffer) and buffer[cursor].isspace():
                    del buffer[cursor]
            result.append("".join(buffer))

        return result
|
||||
|
||||
|
||||
class EqualSignSpacingInDefaultsAndNamedArgsRule(FormatterRule):
    """
    Ensures that the '=' sign in default values for function parameters and keyword arguments in function calls
    is surrounded by exactly one space on each side.

    This rule enforces `a:int = 3` instead of `a:int=3`, and `x = 42` instead of `x=42` or `x =42`.
    It is the opposite behavior of autopep8 rule E251.

    Example:
        # Before
        def foo(a:int=3, b :str= "bar"):
            pass

        foo(x=42,y = "hello")

        # After
        def foo(a:int = 3, b:str = "bar"):
            pass

        foo(x = 42, y = "hello")
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        # (zero-based line, column) of each default value / keyword-argument value;
        # the '=' to normalize is the last '=' preceding that position on the same line
        equals_positions:List[Tuple[int, int]] = []
        for node in ast.walk(tree):
            # --- Defaults in function definitions, async defs & lambdas ---
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)):
                # positional defaults
                equals_positions.extend(
                    (d.lineno - 1, d.col_offset)
                    for d in node.args.defaults
                    if d is not None
                )
                # keyword-only defaults (only on defs, not lambdas)
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    equals_positions.extend(
                        (d.lineno - 1, d.col_offset)
                        for d in node.args.kw_defaults
                        if d is not None  # kw_defaults holds None for kw-only args without a default
                    )

            # --- Keyword arguments in calls ---
            if isinstance(node, ast.Call):
                equals_positions.extend(
                    (kw.value.lineno - 1, kw.value.col_offset)
                    for kw in node.keywords
                    if kw.arg is not None  # kw.arg is None for `**kwargs` unpacking, which has no '='
                )

        if not equals_positions:
            return lines

        new_lines:List[str] = []
        for line_idx, line in enumerate(lines):
            # comment-only lines are passed through untouched
            if line.lstrip().startswith("#"):
                new_lines.append(line)
                continue

            chars = list(line)
            equals_offsets = [col for (lineno, col) in equals_positions if lineno == line_idx]
            # process right-to-left so edits cannot invalidate the remaining (smaller) columns
            for col in sorted(equals_offsets, reverse = True):
                prefix = "".join(chars[:col])
                equal_sign_idx = prefix.rfind("=")
                if equal_sign_idx == -1:
                    # the value sits on a different line than its '=' (wrapped argument) -> skip
                    continue

                # remove spaces before '='
                left_index = equal_sign_idx - 1
                while left_index >= 0 and chars[left_index].isspace():
                    del chars[left_index]
                    equal_sign_idx -= 1  # the '=' moves left with every deleted character
                    left_index -= 1

                # remove spaces after '='
                right_index = equal_sign_idx + 1
                while right_index < len(chars) and chars[right_index].isspace():
                    del chars[right_index]

                # insert single spaces
                chars.insert(equal_sign_idx, " ")
                chars.insert(equal_sign_idx + 2, " ")
            new_lines.append("".join(chars))

        return new_lines
|
||||
|
||||
|
||||
class PreferDoubleQuotesRule(FormatterRule):
    """
    Ensures string literals use double quotes unless the content contains a double quote.

    A literal is only rewritten when the regex-matched source text provably represents the
    complete constant (verified via `ast.literal_eval`). Anything ambiguous - escaped quotes,
    implicitly concatenated fragments, literals spanning multiple lines - is left untouched,
    because the escape-unaware regex below cannot rewrite those safely.

    Example:
        # Before
        foo = 'hello'
        bar = 'a "quote" inside'

        # After
        foo = "hello"
        bar = 'a "quote" inside' # kept as-is, because it contains a double quote
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        new_lines = lines.copy()

        # Track how much each line has shifted so far
        line_shifts:dict[int, int] = dict.fromkeys(range(len(lines)), 0)

        # Build a parent map for f-string detection
        parent_map:dict[ast.AST, ast.AST] = {}
        for parent in ast.walk(tree):
            for child in ast.iter_child_nodes(parent):
                parent_map[child] = parent

        def is_in_fstring(node:ast.AST) -> bool:
            # walk up the parent chain; a Constant nested anywhere inside a JoinedStr is f-string content
            p = parent_map.get(node)
            while p:
                if isinstance(p, ast.JoinedStr):
                    return True
                p = parent_map.get(p)
            return False

        # Regex to locate a single- or triple-quoted literal:
        # (?P<prefix>[rRbuUfF]*) optional string flags (r, b, u, f, etc.), case-insensitive
        # (?P<quote>'{3}|') the opening delimiter: either three single-quotes (''') or one (')
        # (?P<content>.*?) the literal's content, non-greedy up to the next same delimiter
        # (?P=quote) the matching closing delimiter (same length as the opener)
        literal_re = re.compile(
            r"(?P<prefix>[rRbuUfF]*)(?P<quote>'{3}|')(?P<content>.*?)(?P=quote)",
            re.DOTALL,
        )

        for node in ast.walk(tree):
            # only handle simple string constants
            if not (isinstance(node, ast.Constant) and isinstance(node.value, str)):
                continue

            # skip anything inside an f-string, at any depth
            if is_in_fstring(node):
                continue

            starting_line_number = getattr(node, "lineno", None)
            starting_col_offset = getattr(node, "col_offset", None)
            if starting_line_number is None or starting_col_offset is None:
                continue

            # FIX: a literal spanning multiple lines (e.g. an open ''' closed further down)
            # cannot be rewritten from a single line of text; without this guard the regex
            # alternation backtracks on the unterminated ''' and matches the bogus empty
            # literal '', corrupting the line.
            if getattr(node, "end_lineno", None) != starting_line_number:
                continue

            start_line = starting_line_number - 1
            shift = line_shifts[start_line]
            raw = new_lines[start_line]
            # apply shift so we match against current edited line
            idx = starting_col_offset + shift
            if idx >= len(raw) or raw[idx] not in ("'", "r", "u", "b", "f", "R", "U", "B", "F"):
                continue

            # match literal at that column
            m = literal_re.match(raw[idx:])
            if not m:
                continue

            # FIX: the regex is escape-unaware - on input like 'it\'s' it stops at the escaped
            # quote and would corrupt the literal. Only proceed if the matched source text
            # evaluates back to exactly the constant value the parser saw; this also rejects
            # implicitly concatenated fragments ('a' 'b') instead of half-rewriting them.
            try:
                if ast.literal_eval(m.group(0)) != node.value:
                    continue
            except (ValueError, SyntaxError):
                continue

            prefix = m.group("prefix")
            quote = m.group("quote") # either "'" or "'''"
            content = m.group("content") # what's inside

            # skip if content has a double-quote already
            if '"' in content:
                continue

            # build new literal with the same prefix, but double-quote delimiter;
            # content is guaranteed free of '"' here, so no escaping is needed
            delim = '"' * len(quote)
            new_literal = f"{prefix}{delim}{content}{delim}"

            literal_len = m.end() # how many chars we're replacing
            before = raw[:idx]
            after = raw[idx + literal_len:]
            new_lines[start_line] = before + new_literal + after

            # record shift delta for any further edits on this line
            line_shifts[start_line] += len(new_literal) - literal_len

        return new_lines
|
||||
|
||||
|
||||
# Rules run in declaration order; each one receives the lines produced by the previous rule.
FORMATTER_RULES:List[FormatterRule] = [
    NoSpaceAfterColonInTypeAnnotationRule(),
    EqualSignSpacingInDefaultsAndNamedArgsRule(),
    PreferDoubleQuotesRule(),
]
|
||||
|
||||
|
||||
def format_file(path:Path) -> None:
    """
    Apply all FORMATTER_RULES to a single Python file and rewrite it in place if anything changed.

    The file is left untouched (and an error is logged) when it does not parse, or when
    any rule produces output that no longer parses.
    """
    # read with newline = "" so the original line endings survive the round-trip unchanged
    with path.open("r", encoding = "utf-8", newline = "") as reader:
        source_text = reader.read()

    try:
        tree = ast.parse(source_text)
    except SyntaxError as e:
        LOG.error(
            "Syntax error parsing %s[%d:%d]: %r -> %s",
            path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
        )
        return

    source_lines = source_text.splitlines(keepends = True)
    result_text = source_text
    for rule in FORMATTER_RULES:
        source_lines = rule.apply(tree, source_lines, path)
        result_text = "".join(source_lines)

        try:
            # re-parse so the next rule sees positions matching the freshly edited text
            tree = ast.parse(result_text)
        except SyntaxError as e:
            LOG.error(
                "Syntax error after %s at %s[%d:%d]: %r -> %s",
                rule.__class__.__name__, path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
            )
            return  # abort without writing - this rule corrupted the file

    if result_text != source_text:
        with path.open("w", encoding = "utf-8", newline = "") as writer:
            writer.write(result_text)
        LOG.info("Formatted [%s].", path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI: every argument is a directory tree whose *.py files get formatted in place
    directories = sys.argv[1:]
    if not directories:
        print(f"Usage: python {Path(sys.argv[0])} <directory1> [<directory2> ...]")
        sys.exit(1)

    for target in map(Path, directories):
        if not target.exists():
            LOG.warning("Directory [%s] does not exist, skipping...", target)
            continue
        for python_file in target.rglob("*.py"):
            format_file(python_file)
|
||||
@@ -83,11 +83,11 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
self.configure_file_logging()
|
||||
self.load_config()
|
||||
|
||||
if not (self.ads_selector in {'all', 'new', 'due', 'changed'} or
|
||||
any(selector in self.ads_selector.split(',') for selector in ('all', 'new', 'due', 'changed')) or
|
||||
re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
|
||||
if not (self.ads_selector in {"all", "new", "due", "changed"} or
|
||||
any(selector in self.ads_selector.split(",") for selector in ("all", "new", "due", "changed")) or
|
||||
re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
|
||||
LOG.warning('You provided no ads selector. Defaulting to "due".')
|
||||
self.ads_selector = 'due'
|
||||
self.ads_selector = "due"
|
||||
|
||||
if ads := self.load_ads():
|
||||
await self.create_browser_session()
|
||||
@@ -111,9 +111,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
case "download":
|
||||
self.configure_file_logging()
|
||||
# ad IDs depends on selector
|
||||
if not (self.ads_selector in {'all', 'new'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
|
||||
if not (self.ads_selector in {"all", "new"} or re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
|
||||
LOG.warning('You provided no ads selector. Defaulting to "new".')
|
||||
self.ads_selector = 'new'
|
||||
self.ads_selector = "new"
|
||||
self.load_config()
|
||||
await self.create_browser_session()
|
||||
await self.login()
|
||||
@@ -265,7 +265,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
LOG.info("App version: %s", self.get_version())
|
||||
LOG.info("Python version: %s", sys.version)
|
||||
|
||||
def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_file_relative: str) -> bool:
|
||||
def __check_ad_republication(self, ad_cfg:dict[str, Any], ad_file_relative:str) -> bool:
|
||||
"""
|
||||
Check if an ad needs to be republished based on republication interval.
|
||||
Returns True if the ad should be republished based on the interval.
|
||||
@@ -295,7 +295,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
|
||||
return True
|
||||
|
||||
def __check_ad_changed(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool:
|
||||
def __check_ad_changed(self, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool:
|
||||
"""
|
||||
Check if an ad has been changed since last publication.
|
||||
Returns True if the ad has been changed.
|
||||
@@ -327,7 +327,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
data_root_dir = os.path.dirname(self.config_file_path)
|
||||
for file_pattern in self.config["ad_files"]:
|
||||
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
|
||||
if not str(ad_file).endswith('ad_fields.yaml'):
|
||||
if not str(ad_file).endswith("ad_fields.yaml"):
|
||||
ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file
|
||||
LOG.info(" -> found %s", pluralize("ad config file", ad_files))
|
||||
if not ad_files:
|
||||
@@ -335,13 +335,13 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
|
||||
ids = []
|
||||
use_specific_ads = False
|
||||
selectors = self.ads_selector.split(',')
|
||||
selectors = self.ads_selector.split(",")
|
||||
|
||||
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
|
||||
ids = [int(n) for n in self.ads_selector.split(',')]
|
||||
if re.compile(r"\d+[,\d+]*").search(self.ads_selector):
|
||||
ids = [int(n) for n in self.ads_selector.split(",")]
|
||||
use_specific_ads = True
|
||||
LOG.info('Start fetch task for the ad(s) with id(s):')
|
||||
LOG.info(' | '.join([str(id_) for id_ in ids]))
|
||||
LOG.info("Start fetch task for the ad(s) with id(s):")
|
||||
LOG.info(" | ".join([str(id_) for id_ in ids]))
|
||||
|
||||
ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml")
|
||||
ads = []
|
||||
@@ -548,7 +548,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
async def is_logged_in(self) -> bool:
|
||||
try:
|
||||
user_info = await self.web_text(By.CLASS_NAME, "mr-medium")
|
||||
if self.config['login']['username'].lower() in user_info.lower():
|
||||
if self.config["login"]["username"].lower() in user_info.lower():
|
||||
return True
|
||||
except TimeoutError:
|
||||
return False
|
||||
@@ -570,7 +570,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
LOG.info("DONE: Deleted %s", pluralize("ad", count))
|
||||
LOG.info("############################################")
|
||||
|
||||
async def delete_ad(self, ad_cfg: dict[str, Any], published_ads: list[dict[str, Any]], *, delete_old_ads_by_title: bool) -> bool:
|
||||
async def delete_ad(self, ad_cfg:dict[str, Any], published_ads:list[dict[str, Any]], *, delete_old_ads_by_title:bool) -> bool:
|
||||
LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])
|
||||
|
||||
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
|
||||
@@ -627,7 +627,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
LOG.info("DONE: (Re-)published %s", pluralize("ad", count))
|
||||
LOG.info("############################################")
|
||||
|
||||
async def publish_ad(self, ad_file:str, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], published_ads: list[dict[str, Any]]) -> None:
|
||||
async def publish_ad(self, ad_file:str, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], published_ads:list[dict[str, Any]]) -> None:
|
||||
"""
|
||||
@param ad_cfg: the effective ad config (i.e. with default values applied etc.)
|
||||
@param ad_cfg_orig: the ad config as present in the YAML file
|
||||
@@ -657,7 +657,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
#############################
|
||||
# set category
|
||||
#############################
|
||||
await self.__set_category(ad_cfg['category'], ad_file)
|
||||
await self.__set_category(ad_cfg["category"], ad_file)
|
||||
|
||||
#############################
|
||||
# set special attributes
|
||||
@@ -674,7 +674,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
try:
|
||||
await self.web_select(By.XPATH, "//select[contains(@id, '.versand_s')]", shipping_value)
|
||||
except TimeoutError:
|
||||
LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg['shipping_type'])
|
||||
LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg["shipping_type"])
|
||||
else:
|
||||
await self.__set_shipping(ad_cfg)
|
||||
|
||||
@@ -698,9 +698,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
if ad_cfg["shipping_type"] == "SHIPPING":
|
||||
if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}:
|
||||
if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED):
|
||||
await self.web_click(By.ID, 'radio-buy-now-yes')
|
||||
await self.web_click(By.ID, "radio-buy-now-yes")
|
||||
elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED):
|
||||
await self.web_click(By.ID, 'radio-buy-now-no')
|
||||
await self.web_click(By.ID, "radio-buy-now-no")
|
||||
except TimeoutError as ex:
|
||||
LOG.debug(ex, exc_info = True)
|
||||
|
||||
@@ -832,7 +832,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
|
||||
dicts.save_dict(ad_file, ad_cfg_orig)
|
||||
|
||||
async def __set_condition(self, condition_value: str) -> None:
|
||||
async def __set_condition(self, condition_value:str) -> None:
|
||||
condition_mapping = {
|
||||
"new_with_tag": "Neu mit Etikett",
|
||||
"new": "Neu",
|
||||
@@ -862,7 +862,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
except TimeoutError as ex:
|
||||
raise TimeoutError(_("Unable to close condition dialog!")) from ex
|
||||
|
||||
async def __set_category(self, category: str | None, ad_file:str) -> None:
|
||||
async def __set_category(self, category:str | None, ad_file:str) -> None:
|
||||
# click on something to trigger automatic category detection
|
||||
await self.web_click(By.ID, "pstad-descrptn")
|
||||
|
||||
@@ -884,9 +884,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
else:
|
||||
ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed")
|
||||
|
||||
async def __set_special_attributes(self, ad_cfg: dict[str, Any]) -> None:
|
||||
async def __set_special_attributes(self, ad_cfg:dict[str, Any]) -> None:
|
||||
if ad_cfg["special_attributes"]:
|
||||
LOG.debug('Found %i special attributes', len(ad_cfg["special_attributes"]))
|
||||
LOG.debug("Found %i special attributes", len(ad_cfg["special_attributes"]))
|
||||
for special_attribute_key, special_attribute_value in ad_cfg["special_attributes"].items():
|
||||
|
||||
if special_attribute_key == "condition_s":
|
||||
@@ -911,10 +911,10 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
|
||||
try:
|
||||
elem_id = special_attr_elem.attrs.id
|
||||
if special_attr_elem.local_name == 'select':
|
||||
if special_attr_elem.local_name == "select":
|
||||
LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key)
|
||||
await self.web_select(By.ID, elem_id, special_attribute_value)
|
||||
elif special_attr_elem.attrs.type == 'checkbox':
|
||||
elif special_attr_elem.attrs.type == "checkbox":
|
||||
LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key)
|
||||
await self.web_click(By.ID, elem_id)
|
||||
else:
|
||||
@@ -925,7 +925,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex
|
||||
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value)
|
||||
|
||||
async def __set_shipping(self, ad_cfg: dict[str, Any]) -> None:
|
||||
async def __set_shipping(self, ad_cfg:dict[str, Any]) -> None:
|
||||
if ad_cfg["shipping_type"] == "PICKUP":
|
||||
try:
|
||||
await self.web_click(By.XPATH,
|
||||
@@ -960,7 +960,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
LOG.debug(ex, exc_info = True)
|
||||
raise TimeoutError(_("Unable to close shipping dialog!")) from ex
|
||||
|
||||
async def __set_shipping_options(self, ad_cfg: dict[str, Any]) -> None:
|
||||
async def __set_shipping_options(self, ad_cfg:dict[str, Any]) -> None:
|
||||
shipping_options_mapping = {
|
||||
"DHL_2": ("Klein", "Paket 2 kg"),
|
||||
"Hermes_Päckchen": ("Klein", "Päckchen"),
|
||||
@@ -980,7 +980,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
except KeyError as ex:
|
||||
raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg['shipping_options']}") from ex
|
||||
|
||||
shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict=False)
|
||||
shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict = False)
|
||||
|
||||
try:
|
||||
shipping_size, = set(shipping_sizes)
|
||||
@@ -1025,7 +1025,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
except TimeoutError as ex:
|
||||
raise TimeoutError(_("Unable to close shipping dialog!")) from ex
|
||||
|
||||
async def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
|
||||
async def __upload_images(self, ad_cfg:dict[str, Any]) -> None:
|
||||
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
|
||||
image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]")
|
||||
|
||||
@@ -1036,7 +1036,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
|
||||
async def assert_free_ad_limit_not_reached(self) -> None:
|
||||
try:
|
||||
await self.web_find(By.XPATH, '/html/body/div[1]/form/fieldset[6]/div[1]/header', timeout = 2)
|
||||
await self.web_find(By.XPATH, "/html/body/div[1]/form/fieldset[6]/div[1]/header", timeout = 2)
|
||||
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
|
||||
except TimeoutError:
|
||||
pass
|
||||
@@ -1050,13 +1050,13 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
ad_extractor = extract.AdExtractor(self.browser, self.config)
|
||||
|
||||
# use relevant download routine
|
||||
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
|
||||
LOG.info('Scanning your ad overview...')
|
||||
if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes
|
||||
LOG.info("Scanning your ad overview...")
|
||||
own_ad_urls = await ad_extractor.extract_own_ads_urls()
|
||||
LOG.info('%s found.', pluralize("ad", len(own_ad_urls)))
|
||||
LOG.info("%s found.", pluralize("ad", len(own_ad_urls)))
|
||||
|
||||
if self.ads_selector == 'all':  # download all of your ads
|
||||
LOG.info('Starting download of all ads...')
|
||||
if self.ads_selector == "all":  # download all of your ads
|
||||
LOG.info("Starting download of all ads...")
|
||||
|
||||
success_count = 0
|
||||
# call download function for each ad page
|
||||
@@ -1067,12 +1067,12 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
success_count += 1
|
||||
LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls))
|
||||
|
||||
elif self.ads_selector == 'new': # download only unsaved ads
|
||||
elif self.ads_selector == "new": # download only unsaved ads
|
||||
# check which ads already saved
|
||||
saved_ad_ids = []
|
||||
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
|
||||
for ad in ads:
|
||||
ad_id = int(ad[2]['id'])
|
||||
ad_id = int(ad[2]["id"])
|
||||
saved_ad_ids.append(ad_id)
|
||||
|
||||
# determine ad IDs from links
|
||||
@@ -1083,28 +1083,28 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
for ad_url, ad_id in ad_id_by_url.items():
|
||||
# check if ad with ID already saved
|
||||
if ad_id in saved_ad_ids:
|
||||
LOG.info('The ad with id %d has already been saved.', ad_id)
|
||||
LOG.info("The ad with id %d has already been saved.", ad_id)
|
||||
continue
|
||||
|
||||
if await ad_extractor.naviagte_to_ad_page(ad_url):
|
||||
await ad_extractor.download_ad(ad_id)
|
||||
new_count += 1
|
||||
LOG.info('%s were downloaded from your profile.', pluralize("new ad", new_count))
|
||||
LOG.info("%s were downloaded from your profile.", pluralize("new ad", new_count))
|
||||
|
||||
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
|
||||
ids = [int(n) for n in self.ads_selector.split(',')]
|
||||
LOG.info('Starting download of ad(s) with the id(s):')
|
||||
LOG.info(' | '.join([str(ad_id) for ad_id in ids]))
|
||||
elif re.compile(r"\d+[,\d+]*").search(self.ads_selector): # download ad(s) with specific id(s)
|
||||
ids = [int(n) for n in self.ads_selector.split(",")]
|
||||
LOG.info("Starting download of ad(s) with the id(s):")
|
||||
LOG.info(" | ".join([str(ad_id) for ad_id in ids]))
|
||||
|
||||
for ad_id in ids: # call download routine for every id
|
||||
exists = await ad_extractor.naviagte_to_ad_page(ad_id)
|
||||
if exists:
|
||||
await ad_extractor.download_ad(ad_id)
|
||||
LOG.info('Downloaded ad with id %d', ad_id)
|
||||
LOG.info("Downloaded ad with id %d", ad_id)
|
||||
else:
|
||||
LOG.error('The page with the id %d does not exist!', ad_id)
|
||||
LOG.error("The page with the id %d does not exist!", ad_id)
|
||||
|
||||
def __get_description_with_affixes(self, ad_cfg: dict[str, Any]) -> str:
|
||||
def __get_description_with_affixes(self, ad_cfg:dict[str, Any]) -> str:
|
||||
"""Get the complete description with prefix and suffix applied.
|
||||
|
||||
Precedence (highest to lowest):
|
||||
|
||||
@@ -9,7 +9,7 @@ from .utils import dicts
|
||||
MAX_DESCRIPTION_LENGTH:Final[int] = 4000
|
||||
|
||||
|
||||
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
||||
def calculate_content_hash(ad_cfg:dict[str, Any]) -> str:
|
||||
"""Calculate a hash for user-modifiable fields of the ad."""
|
||||
|
||||
# Relevant fields for the hash
|
||||
@@ -40,7 +40,7 @@ def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
||||
return hashlib.sha256(content_str.encode()).hexdigest()
|
||||
|
||||
|
||||
def get_description_affixes(config: dict[str, Any], *, prefix: bool = True) -> str:
|
||||
def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str:
|
||||
"""Get prefix or suffix for description with proper precedence.
|
||||
|
||||
This function handles both the new flattened format and legacy nested format:
|
||||
|
||||
@@ -36,22 +36,22 @@ class AdExtractor(WebScrapingMixin):
|
||||
"""
|
||||
|
||||
# create sub-directory for ad(s) to download (if necessary):
|
||||
relative_directory = 'downloaded-ads'
|
||||
relative_directory = "downloaded-ads"
|
||||
# make sure configured base directory exists
|
||||
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
|
||||
os.mkdir(relative_directory)
|
||||
LOG.info('Created ads directory at ./%s.', relative_directory)
|
||||
LOG.info("Created ads directory at ./%s.", relative_directory)
|
||||
|
||||
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
|
||||
if os.path.exists(new_base_dir):
|
||||
LOG.info('Deleting current folder of ad %s...', ad_id)
|
||||
LOG.info("Deleting current folder of ad %s...", ad_id)
|
||||
shutil.rmtree(new_base_dir)
|
||||
os.mkdir(new_base_dir)
|
||||
LOG.info('New directory for ad created at %s.', new_base_dir)
|
||||
LOG.info("New directory for ad created at %s.", new_base_dir)
|
||||
|
||||
# call extraction function
|
||||
info = await self._extract_ad_page_info(new_base_dir, ad_id)
|
||||
ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml'
|
||||
ad_file_path = new_base_dir + "/" + f'ad_{ad_id}.yaml'
|
||||
dicts.save_dict(ad_file_path, info)
|
||||
|
||||
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
|
||||
@@ -67,18 +67,18 @@ class AdExtractor(WebScrapingMixin):
|
||||
img_paths = []
|
||||
try:
|
||||
# download all images from box
|
||||
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
|
||||
image_box = await self.web_find(By.CLASS_NAME, "galleryimage-large")
|
||||
|
||||
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
|
||||
LOG.info('Found %s.', i18n.pluralize("image", n_images))
|
||||
n_images = len(await self.web_find_all(By.CSS_SELECTOR, ".galleryimage-element[data-ix]", parent = image_box))
|
||||
LOG.info("Found %s.", i18n.pluralize("image", n_images))
|
||||
|
||||
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
|
||||
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
|
||||
img_element:Element = await self.web_find(By.CSS_SELECTOR, "div:nth-child(1) > img", parent = image_box)
|
||||
img_fn_prefix = "ad_" + str(ad_id) + "__img"
|
||||
|
||||
img_nr = 1
|
||||
dl_counter = 0
|
||||
while img_nr <= n_images: # scrolling + downloading
|
||||
current_img_url = img_element.attrs['src'] # URL of the image
|
||||
current_img_url = img_element.attrs["src"] # URL of the image
|
||||
if current_img_url is None:
|
||||
continue
|
||||
|
||||
@@ -86,43 +86,43 @@ class AdExtractor(WebScrapingMixin):
|
||||
content_type = response.info().get_content_type()
|
||||
file_ending = mimetypes.guess_extension(content_type)
|
||||
img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}"
|
||||
with open(img_path, 'wb') as f:
|
||||
with open(img_path, "wb") as f:
|
||||
shutil.copyfileobj(response, f)
|
||||
dl_counter += 1
|
||||
img_paths.append(img_path.rsplit('/', maxsplit = 1)[-1])
|
||||
img_paths.append(img_path.rsplit("/", maxsplit = 1)[-1])
|
||||
|
||||
# navigate to next image (if exists)
|
||||
if img_nr < n_images:
|
||||
try:
|
||||
# click next button, wait, and re-establish reference
|
||||
await (await self.web_find(By.CLASS_NAME, 'galleryimage--navigation--next')).click()
|
||||
await (await self.web_find(By.CLASS_NAME, "galleryimage--navigation--next")).click()
|
||||
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
|
||||
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
|
||||
img_element = await self.web_find(By.TAG_NAME, "img", parent = new_div)
|
||||
except TimeoutError:
|
||||
LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.')
|
||||
LOG.error("NEXT button in image gallery somehow missing, aborting image fetching.")
|
||||
break
|
||||
img_nr += 1
|
||||
LOG.info('Downloaded %s.', i18n.pluralize("image", dl_counter))
|
||||
LOG.info("Downloaded %s.", i18n.pluralize("image", dl_counter))
|
||||
|
||||
except TimeoutError: # some ads do not require images
|
||||
LOG.warning('No image area found. Continuing without downloading images.')
|
||||
LOG.warning("No image area found. Continuing without downloading images.")
|
||||
|
||||
return img_paths
|
||||
|
||||
def extract_ad_id_from_ad_url(self, url: str) -> int:
|
||||
def extract_ad_id_from_ad_url(self, url:str) -> int:
|
||||
"""
|
||||
Extracts the ID of an ad, given by its reference link.
|
||||
|
||||
:param url: the URL to the ad page
|
||||
:return: the ad ID, a (ten-digit) integer number
|
||||
"""
|
||||
num_part = url.split('/')[-1] # suffix
|
||||
id_part = num_part.split('-')[0]
|
||||
num_part = url.split("/")[-1] # suffix
|
||||
id_part = num_part.split("-")[0]
|
||||
|
||||
try:
|
||||
path = url.split('?', 1)[0] # Remove query string if present
|
||||
last_segment = path.rstrip('/').split('/')[-1] # Get last path component
|
||||
id_part = last_segment.split('-')[0] # Extract part before first hyphen
|
||||
path = url.split("?", 1)[0] # Remove query string if present
|
||||
last_segment = path.rstrip("/").split("/")[-1] # Get last path component
|
||||
id_part = last_segment.split("-")[0] # Extract part before first hyphen
|
||||
return int(id_part)
|
||||
except (IndexError, ValueError) as ex:
|
||||
LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex)
|
||||
@@ -135,41 +135,41 @@ class AdExtractor(WebScrapingMixin):
|
||||
:return: the links to your ad pages
|
||||
"""
|
||||
# navigate to "your ads" page
|
||||
await self.web_open('https://www.kleinanzeigen.de/m-meine-anzeigen.html')
|
||||
await self.web_open("https://www.kleinanzeigen.de/m-meine-anzeigen.html")
|
||||
await self.web_sleep(2000, 3000) # Consider replacing with explicit waits later
|
||||
|
||||
# Try to find the main ad list container first
|
||||
try:
|
||||
ad_list_container = await self.web_find(By.ID, 'my-manageitems-adlist')
|
||||
ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
|
||||
except TimeoutError:
|
||||
LOG.warning('Ad list container #my-manageitems-adlist not found. Maybe no ads present?')
|
||||
LOG.warning("Ad list container #my-manageitems-adlist not found. Maybe no ads present?")
|
||||
return []
|
||||
|
||||
# --- Pagination handling ---
|
||||
multi_page = False
|
||||
try:
|
||||
# Correct selector: Use uppercase '.Pagination'
|
||||
pagination_section = await self.web_find(By.CSS_SELECTOR, '.Pagination', timeout=10) # Increased timeout slightly
|
||||
pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = 10) # Increased timeout slightly
|
||||
# Correct selector: Use 'aria-label'
|
||||
# Also check if the button is actually present AND potentially enabled (though enabled check isn't strictly necessary here, only for clicking later)
|
||||
next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section)
|
||||
next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
|
||||
if next_buttons:
|
||||
# Check if at least one 'Nächste' button is not disabled (optional but good practice)
|
||||
enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get('disabled')]
|
||||
enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get("disabled")]
|
||||
if enabled_next_buttons:
|
||||
multi_page = True
|
||||
LOG.info('Multiple ad pages detected.')
|
||||
LOG.info("Multiple ad pages detected.")
|
||||
else:
|
||||
LOG.info('Next button found but is disabled. Assuming single effective page.')
|
||||
LOG.info("Next button found but is disabled. Assuming single effective page.")
|
||||
|
||||
else:
|
||||
LOG.info('No "Naechste" button found within pagination. Assuming single page.')
|
||||
except TimeoutError:
|
||||
# This will now correctly trigger only if the '.Pagination' div itself is not found
|
||||
LOG.info('No pagination controls found. Assuming single page.')
|
||||
LOG.info("No pagination controls found. Assuming single page.")
|
||||
except Exception as e:
|
||||
LOG.exception("Error during pagination detection: %s", e)
|
||||
LOG.info('Assuming single page due to error during pagination check.')
|
||||
LOG.info("Assuming single page due to error during pagination check.")
|
||||
# --- End Pagination Handling ---
|
||||
|
||||
refs:list[str] = []
|
||||
@@ -182,8 +182,8 @@ class AdExtractor(WebScrapingMixin):
|
||||
|
||||
# Re-find the ad list container on the current page/state
|
||||
try:
|
||||
ad_list_container = await self.web_find(By.ID, 'my-manageitems-adlist')
|
||||
list_items = await self.web_find_all(By.CLASS_NAME, 'cardbox', parent=ad_list_container)
|
||||
ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
|
||||
list_items = await self.web_find_all(By.CLASS_NAME, "cardbox", parent = ad_list_container)
|
||||
LOG.info("Found %s ad items on page %s.", len(list_items), current_page)
|
||||
except TimeoutError:
|
||||
LOG.warning("Could not find ad list container or items on page %s.", current_page)
|
||||
@@ -192,7 +192,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
# Extract references using the CORRECTED selector
|
||||
try:
|
||||
page_refs = [
|
||||
(await self.web_find(By.CSS_SELECTOR, 'div.manageitems-item-ad h3 a.text-onSurface', parent=li)).attrs['href']
|
||||
(await self.web_find(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = li)).attrs["href"]
|
||||
for li in list_items
|
||||
]
|
||||
refs.extend(page_refs)
|
||||
@@ -207,12 +207,12 @@ class AdExtractor(WebScrapingMixin):
|
||||
# --- Navigate to next page ---
|
||||
try:
|
||||
# Find the pagination section again (scope might have changed after scroll/wait)
|
||||
pagination_section = await self.web_find(By.CSS_SELECTOR, '.Pagination', timeout=5)
|
||||
pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = 5)
|
||||
# Find the "Next" button using the correct aria-label selector and ensure it's not disabled
|
||||
next_button_element = None
|
||||
possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section)
|
||||
possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
|
||||
for btn in possible_next_buttons:
|
||||
if not btn.attrs.get('disabled'): # Check if the button is enabled
|
||||
if not btn.attrs.get("disabled"): # Check if the button is enabled
|
||||
next_button_element = btn
|
||||
break # Found an enabled next button
|
||||
|
||||
@@ -235,7 +235,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
# --- End Navigation ---
|
||||
|
||||
if not refs:
|
||||
LOG.warning('No ad URLs were extracted.')
|
||||
LOG.warning("No ad URLs were extracted.")
|
||||
|
||||
return refs
|
||||
|
||||
@@ -246,27 +246,27 @@ class AdExtractor(WebScrapingMixin):
|
||||
"""
|
||||
if reflect.is_integer(id_or_url):
|
||||
# navigate to start page, otherwise page can be None!
|
||||
await self.web_open('https://www.kleinanzeigen.de/')
|
||||
await self.web_open("https://www.kleinanzeigen.de/")
|
||||
# enter the ad ID into the search bar
|
||||
await self.web_input(By.ID, "site-search-query", id_or_url)
|
||||
# navigate to ad page and wait
|
||||
await self.web_check(By.ID, 'site-search-submit', Is.CLICKABLE)
|
||||
submit_button = await self.web_find(By.ID, 'site-search-submit')
|
||||
await self.web_check(By.ID, "site-search-submit", Is.CLICKABLE)
|
||||
submit_button = await self.web_find(By.ID, "site-search-submit")
|
||||
await submit_button.click()
|
||||
else:
|
||||
await self.web_open(str(id_or_url)) # navigate to URL directly given
|
||||
await self.web_sleep()
|
||||
|
||||
# handle the case that invalid ad ID given
|
||||
if self.page.url.endswith('k0'):
|
||||
LOG.error('There is no ad under the given ID.')
|
||||
if self.page.url.endswith("k0"):
|
||||
LOG.error("There is no ad under the given ID.")
|
||||
return False
|
||||
|
||||
# close (warning) popup, if given
|
||||
try:
|
||||
await self.web_find(By.ID, 'vap-ovrly-secure')
|
||||
LOG.warning('A popup appeared!')
|
||||
await self.web_click(By.CLASS_NAME, 'mfp-close')
|
||||
await self.web_find(By.ID, "vap-ovrly-secure")
|
||||
LOG.warning("A popup appeared!")
|
||||
await self.web_click(By.CLASS_NAME, "mfp-close")
|
||||
await self.web_sleep()
|
||||
except TimeoutError:
|
||||
pass
|
||||
@@ -280,22 +280,22 @@ class AdExtractor(WebScrapingMixin):
|
||||
:param ad_id: the ad ID, already extracted by a calling function
|
||||
:return: a dictionary with the keys as given in an ad YAML, and their respective values
|
||||
"""
|
||||
info:dict[str, Any] = {'active': True}
|
||||
info:dict[str, Any] = {"active": True}
|
||||
|
||||
# extract basic info
|
||||
info['type'] = 'OFFER' if 's-anzeige' in self.page.url else 'WANTED'
|
||||
title:str = await self.web_text(By.ID, 'viewad-title')
|
||||
info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
|
||||
title:str = await self.web_text(By.ID, "viewad-title")
|
||||
LOG.info('Extracting information from ad with title "%s"', title)
|
||||
|
||||
info['category'] = await self._extract_category_from_ad_page()
|
||||
info['title'] = title
|
||||
info["category"] = await self._extract_category_from_ad_page()
|
||||
info["title"] = title
|
||||
|
||||
# Get raw description text
|
||||
raw_description = (await self.web_text(By.ID, 'viewad-description-text')).strip()
|
||||
raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip()
|
||||
|
||||
# Get prefix and suffix from config
|
||||
prefix = get_description_affixes(self.config, prefix=True)
|
||||
suffix = get_description_affixes(self.config, prefix=False)
|
||||
prefix = get_description_affixes(self.config, prefix = True)
|
||||
suffix = get_description_affixes(self.config, prefix = False)
|
||||
|
||||
# Remove prefix and suffix if present
|
||||
description_text = raw_description
|
||||
@@ -304,38 +304,38 @@ class AdExtractor(WebScrapingMixin):
|
||||
if suffix and description_text.endswith(suffix.strip()):
|
||||
description_text = description_text[:-len(suffix.strip())]
|
||||
|
||||
info['description'] = description_text.strip()
|
||||
info["description"] = description_text.strip()
|
||||
|
||||
info['special_attributes'] = await self._extract_special_attributes_from_ad_page()
|
||||
if "art_s" in info['special_attributes']:
|
||||
info["special_attributes"] = await self._extract_special_attributes_from_ad_page()
|
||||
if "art_s" in info["special_attributes"]:
|
||||
# change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
|
||||
info['category'] = f"{info['category']}/{info['special_attributes']['art_s']}"
|
||||
del info['special_attributes']['art_s']
|
||||
if "schaden_s" in info['special_attributes']:
|
||||
info["category"] = f"{info['category']}/{info['special_attributes']['art_s']}"
|
||||
del info["special_attributes"]["art_s"]
|
||||
if "schaden_s" in info["special_attributes"]:
|
||||
# change f to 'nein' and 't' to 'ja'
|
||||
info['special_attributes']['schaden_s'] = info['special_attributes']['schaden_s'].translate(str.maketrans({'t': 'ja', 'f': 'nein'}))
|
||||
info['price'], info['price_type'] = await self._extract_pricing_info_from_ad_page()
|
||||
info['shipping_type'], info['shipping_costs'], info['shipping_options'] = await self._extract_shipping_info_from_ad_page()
|
||||
info['sell_directly'] = await self._extract_sell_directly_from_ad_page()
|
||||
info['images'] = await self._download_images_from_ad_page(directory, ad_id)
|
||||
info['contact'] = await self._extract_contact_from_ad_page()
|
||||
info['id'] = ad_id
|
||||
info["special_attributes"]["schaden_s"] = info["special_attributes"]["schaden_s"].translate(str.maketrans({"t": "ja", "f": "nein"}))
|
||||
info["price"], info["price_type"] = await self._extract_pricing_info_from_ad_page()
|
||||
info["shipping_type"], info["shipping_costs"], info["shipping_options"] = await self._extract_shipping_info_from_ad_page()
|
||||
info["sell_directly"] = await self._extract_sell_directly_from_ad_page()
|
||||
info["images"] = await self._download_images_from_ad_page(directory, ad_id)
|
||||
info["contact"] = await self._extract_contact_from_ad_page()
|
||||
info["id"] = ad_id
|
||||
|
||||
try: # try different locations known for creation date element
|
||||
creation_date = await self.web_text(By.XPATH,
|
||||
'/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span')
|
||||
"/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
|
||||
except TimeoutError:
|
||||
creation_date = await self.web_text(By.CSS_SELECTOR, '#viewad-extra-info > div:nth-child(1) > span:nth-child(2)')
|
||||
creation_date = await self.web_text(By.CSS_SELECTOR, "#viewad-extra-info > div:nth-child(1) > span:nth-child(2)")
|
||||
|
||||
# convert creation date to ISO format
|
||||
created_parts = creation_date.split('.')
|
||||
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00'
|
||||
created_parts = creation_date.split(".")
|
||||
creation_date = created_parts[2] + "-" + created_parts[1] + "-" + created_parts[0] + " 00:00:00"
|
||||
creation_date = datetime.fromisoformat(creation_date).isoformat()
|
||||
info['created_on'] = creation_date
|
||||
info['updated_on'] = None # will be set later on
|
||||
info["created_on"] = creation_date
|
||||
info["updated_on"] = None # will be set later on
|
||||
|
||||
# Calculate the initial hash for the downloaded ad
|
||||
info['content_hash'] = calculate_content_hash(info)
|
||||
info["content_hash"] = calculate_content_hash(info)
|
||||
|
||||
return info
|
||||
|
||||
@@ -346,12 +346,12 @@ class AdExtractor(WebScrapingMixin):
|
||||
|
||||
:return: a category string of form abc/def, where a-f are digits
|
||||
"""
|
||||
category_line = await self.web_find(By.ID, 'vap-brdcrmb')
|
||||
category_first_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line)
|
||||
category_second_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line)
|
||||
cat_num_first = category_first_part.attrs['href'].split('/')[-1][1:]
|
||||
cat_num_second = category_second_part.attrs['href'].split('/')[-1][1:]
|
||||
category:str = cat_num_first + '/' + cat_num_second
|
||||
category_line = await self.web_find(By.ID, "vap-brdcrmb")
|
||||
category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
|
||||
category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
|
||||
cat_num_first = category_first_part.attrs["href"].split("/")[-1][1:]
|
||||
cat_num_second = category_second_part.attrs["href"].split("/")[-1][1:]
|
||||
category:str = cat_num_first + "/" + cat_num_second
|
||||
|
||||
return category
|
||||
|
||||
@@ -368,7 +368,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"]
|
||||
|
||||
special_attributes = dict(item.split(":") for item in special_attributes_str.split("|") if ":" in item)
|
||||
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s') and k != "versand_s"}
|
||||
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith(".versand_s") and k != "versand_s"}
|
||||
return special_attributes
|
||||
|
||||
async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
|
||||
@@ -378,24 +378,24 @@ class AdExtractor(WebScrapingMixin):
|
||||
:return: the price of the offer (optional); and the pricing type
|
||||
"""
|
||||
try:
|
||||
price_str:str = await self.web_text(By.ID, 'viewad-price')
|
||||
price_str:str = await self.web_text(By.ID, "viewad-price")
|
||||
price:int | None = None
|
||||
match price_str.split()[-1]:
|
||||
case '€':
|
||||
price_type = 'FIXED'
|
||||
case "€":
|
||||
price_type = "FIXED"
|
||||
# replace('.', '') is to remove the thousands separator before parsing as int
|
||||
price = int(price_str.replace('.', '').split()[0])
|
||||
case 'VB':
|
||||
price_type = 'NEGOTIABLE'
|
||||
price = int(price_str.replace(".", "").split()[0])
|
||||
case "VB":
|
||||
price_type = "NEGOTIABLE"
|
||||
if price_str != "VB": # can be either 'X € VB', or just 'VB'
|
||||
price = int(price_str.replace('.', '').split()[0])
|
||||
case 'verschenken':
|
||||
price_type = 'GIVE_AWAY'
|
||||
price = int(price_str.replace(".", "").split()[0])
|
||||
case "verschenken":
|
||||
price_type = "GIVE_AWAY"
|
||||
case _:
|
||||
price_type = 'NOT_APPLICABLE'
|
||||
price_type = "NOT_APPLICABLE"
|
||||
return price, price_type
|
||||
except TimeoutError: # no 'commercial' ad, has no pricing box etc.
|
||||
return None, 'NOT_APPLICABLE'
|
||||
return None, "NOT_APPLICABLE"
|
||||
|
||||
async def _extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
|
||||
"""
|
||||
@@ -403,17 +403,17 @@ class AdExtractor(WebScrapingMixin):
|
||||
|
||||
:return: the shipping type, and the shipping price (optional)
|
||||
"""
|
||||
ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None
|
||||
ship_type, ship_costs, shipping_options = "NOT_APPLICABLE", None, None
|
||||
try:
|
||||
shipping_text = await self.web_text(By.CLASS_NAME, 'boxedarticle--details--shipping')
|
||||
shipping_text = await self.web_text(By.CLASS_NAME, "boxedarticle--details--shipping")
|
||||
# e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung'
|
||||
if shipping_text == 'Nur Abholung':
|
||||
ship_type = 'PICKUP'
|
||||
elif shipping_text == 'Versand möglich':
|
||||
ship_type = 'SHIPPING'
|
||||
elif '€' in shipping_text:
|
||||
shipping_price_parts = shipping_text.split(' ')
|
||||
ship_type = 'SHIPPING'
|
||||
if shipping_text == "Nur Abholung":
|
||||
ship_type = "PICKUP"
|
||||
elif shipping_text == "Versand möglich":
|
||||
ship_type = "SHIPPING"
|
||||
elif "€" in shipping_text:
|
||||
shipping_price_parts = shipping_text.split(" ")
|
||||
ship_type = "SHIPPING"
|
||||
ship_costs = float(misc.parse_decimal(shipping_price_parts[-2]))
|
||||
|
||||
# reading shipping option from kleinanzeigen
|
||||
@@ -425,7 +425,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
internal_shipping_opt = [x for x in shipping_costs if x["priceInEuroCent"] == ship_costs * 100]
|
||||
|
||||
if not internal_shipping_opt:
|
||||
return 'NOT_APPLICABLE', ship_costs, shipping_options
|
||||
return "NOT_APPLICABLE", ship_costs, shipping_options
|
||||
|
||||
# map to internal shipping identifiers used by kleinanzeigen-bot
|
||||
shipping_option_mapping = {
|
||||
@@ -440,13 +440,13 @@ class AdExtractor(WebScrapingMixin):
|
||||
"HERMES_004": "Hermes_L"
|
||||
}
|
||||
|
||||
shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]['id'])
|
||||
shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]["id"])
|
||||
if not shipping_option:
|
||||
return 'NOT_APPLICABLE', ship_costs, shipping_options
|
||||
return "NOT_APPLICABLE", ship_costs, shipping_options
|
||||
|
||||
shipping_options = [shipping_option]
|
||||
except TimeoutError: # no pricing box -> no shipping given
|
||||
ship_type = 'NOT_APPLICABLE'
|
||||
ship_type = "NOT_APPLICABLE"
|
||||
|
||||
return ship_type, ship_costs, shipping_options
|
||||
|
||||
@@ -457,7 +457,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
:return: a boolean indicating whether the sell directly option is active (optional)
|
||||
"""
|
||||
try:
|
||||
buy_now_is_active:bool = 'Direkt kaufen' in (await self.web_text(By.ID, 'payment-buttons-sidebar'))
|
||||
buy_now_is_active:bool = "Direkt kaufen" in (await self.web_text(By.ID, "payment-buttons-sidebar"))
|
||||
return buy_now_is_active
|
||||
except TimeoutError:
|
||||
return None
|
||||
@@ -469,34 +469,34 @@ class AdExtractor(WebScrapingMixin):
|
||||
:return: a dictionary containing the address parts with their corresponding values
|
||||
"""
|
||||
contact:dict[str, (str | None)] = {}
|
||||
address_text = await self.web_text(By.ID, 'viewad-locality')
|
||||
address_text = await self.web_text(By.ID, "viewad-locality")
|
||||
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
|
||||
try:
|
||||
street = (await self.web_text(By.ID, 'street-address'))[:-1] # trailing comma
|
||||
contact['street'] = street
|
||||
street = (await self.web_text(By.ID, "street-address"))[:-1] # trailing comma
|
||||
contact["street"] = street
|
||||
except TimeoutError:
|
||||
LOG.info('No street given in the contact.')
|
||||
LOG.info("No street given in the contact.")
|
||||
|
||||
(zipcode, location) = address_text.split(" ", 1)
|
||||
contact['zipcode'] = zipcode # e.g. 19372
|
||||
contact['location'] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
|
||||
contact["zipcode"] = zipcode # e.g. 19372
|
||||
contact["location"] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
|
||||
|
||||
contact_person_element:Element = await self.web_find(By.ID, 'viewad-contact')
|
||||
name_element = await self.web_find(By.CLASS_NAME, 'iconlist-text', parent = contact_person_element)
|
||||
contact_person_element:Element = await self.web_find(By.ID, "viewad-contact")
|
||||
name_element = await self.web_find(By.CLASS_NAME, "iconlist-text", parent = contact_person_element)
|
||||
try:
|
||||
name = await self.web_text(By.TAG_NAME, 'a', parent = name_element)
|
||||
name = await self.web_text(By.TAG_NAME, "a", parent = name_element)
|
||||
except TimeoutError: # edge case: name without link
|
||||
name = await self.web_text(By.TAG_NAME, 'span', parent = name_element)
|
||||
contact['name'] = name
|
||||
name = await self.web_text(By.TAG_NAME, "span", parent = name_element)
|
||||
contact["name"] = name
|
||||
|
||||
if 'street' not in contact:
|
||||
contact['street'] = None
|
||||
if "street" not in contact:
|
||||
contact["street"] = None
|
||||
try: # phone number is unusual for non-professional sellers today
|
||||
phone_element = await self.web_find(By.ID, 'viewad-contact-phone')
|
||||
phone_number = await self.web_text(By.TAG_NAME, 'a', parent = phone_element)
|
||||
contact['phone'] = ''.join(phone_number.replace('-', ' ').split(' ')).replace('+49(0)', '0')
|
||||
phone_element = await self.web_find(By.ID, "viewad-contact-phone")
|
||||
phone_number = await self.web_text(By.TAG_NAME, "a", parent = phone_element)
|
||||
contact["phone"] = "".join(phone_number.replace("-", " ").split(" ")).replace("+49(0)", "0")
|
||||
except TimeoutError:
|
||||
contact['phone'] = None # phone seems to be a deprecated feature (for non-professional users)
|
||||
contact["phone"] = None # phone seems to be a deprecated feature (for non-professional users)
|
||||
# also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/
|
||||
|
||||
return contact
|
||||
|
||||
@@ -96,7 +96,7 @@ def save_dict(filepath:str, content:dict[str, Any]) -> None:
|
||||
yaml.indent(mapping = 2, sequence = 4, offset = 2)
|
||||
yaml.representer.add_representer(str, # use YAML | block style for multi-line strings
|
||||
lambda dumper, data:
|
||||
dumper.represent_scalar('tag:yaml.org,2002:str', data, style = '|' if '\n' in data else None)
|
||||
dumper.represent_scalar("tag:yaml.org,2002:str", data, style = "|" if "\n" in data else None)
|
||||
)
|
||||
yaml.allow_duplicate_keys = False
|
||||
yaml.explicit_start = False
|
||||
|
||||
@@ -3,14 +3,14 @@
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
import sys, traceback # isort: skip
|
||||
from types import FrameType, TracebackType
|
||||
from typing import Any, Final
|
||||
from typing import Final
|
||||
|
||||
from . import loggers
|
||||
|
||||
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
|
||||
|
||||
|
||||
def on_exception(ex_type: type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> None:
|
||||
def on_exception(ex_type:type[BaseException] | None, ex_value:BaseException | None, ex_traceback:TracebackType | None) -> None:
|
||||
if ex_type is None or ex_value is None:
|
||||
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s", ex_type, ex_value)
|
||||
return
|
||||
|
||||
@@ -11,6 +11,6 @@ class KleinanzeigenBotError(RuntimeError):
|
||||
class CaptchaEncountered(KleinanzeigenBotError):
|
||||
"""Raised when a Captcha was detected and auto-restart is enabled."""
|
||||
|
||||
def __init__(self, restart_delay: timedelta) -> None:
|
||||
def __init__(self, restart_delay:timedelta) -> None:
|
||||
super().__init__()
|
||||
self.restart_delay = restart_delay
|
||||
|
||||
@@ -42,7 +42,7 @@ class Locale(NamedTuple):
|
||||
return f"{self.language}{region_part}{encoding_part}"
|
||||
|
||||
@staticmethod
|
||||
def of(locale_string: str) -> 'Locale':
|
||||
def of(locale_string:str) -> "Locale":
|
||||
"""
|
||||
>>> Locale.of("en_US.UTF-8")
|
||||
Locale(language='en', region='US', encoding='UTF-8')
|
||||
@@ -86,11 +86,11 @@ def _detect_locale() -> Locale:
|
||||
return Locale.of(lang) if lang else Locale("en", "US", "UTF-8")
|
||||
|
||||
|
||||
_CURRENT_LOCALE: Locale = _detect_locale()
|
||||
_TRANSLATIONS: dict[str, Any] | None = None
|
||||
_CURRENT_LOCALE:Locale = _detect_locale()
|
||||
_TRANSLATIONS:dict[str, Any] | None = None
|
||||
|
||||
|
||||
def translate(text:object, caller: inspect.FrameInfo | None) -> str:
|
||||
def translate(text:object, caller:inspect.FrameInfo | None) -> str:
|
||||
text = str(text)
|
||||
if not caller:
|
||||
return text
|
||||
@@ -105,7 +105,7 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
|
||||
if not _TRANSLATIONS:
|
||||
return text
|
||||
|
||||
module_name = caller.frame.f_globals.get('__name__') # pylint: disable=redefined-outer-name
|
||||
module_name = caller.frame.f_globals.get("__name__") # pylint: disable=redefined-outer-name
|
||||
file_basename = os.path.splitext(os.path.basename(caller.filename))[0]
|
||||
if module_name and module_name.endswith(f".{file_basename}"):
|
||||
module_name = module_name[:-(len(file_basename) + 1)]
|
||||
@@ -124,9 +124,9 @@ gettext.gettext = lambda message: translate(_original_gettext(message), reflect.
|
||||
for module_name, module in sys.modules.items():
|
||||
if module is None or module_name in sys.builtin_module_names:
|
||||
continue
|
||||
if hasattr(module, '_') and module._ is _original_gettext:
|
||||
if hasattr(module, "_") and module._ is _original_gettext:
|
||||
module._ = gettext.gettext # type: ignore[attr-defined]
|
||||
if hasattr(module, 'gettext') and module.gettext is _original_gettext:
|
||||
if hasattr(module, "gettext") and module.gettext is _original_gettext:
|
||||
module.gettext = gettext.gettext # type: ignore[attr-defined]
|
||||
|
||||
|
||||
@@ -190,8 +190,8 @@ def pluralize(noun:str, count:int | Sized, *, prefix_with_count:bool = True) ->
|
||||
# English
|
||||
if len(noun) < 2: # noqa: PLR2004 Magic value used in comparison
|
||||
return f"{prefix}{noun}s"
|
||||
if noun.endswith(('s', 'sh', 'ch', 'x', 'z')):
|
||||
if noun.endswith(("s", "sh", "ch", "x", "z")):
|
||||
return f"{prefix}{noun}es"
|
||||
if noun.endswith('y') and noun[-2].lower() not in "aeiou":
|
||||
if noun.endswith("y") and noun[-2].lower() not in "aeiou":
|
||||
return f"{prefix}{noun[:-1]}ies"
|
||||
return f"{prefix}{noun}s"
|
||||
|
||||
@@ -28,11 +28,11 @@ LOG_ROOT:Final[logging.Logger] = logging.getLogger()
|
||||
|
||||
class _MaxLevelFilter(logging.Filter):
|
||||
|
||||
def __init__(self, level: int) -> None:
|
||||
def __init__(self, level:int) -> None:
|
||||
super().__init__()
|
||||
self.level = level
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
def filter(self, record:logging.LogRecord) -> bool:
|
||||
return record.levelno <= self.level
|
||||
|
||||
|
||||
@@ -104,7 +104,7 @@ def configure_console_logging() -> None:
|
||||
class LogFileHandle:
|
||||
"""Encapsulates a log file handler with close and status methods."""
|
||||
|
||||
def __init__(self, file_path: str, handler: RotatingFileHandler, logger: logging.Logger) -> None:
|
||||
def __init__(self, file_path:str, handler:RotatingFileHandler, logger:logging.Logger) -> None:
|
||||
self.file_path = file_path
|
||||
self._handler:RotatingFileHandler | None = handler
|
||||
self._logger = logger
|
||||
@@ -146,14 +146,14 @@ def flush_all_handlers() -> None:
|
||||
handler.flush()
|
||||
|
||||
|
||||
def get_logger(name: str | None = None) -> logging.Logger:
|
||||
def get_logger(name:str | None = None) -> logging.Logger:
|
||||
"""
|
||||
Returns a localized logger
|
||||
"""
|
||||
|
||||
class TranslatingLogger(logging.Logger):
|
||||
|
||||
def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
|
||||
def _log(self, level:int, msg:object, *args:Any, **kwargs:Any) -> None:
|
||||
if level != DEBUG: # debug messages should not be translated
|
||||
msg = i18n.translate(msg, reflect.get_caller(2))
|
||||
super()._log(level, msg, *args, **kwargs)
|
||||
|
||||
@@ -10,7 +10,7 @@ from typing import Any, TypeVar
|
||||
from . import i18n
|
||||
|
||||
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
|
||||
T = TypeVar('T')
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
|
||||
@@ -49,7 +49,7 @@ def is_frozen() -> bool:
|
||||
return getattr(sys, "frozen", False)
|
||||
|
||||
|
||||
async def ainput(prompt: str) -> str:
|
||||
async def ainput(prompt:str) -> str:
|
||||
return await asyncio.to_thread(input, f'{prompt} ')
|
||||
|
||||
|
||||
@@ -84,10 +84,10 @@ def parse_decimal(number:float | int | str) -> decimal.Decimal:
|
||||
|
||||
|
||||
def parse_datetime(
|
||||
date: datetime | str | None,
|
||||
date:datetime | str | None,
|
||||
*,
|
||||
add_timezone_if_missing: bool = True,
|
||||
use_local_timezone: bool = True
|
||||
add_timezone_if_missing:bool = True,
|
||||
use_local_timezone:bool = True
|
||||
) -> datetime | None:
|
||||
"""
|
||||
Parses a datetime object or ISO-formatted string.
|
||||
@@ -152,22 +152,22 @@ def parse_duration(text:str) -> timedelta:
|
||||
>>> parse_duration("invalid input")
|
||||
datetime.timedelta(0)
|
||||
"""
|
||||
pattern = re.compile(r'(\d+)\s*([dhms])')
|
||||
pattern = re.compile(r"(\d+)\s*([dhms])")
|
||||
parts = pattern.findall(text.lower())
|
||||
kwargs: dict[str, int] = {}
|
||||
kwargs:dict[str, int] = {}
|
||||
for value, unit in parts:
|
||||
if unit == 'd':
|
||||
kwargs['days'] = kwargs.get('days', 0) + int(value)
|
||||
elif unit == 'h':
|
||||
kwargs['hours'] = kwargs.get('hours', 0) + int(value)
|
||||
elif unit == 'm':
|
||||
kwargs['minutes'] = kwargs.get('minutes', 0) + int(value)
|
||||
elif unit == 's':
|
||||
kwargs['seconds'] = kwargs.get('seconds', 0) + int(value)
|
||||
if unit == "d":
|
||||
kwargs["days"] = kwargs.get("days", 0) + int(value)
|
||||
elif unit == "h":
|
||||
kwargs["hours"] = kwargs.get("hours", 0) + int(value)
|
||||
elif unit == "m":
|
||||
kwargs["minutes"] = kwargs.get("minutes", 0) + int(value)
|
||||
elif unit == "s":
|
||||
kwargs["seconds"] = kwargs.get("seconds", 0) + int(value)
|
||||
return timedelta(**kwargs)
|
||||
|
||||
|
||||
def format_timedelta(td: timedelta) -> str:
|
||||
def format_timedelta(td:timedelta) -> str:
|
||||
"""
|
||||
Formats a timedelta into a human-readable string using the pluralize utility.
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import inspect
|
||||
from typing import Any
|
||||
|
||||
|
||||
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
|
||||
def get_caller(depth:int = 1) -> inspect.FrameInfo | None:
|
||||
stack = inspect.stack()
|
||||
try:
|
||||
for frame in stack[depth + 1:]:
|
||||
|
||||
@@ -165,7 +165,7 @@ class WebScrapingMixin:
|
||||
prefs_file = os.path.join(profile_dir, "Preferences")
|
||||
if not os.path.exists(prefs_file):
|
||||
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
|
||||
with open(prefs_file, "w", encoding = 'UTF-8') as fd:
|
||||
with open(prefs_file, "w", encoding = "UTF-8") as fd:
|
||||
json.dump({
|
||||
"credentials_enable_service": False,
|
||||
"enable_do_not_track": True,
|
||||
@@ -234,16 +234,16 @@ class WebScrapingMixin:
|
||||
|
||||
case "Windows":
|
||||
browser_paths = [
|
||||
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
|
||||
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',
|
||||
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r"\Microsoft\Edge\Application\msedge.exe",
|
||||
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r"\Microsoft\Edge\Application\msedge.exe",
|
||||
|
||||
os.environ["PROGRAMFILES"] + r'\Chromium\Application\chrome.exe',
|
||||
os.environ["PROGRAMFILES(X86)"] + r'\Chromium\Application\chrome.exe',
|
||||
os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe',
|
||||
os.environ["PROGRAMFILES"] + r"\Chromium\Application\chrome.exe",
|
||||
os.environ["PROGRAMFILES(X86)"] + r"\Chromium\Application\chrome.exe",
|
||||
os.environ["LOCALAPPDATA"] + r"\Chromium\Application\chrome.exe",
|
||||
|
||||
os.environ["PROGRAMFILES"] + r'\Chrome\Application\chrome.exe',
|
||||
os.environ["PROGRAMFILES(X86)"] + r'\Chrome\Application\chrome.exe',
|
||||
os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe',
|
||||
os.environ["PROGRAMFILES"] + r"\Chrome\Application\chrome.exe",
|
||||
os.environ["PROGRAMFILES(X86)"] + r"\Chrome\Application\chrome.exe",
|
||||
os.environ["LOCALAPPDATA"] + r"\Chrome\Application\chrome.exe",
|
||||
|
||||
shutil.which("msedge.exe"),
|
||||
shutil.which("chromium.exe"),
|
||||
@@ -259,8 +259,8 @@ class WebScrapingMixin:
|
||||
|
||||
raise AssertionError(_("Installed browser could not be detected"))
|
||||
|
||||
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
|
||||
timeout:int | float = 5, timeout_error_message: str = "") -> T:
|
||||
async def web_await(self, condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
|
||||
timeout:int | float = 5, timeout_error_message:str = "") -> T:
|
||||
"""
|
||||
Blocks/waits until the given condition is met.
|
||||
|
||||
@@ -523,7 +523,7 @@ class WebScrapingMixin:
|
||||
return response
|
||||
# pylint: enable=dangerous-default-value
|
||||
|
||||
async def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10_000, *, scroll_back_top: bool = False) -> None:
|
||||
async def web_scroll_page_down(self, scroll_length:int = 10, scroll_speed:int = 10_000, *, scroll_back_top:bool = False) -> None:
|
||||
"""
|
||||
Smoothly scrolls the current web page down.
|
||||
|
||||
@@ -532,7 +532,7 @@ class WebScrapingMixin:
|
||||
:param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
|
||||
"""
|
||||
current_y_pos = 0
|
||||
bottom_y_pos: int = await self.web_execute('document.body.scrollHeight') # get bottom position
|
||||
bottom_y_pos:int = await self.web_execute("document.body.scrollHeight") # get bottom position
|
||||
while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached
|
||||
current_y_pos += scroll_length
|
||||
await self.web_execute(f'window.scrollTo(0, {current_y_pos})') # scroll one step
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
"""
|
||||
SPDX-FileCopyrightText: © Jens Bergmann and contributors
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
"""
|
||||
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
import os
|
||||
from typing import Any, Final
|
||||
from unittest.mock import MagicMock
|
||||
@@ -21,7 +19,7 @@ LOG.setLevel(loggers.DEBUG)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_data_dir(tmp_path: str) -> str:
|
||||
def test_data_dir(tmp_path:str) -> str:
|
||||
"""Provides a temporary directory for test data.
|
||||
|
||||
This fixture uses pytest's built-in tmp_path fixture to create a temporary
|
||||
@@ -41,33 +39,33 @@ def sample_config() -> dict[str, Any]:
|
||||
- Publishing settings
|
||||
"""
|
||||
return {
|
||||
'login': {
|
||||
'username': 'testuser',
|
||||
'password': 'testpass'
|
||||
"login": {
|
||||
"username": "testuser",
|
||||
"password": "testpass"
|
||||
},
|
||||
'browser': {
|
||||
'arguments': [],
|
||||
'binary_location': None,
|
||||
'extensions': [],
|
||||
'use_private_window': True,
|
||||
'user_data_dir': None,
|
||||
'profile_name': None
|
||||
"browser": {
|
||||
"arguments": [],
|
||||
"binary_location": None,
|
||||
"extensions": [],
|
||||
"use_private_window": True,
|
||||
"user_data_dir": None,
|
||||
"profile_name": None
|
||||
},
|
||||
'ad_defaults': {
|
||||
'description': {
|
||||
'prefix': 'Test Prefix',
|
||||
'suffix': 'Test Suffix'
|
||||
"ad_defaults": {
|
||||
"description": {
|
||||
"prefix": "Test Prefix",
|
||||
"suffix": "Test Suffix"
|
||||
}
|
||||
},
|
||||
'publishing': {
|
||||
'delete_old_ads': 'BEFORE_PUBLISH',
|
||||
'delete_old_ads_by_title': False
|
||||
"publishing": {
|
||||
"delete_old_ads": "BEFORE_PUBLISH",
|
||||
"delete_old_ads_by_title": False
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_bot(sample_config: dict[str, Any]) -> KleinanzeigenBot:
|
||||
def test_bot(sample_config:dict[str, Any]) -> KleinanzeigenBot:
|
||||
"""Provides a fresh KleinanzeigenBot instance for all test classes.
|
||||
|
||||
Dependencies:
|
||||
@@ -89,7 +87,7 @@ def browser_mock() -> MagicMock:
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def log_file_path(test_data_dir: str) -> str:
|
||||
def log_file_path(test_data_dir:str) -> str:
|
||||
"""Provides a temporary path for log files.
|
||||
|
||||
Dependencies:
|
||||
@@ -99,7 +97,7 @@ def log_file_path(test_data_dir: str) -> str:
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_extractor(browser_mock: MagicMock, sample_config: dict[str, Any]) -> AdExtractor:
|
||||
def test_extractor(browser_mock:MagicMock, sample_config:dict[str, Any]) -> AdExtractor:
|
||||
"""Provides a fresh AdExtractor instance for testing.
|
||||
|
||||
Dependencies:
|
||||
|
||||
@@ -122,9 +122,9 @@ def test_calculate_content_hash_with_none_values() -> None:
|
||||
)
|
||||
])
|
||||
def test_get_description_affixes(
|
||||
config: dict[str, Any],
|
||||
prefix: bool,
|
||||
expected: str
|
||||
config:dict[str, Any],
|
||||
prefix:bool,
|
||||
expected:str
|
||||
) -> None:
|
||||
"""Test get_description_affixes function with various inputs."""
|
||||
result = ads.get_description_affixes(config, prefix = prefix)
|
||||
@@ -157,7 +157,7 @@ def test_get_description_affixes(
|
||||
""
|
||||
)
|
||||
])
|
||||
def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool, expected: str) -> None:
|
||||
def test_get_description_affixes_edge_cases(config:dict[str, Any], prefix:bool, expected:str) -> None:
|
||||
"""Test edge cases for description affix handling."""
|
||||
assert ads.get_description_affixes(config, prefix = prefix) == expected
|
||||
|
||||
@@ -170,7 +170,7 @@ def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool
|
||||
(3.14, ""), # Test with a float
|
||||
(set(), ""), # Test with an empty set
|
||||
])
|
||||
def test_get_description_affixes_edge_cases_non_dict(config: Any, expected: str) -> None:
|
||||
def test_get_description_affixes_edge_cases_non_dict(config:Any, expected:str) -> None:
|
||||
"""Test get_description_affixes function with non-dict inputs."""
|
||||
result = ads.get_description_affixes(config, prefix=True)
|
||||
result = ads.get_description_affixes(config, prefix = True)
|
||||
assert result == expected
|
||||
|
||||
@@ -12,21 +12,21 @@ class TestKleinanzeigenBot:
|
||||
def bot(self) -> KleinanzeigenBot:
|
||||
return KleinanzeigenBot()
|
||||
|
||||
def test_parse_args_help(self, bot: KleinanzeigenBot) -> None:
|
||||
def test_parse_args_help(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test parsing of help command"""
|
||||
bot.parse_args(["app", "help"])
|
||||
assert bot.command == "help"
|
||||
assert bot.ads_selector == "due"
|
||||
assert not bot.keep_old_ads
|
||||
|
||||
def test_parse_args_publish(self, bot: KleinanzeigenBot) -> None:
|
||||
def test_parse_args_publish(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test parsing of publish command with options"""
|
||||
bot.parse_args(["app", "publish", "--ads=all", "--keep-old"])
|
||||
assert bot.command == "publish"
|
||||
assert bot.ads_selector == "all"
|
||||
assert bot.keep_old_ads
|
||||
|
||||
def test_get_version(self, bot: KleinanzeigenBot) -> None:
|
||||
def test_get_version(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test version retrieval"""
|
||||
version = bot.get_version()
|
||||
assert isinstance(version, str)
|
||||
|
||||
@@ -12,31 +12,31 @@ from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
|
||||
|
||||
|
||||
class _DimensionsDict(TypedDict):
|
||||
dimension108: str
|
||||
dimension108:str
|
||||
|
||||
|
||||
class _UniversalAnalyticsOptsDict(TypedDict):
|
||||
dimensions: _DimensionsDict
|
||||
dimensions:_DimensionsDict
|
||||
|
||||
|
||||
class _BelenConfDict(TypedDict):
|
||||
universalAnalyticsOpts: _UniversalAnalyticsOptsDict
|
||||
universalAnalyticsOpts:_UniversalAnalyticsOptsDict
|
||||
|
||||
|
||||
class _SpecialAttributesDict(TypedDict, total = False):
|
||||
art_s: str
|
||||
condition_s: str
|
||||
art_s:str
|
||||
condition_s:str
|
||||
|
||||
|
||||
class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never used
|
||||
belen_conf: _BelenConfDict
|
||||
expected: _SpecialAttributesDict
|
||||
belen_conf:_BelenConfDict
|
||||
expected:_SpecialAttributesDict
|
||||
|
||||
|
||||
class TestAdExtractorBasics:
|
||||
"""Basic synchronous tests for AdExtractor."""
|
||||
|
||||
def test_constructor(self, browser_mock: MagicMock, sample_config: dict[str, Any]) -> None:
|
||||
def test_constructor(self, browser_mock:MagicMock, sample_config:dict[str, Any]) -> None:
|
||||
"""Test the constructor of AdExtractor"""
|
||||
extractor = AdExtractor(browser_mock, sample_config)
|
||||
assert extractor.browser == browser_mock
|
||||
@@ -48,7 +48,7 @@ class TestAdExtractorBasics:
|
||||
("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1),
|
||||
("https://www.kleinanzeigen.de/invalid-url", -1),
|
||||
])
|
||||
def test_extract_ad_id_from_ad_url(self, test_extractor: AdExtractor, url: str, expected_id: int) -> None:
|
||||
def test_extract_ad_id_from_ad_url(self, test_extractor:AdExtractor, url:str, expected_id:int) -> None:
|
||||
"""Test extraction of ad ID from different URL formats."""
|
||||
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
|
||||
|
||||
@@ -66,19 +66,19 @@ class TestAdExtractorPricing:
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_pricing_info(
|
||||
self, test_extractor: AdExtractor, price_text: str, expected_price: int | None, expected_type: str
|
||||
self, test_extractor:AdExtractor, price_text:str, expected_price:int | None, expected_type:str
|
||||
) -> None:
|
||||
"""Test price extraction with different formats"""
|
||||
with patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = price_text):
|
||||
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = price_text):
|
||||
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
|
||||
assert price == expected_price
|
||||
assert price_type == expected_type
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_pricing_info_timeout(self, test_extractor: AdExtractor) -> None:
|
||||
async def test_extract_pricing_info_timeout(self, test_extractor:AdExtractor) -> None:
|
||||
"""Test price extraction when element is not found"""
|
||||
with patch.object(test_extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError):
|
||||
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
|
||||
assert price is None
|
||||
assert price_type == "NOT_APPLICABLE"
|
||||
@@ -95,15 +95,15 @@ class TestAdExtractorShipping:
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_shipping_info(
|
||||
self, test_extractor: AdExtractor, shipping_text: str, expected_type: str, expected_cost: float | None
|
||||
self, test_extractor:AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None
|
||||
) -> None:
|
||||
"""Test shipping info extraction with different text formats."""
|
||||
with patch.object(test_extractor, 'page', MagicMock()), \
|
||||
patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = shipping_text), \
|
||||
patch.object(test_extractor, 'web_request', new_callable = AsyncMock) as mock_web_request:
|
||||
with patch.object(test_extractor, "page", MagicMock()), \
|
||||
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = shipping_text), \
|
||||
patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
|
||||
if expected_cost:
|
||||
shipping_response: dict[str, Any] = {
|
||||
shipping_response:dict[str, Any] = {
|
||||
"data": {
|
||||
"shippingOptionsResponse": {
|
||||
"options": [
|
||||
@@ -125,7 +125,7 @@ class TestAdExtractorShipping:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_shipping_info_with_options(self, test_extractor: AdExtractor) -> None:
|
||||
async def test_extract_shipping_info_with_options(self, test_extractor:AdExtractor) -> None:
|
||||
"""Test shipping info extraction with shipping options."""
|
||||
shipping_response = {
|
||||
"content": json.dumps({
|
||||
@@ -139,9 +139,9 @@ class TestAdExtractorShipping:
|
||||
})
|
||||
}
|
||||
|
||||
with patch.object(test_extractor, 'page', MagicMock()), \
|
||||
patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \
|
||||
patch.object(test_extractor, 'web_request', new_callable = AsyncMock, return_value = shipping_response):
|
||||
with patch.object(test_extractor, "page", MagicMock()), \
|
||||
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \
|
||||
patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response):
|
||||
|
||||
shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page()
|
||||
|
||||
@@ -154,21 +154,21 @@ class TestAdExtractorNavigation:
|
||||
"""Tests for navigation related functionality."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_navigate_to_ad_page_with_url(self, test_extractor: AdExtractor) -> None:
|
||||
async def test_navigate_to_ad_page_with_url(self, test_extractor:AdExtractor) -> None:
|
||||
"""Test navigation to ad page using a URL."""
|
||||
page_mock = AsyncMock()
|
||||
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||
|
||||
with patch.object(test_extractor, 'page', page_mock), \
|
||||
patch.object(test_extractor, 'web_open', new_callable = AsyncMock) as mock_web_open, \
|
||||
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError):
|
||||
with patch.object(test_extractor, "page", page_mock), \
|
||||
patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \
|
||||
patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||
|
||||
result = await test_extractor.naviagte_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345")
|
||||
assert result is True
|
||||
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_navigate_to_ad_page_with_id(self, test_extractor: AdExtractor) -> None:
|
||||
async def test_navigate_to_ad_page_with_id(self, test_extractor:AdExtractor) -> None:
|
||||
"""Test navigation to ad page using an ID."""
|
||||
page_mock = AsyncMock()
|
||||
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||
@@ -186,7 +186,7 @@ class TestAdExtractorNavigation:
|
||||
popup_close_mock.click = AsyncMock()
|
||||
popup_close_mock.apply = AsyncMock(return_value = True)
|
||||
|
||||
def find_mock(selector_type: By, selector_value: str, **_: Any) -> Element | None:
|
||||
def find_mock(selector_type:By, selector_value:str, **_:Any) -> Element | None:
|
||||
if selector_type == By.ID and selector_value == "site-search-query":
|
||||
return input_mock
|
||||
if selector_type == By.ID and selector_value == "site-search-submit":
|
||||
@@ -195,20 +195,20 @@ class TestAdExtractorNavigation:
|
||||
return popup_close_mock
|
||||
return None
|
||||
|
||||
with patch.object(test_extractor, 'page', page_mock), \
|
||||
patch.object(test_extractor, 'web_open', new_callable = AsyncMock) as mock_web_open, \
|
||||
patch.object(test_extractor, 'web_input', new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, 'web_check', new_callable = AsyncMock, return_value = True), \
|
||||
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, side_effect = find_mock):
|
||||
with patch.object(test_extractor, "page", page_mock), \
|
||||
patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \
|
||||
patch.object(test_extractor, "web_input", new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True), \
|
||||
patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = find_mock):
|
||||
|
||||
result = await test_extractor.naviagte_to_ad_page(12345)
|
||||
assert result is True
|
||||
mock_web_open.assert_called_with('https://www.kleinanzeigen.de/')
|
||||
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/")
|
||||
submit_button_mock.click.assert_awaited_once()
|
||||
popup_close_mock.click.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_navigate_to_ad_page_with_popup(self, test_extractor: AdExtractor) -> None:
|
||||
async def test_navigate_to_ad_page_with_popup(self, test_extractor:AdExtractor) -> None:
|
||||
"""Test navigation to ad page with popup handling."""
|
||||
page_mock = AsyncMock()
|
||||
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||
@@ -218,18 +218,18 @@ class TestAdExtractorNavigation:
|
||||
input_mock.send_keys = AsyncMock()
|
||||
input_mock.apply = AsyncMock(return_value = True)
|
||||
|
||||
with patch.object(test_extractor, 'page', page_mock), \
|
||||
patch.object(test_extractor, 'web_open', new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, return_value = input_mock), \
|
||||
patch.object(test_extractor, 'web_click', new_callable = AsyncMock) as mock_web_click, \
|
||||
patch.object(test_extractor, 'web_check', new_callable = AsyncMock, return_value = True):
|
||||
with patch.object(test_extractor, "page", page_mock), \
|
||||
patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock), \
|
||||
patch.object(test_extractor, "web_click", new_callable = AsyncMock) as mock_web_click, \
|
||||
patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True):
|
||||
|
||||
result = await test_extractor.naviagte_to_ad_page(12345)
|
||||
assert result is True
|
||||
mock_web_click.assert_called_with(By.CLASS_NAME, 'mfp-close')
|
||||
mock_web_click.assert_called_with(By.CLASS_NAME, "mfp-close")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_navigate_to_ad_page_invalid_id(self, test_extractor: AdExtractor) -> None:
|
||||
async def test_navigate_to_ad_page_invalid_id(self, test_extractor:AdExtractor) -> None:
|
||||
"""Test navigation to ad page with invalid ID."""
|
||||
page_mock = AsyncMock()
|
||||
page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0"
|
||||
@@ -240,22 +240,22 @@ class TestAdExtractorNavigation:
|
||||
input_mock.apply = AsyncMock(return_value = True)
|
||||
input_mock.attrs = {}
|
||||
|
||||
with patch.object(test_extractor, 'page', page_mock), \
|
||||
patch.object(test_extractor, 'web_open', new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, return_value = input_mock):
|
||||
with patch.object(test_extractor, "page", page_mock), \
|
||||
patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock):
|
||||
|
||||
result = await test_extractor.naviagte_to_ad_page(99999)
|
||||
assert result is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_own_ads_urls(self, test_extractor: AdExtractor) -> None:
|
||||
async def test_extract_own_ads_urls(self, test_extractor:AdExtractor) -> None:
|
||||
"""Test extraction of own ads URLs - basic test."""
|
||||
with patch.object(test_extractor, 'web_open', new_callable=AsyncMock), \
|
||||
patch.object(test_extractor, 'web_sleep', new_callable=AsyncMock), \
|
||||
patch.object(test_extractor, 'web_find', new_callable=AsyncMock) as mock_web_find, \
|
||||
patch.object(test_extractor, 'web_find_all', new_callable=AsyncMock) as mock_web_find_all, \
|
||||
patch.object(test_extractor, 'web_scroll_page_down', new_callable=AsyncMock), \
|
||||
patch.object(test_extractor, 'web_execute', new_callable=AsyncMock):
|
||||
with patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, "web_sleep", new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, "web_find", new_callable = AsyncMock) as mock_web_find, \
|
||||
patch.object(test_extractor, "web_find_all", new_callable = AsyncMock) as mock_web_find_all, \
|
||||
patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock), \
|
||||
patch.object(test_extractor, "web_execute", new_callable = AsyncMock):
|
||||
|
||||
# --- Setup mock objects for DOM elements ---
|
||||
# Mocks needed for the actual execution flow
|
||||
@@ -263,7 +263,7 @@ class TestAdExtractorNavigation:
|
||||
pagination_section_mock = MagicMock()
|
||||
cardbox_mock = MagicMock() # Represents the <li> element
|
||||
link_mock = MagicMock() # Represents the <a> element
|
||||
link_mock.attrs = {'href': '/s-anzeige/test/12345'} # Configure the desired output
|
||||
link_mock.attrs = {"href": "/s-anzeige/test/12345"} # Configure the desired output
|
||||
|
||||
# Mocks for elements potentially checked but maybe not strictly needed for output
|
||||
# (depending on how robust the mocking is)
|
||||
@@ -295,20 +295,20 @@ class TestAdExtractorNavigation:
|
||||
refs = await test_extractor.extract_own_ads_urls()
|
||||
|
||||
# --- Assertions ---
|
||||
assert refs == ['/s-anzeige/test/12345'] # Now it should match
|
||||
assert refs == ["/s-anzeige/test/12345"] # Now it should match
|
||||
|
||||
# Optional: Verify calls were made as expected
|
||||
mock_web_find.assert_has_calls([
|
||||
call(By.ID, 'my-manageitems-adlist'),
|
||||
call(By.CSS_SELECTOR, '.Pagination', timeout=10),
|
||||
call(By.ID, 'my-manageitems-adlist'),
|
||||
call(By.CSS_SELECTOR, 'div.manageitems-item-ad h3 a.text-onSurface', parent=cardbox_mock),
|
||||
], any_order=False) # Check order if important
|
||||
call(By.ID, "my-manageitems-adlist"),
|
||||
call(By.CSS_SELECTOR, ".Pagination", timeout = 10),
|
||||
call(By.ID, "my-manageitems-adlist"),
|
||||
call(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = cardbox_mock),
|
||||
], any_order = False) # Check order if important
|
||||
|
||||
mock_web_find_all.assert_has_calls([
|
||||
call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section_mock),
|
||||
call(By.CLASS_NAME, 'cardbox', parent=ad_list_container_mock),
|
||||
], any_order=False)
|
||||
call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section_mock),
|
||||
call(By.CLASS_NAME, "cardbox", parent = ad_list_container_mock),
|
||||
], any_order = False)
|
||||
|
||||
|
||||
class TestAdExtractorContent:
|
||||
@@ -318,14 +318,14 @@ class TestAdExtractorContent:
|
||||
@pytest.fixture
|
||||
def extractor_with_config(self) -> AdExtractor:
|
||||
"""Create extractor with specific config for testing prefix/suffix handling."""
|
||||
browser_mock = MagicMock(spec=Browser)
|
||||
browser_mock = MagicMock(spec = Browser)
|
||||
return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_description_with_affixes(
|
||||
self,
|
||||
test_extractor: AdExtractor,
|
||||
description_test_cases: list[tuple[dict[str, Any], str, str]]
|
||||
test_extractor:AdExtractor,
|
||||
description_test_cases:list[tuple[dict[str, Any], str, str]]
|
||||
) -> None:
|
||||
"""Test extraction of description with various prefix/suffix configurations."""
|
||||
# Mock the page
|
||||
@@ -337,18 +337,18 @@ class TestAdExtractorContent:
|
||||
test_extractor.config = config
|
||||
|
||||
with patch.multiple(test_extractor,
|
||||
web_text=AsyncMock(side_effect=[
|
||||
web_text = AsyncMock(side_effect = [
|
||||
"Test Title", # Title
|
||||
raw_description, # Raw description (without affixes)
|
||||
"03.02.2025" # Creation date
|
||||
]),
|
||||
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
||||
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
||||
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
||||
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
||||
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
||||
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
||||
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
||||
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
|
||||
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
|
||||
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
|
||||
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
|
||||
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
|
||||
_download_images_from_ad_page = AsyncMock(return_value = []),
|
||||
_extract_contact_from_ad_page = AsyncMock(return_value = {})
|
||||
):
|
||||
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||
assert info["description"] == raw_description
|
||||
@@ -356,7 +356,7 @@ class TestAdExtractorContent:
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_description_with_affixes_timeout(
|
||||
self,
|
||||
test_extractor: AdExtractor
|
||||
test_extractor:AdExtractor
|
||||
) -> None:
|
||||
"""Test handling of timeout when extracting description."""
|
||||
# Mock the page
|
||||
@@ -365,18 +365,18 @@ class TestAdExtractorContent:
|
||||
test_extractor.page = page_mock
|
||||
|
||||
with patch.multiple(test_extractor,
|
||||
web_text=AsyncMock(side_effect=[
|
||||
web_text = AsyncMock(side_effect = [
|
||||
"Test Title", # Title succeeds
|
||||
TimeoutError("Timeout"), # Description times out
|
||||
"03.02.2025" # Date succeeds
|
||||
]),
|
||||
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
||||
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
||||
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
||||
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
||||
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
||||
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
||||
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
||||
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
|
||||
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
|
||||
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
|
||||
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
|
||||
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
|
||||
_download_images_from_ad_page = AsyncMock(return_value = []),
|
||||
_extract_contact_from_ad_page = AsyncMock(return_value = {})
|
||||
):
|
||||
try:
|
||||
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||
@@ -388,7 +388,7 @@ class TestAdExtractorContent:
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_description_with_affixes_no_affixes(
|
||||
self,
|
||||
test_extractor: AdExtractor
|
||||
test_extractor:AdExtractor
|
||||
) -> None:
|
||||
"""Test extraction of description without any affixes in config."""
|
||||
# Mock the page
|
||||
@@ -399,24 +399,24 @@ class TestAdExtractorContent:
|
||||
raw_description = "Original Description"
|
||||
|
||||
with patch.multiple(test_extractor,
|
||||
web_text=AsyncMock(side_effect=[
|
||||
web_text = AsyncMock(side_effect = [
|
||||
"Test Title", # Title
|
||||
raw_description, # Description without affixes
|
||||
"03.02.2025" # Creation date
|
||||
]),
|
||||
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
||||
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
||||
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
||||
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
||||
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
||||
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
||||
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
||||
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
|
||||
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
|
||||
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
|
||||
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
|
||||
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
|
||||
_download_images_from_ad_page = AsyncMock(return_value = []),
|
||||
_extract_contact_from_ad_page = AsyncMock(return_value = {})
|
||||
):
|
||||
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||
assert info["description"] == raw_description
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly(self, test_extractor: AdExtractor) -> None:
|
||||
async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None:
|
||||
"""Test extraction of sell directly option."""
|
||||
test_cases = [
|
||||
("Direkt kaufen", True),
|
||||
@@ -424,11 +424,11 @@ class TestAdExtractorContent:
|
||||
]
|
||||
|
||||
for text, expected in test_cases:
|
||||
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, return_value=text):
|
||||
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = text):
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is expected
|
||||
|
||||
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError):
|
||||
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
@@ -451,15 +451,15 @@ class TestAdExtractorCategory:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_category(self, extractor: AdExtractor) -> None:
|
||||
async def test_extract_category(self, extractor:AdExtractor) -> None:
|
||||
"""Test category extraction from breadcrumb."""
|
||||
category_line = MagicMock()
|
||||
first_part = MagicMock()
|
||||
first_part.attrs = {'href': '/s-familie-kind-baby/c17'}
|
||||
first_part.attrs = {"href": "/s-familie-kind-baby/c17"}
|
||||
second_part = MagicMock()
|
||||
second_part.attrs = {'href': '/s-spielzeug/c23'}
|
||||
second_part.attrs = {"href": "/s-spielzeug/c23"}
|
||||
|
||||
with patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
|
||||
with patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
|
||||
mock_web_find.side_effect = [
|
||||
category_line,
|
||||
first_part,
|
||||
@@ -469,15 +469,15 @@ class TestAdExtractorCategory:
|
||||
result = await extractor._extract_category_from_ad_page()
|
||||
assert result == "17/23"
|
||||
|
||||
mock_web_find.assert_any_call(By.ID, 'vap-brdcrmb')
|
||||
mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line)
|
||||
mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line)
|
||||
mock_web_find.assert_any_call(By.ID, "vap-brdcrmb")
|
||||
mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
|
||||
mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_special_attributes_empty(self, extractor: AdExtractor) -> None:
|
||||
async def test_extract_special_attributes_empty(self, extractor:AdExtractor) -> None:
|
||||
"""Test extraction of special attributes when empty."""
|
||||
with patch.object(extractor, 'web_execute', new_callable = AsyncMock) as mock_web_execute:
|
||||
with patch.object(extractor, "web_execute", new_callable = AsyncMock) as mock_web_execute:
|
||||
mock_web_execute.return_value = {
|
||||
"universalAnalyticsOpts": {
|
||||
"dimensions": {
|
||||
@@ -507,11 +507,11 @@ class TestAdExtractorContact:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_contact_info(self, extractor: AdExtractor) -> None:
|
||||
async def test_extract_contact_info(self, extractor:AdExtractor) -> None:
|
||||
"""Test extraction of contact information."""
|
||||
with patch.object(extractor, 'page', MagicMock()), \
|
||||
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \
|
||||
patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
|
||||
with patch.object(extractor, "page", MagicMock()), \
|
||||
patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \
|
||||
patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
|
||||
|
||||
mock_web_text.side_effect = [
|
||||
"12345 Berlin - Mitte",
|
||||
@@ -535,22 +535,22 @@ class TestAdExtractorContact:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_contact_info_timeout(self, extractor: AdExtractor) -> None:
|
||||
async def test_extract_contact_info_timeout(self, extractor:AdExtractor) -> None:
|
||||
"""Test contact info extraction when elements are not found."""
|
||||
with patch.object(extractor, 'page', MagicMock()), \
|
||||
patch.object(extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError()), \
|
||||
patch.object(extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError()), \
|
||||
with patch.object(extractor, "page", MagicMock()), \
|
||||
patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError()), \
|
||||
patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError()), \
|
||||
pytest.raises(TimeoutError):
|
||||
|
||||
await extractor._extract_contact_from_ad_page()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_contact_info_with_phone(self, extractor: AdExtractor) -> None:
|
||||
async def test_extract_contact_info_with_phone(self, extractor:AdExtractor) -> None:
|
||||
"""Test extraction of contact information including phone number."""
|
||||
with patch.object(extractor, 'page', MagicMock()), \
|
||||
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \
|
||||
patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
|
||||
with patch.object(extractor, "page", MagicMock()), \
|
||||
patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \
|
||||
patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
|
||||
|
||||
mock_web_text.side_effect = [
|
||||
"12345 Berlin - Mitte",
|
||||
@@ -588,19 +588,19 @@ class TestAdExtractorDownload:
|
||||
return AdExtractor(browser_mock, config_mock)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_ad_existing_directory(self, extractor: AdExtractor) -> None:
|
||||
async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None:
|
||||
"""Test downloading an ad when the directory already exists."""
|
||||
with patch('os.path.exists') as mock_exists, \
|
||||
patch('os.path.isdir') as mock_isdir, \
|
||||
patch('os.makedirs') as mock_makedirs, \
|
||||
patch('os.mkdir') as mock_mkdir, \
|
||||
patch('shutil.rmtree') as mock_rmtree, \
|
||||
patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \
|
||||
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract:
|
||||
with patch("os.path.exists") as mock_exists, \
|
||||
patch("os.path.isdir") as mock_isdir, \
|
||||
patch("os.makedirs") as mock_makedirs, \
|
||||
patch("os.mkdir") as mock_mkdir, \
|
||||
patch("shutil.rmtree") as mock_rmtree, \
|
||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
|
||||
|
||||
base_dir = 'downloaded-ads'
|
||||
ad_dir = os.path.join(base_dir, 'ad_12345')
|
||||
yaml_path = os.path.join(ad_dir, 'ad_12345.yaml')
|
||||
base_dir = "downloaded-ads"
|
||||
ad_dir = os.path.join(base_dir, "ad_12345")
|
||||
yaml_path = os.path.join(ad_dir, "ad_12345.yaml")
|
||||
|
||||
# Configure mocks for directory checks
|
||||
existing_paths = {base_dir, ad_dir}
|
||||
@@ -632,32 +632,32 @@ class TestAdExtractorDownload:
|
||||
# Workaround for hard-coded path in download_ad
|
||||
actual_call = mock_save_dict.call_args
|
||||
assert actual_call is not None
|
||||
actual_path = actual_call[0][0].replace('/', os.path.sep)
|
||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
||||
assert actual_path == yaml_path
|
||||
assert actual_call[0][1] == mock_extract.return_value
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_download_images_no_images(self, extractor: AdExtractor) -> None:
|
||||
async def test_download_images_no_images(self, extractor:AdExtractor) -> None:
|
||||
"""Test image download when no images are found."""
|
||||
with patch.object(extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError):
|
||||
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
|
||||
assert len(image_paths) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_ad(self, extractor: AdExtractor) -> None:
|
||||
async def test_download_ad(self, extractor:AdExtractor) -> None:
|
||||
"""Test downloading an entire ad."""
|
||||
with patch('os.path.exists') as mock_exists, \
|
||||
patch('os.path.isdir') as mock_isdir, \
|
||||
patch('os.makedirs') as mock_makedirs, \
|
||||
patch('os.mkdir') as mock_mkdir, \
|
||||
patch('shutil.rmtree') as mock_rmtree, \
|
||||
patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \
|
||||
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract:
|
||||
with patch("os.path.exists") as mock_exists, \
|
||||
patch("os.path.isdir") as mock_isdir, \
|
||||
patch("os.makedirs") as mock_makedirs, \
|
||||
patch("os.mkdir") as mock_mkdir, \
|
||||
patch("shutil.rmtree") as mock_rmtree, \
|
||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
|
||||
|
||||
base_dir = 'downloaded-ads'
|
||||
ad_dir = os.path.join(base_dir, 'ad_12345')
|
||||
yaml_path = os.path.join(ad_dir, 'ad_12345.yaml')
|
||||
base_dir = "downloaded-ads"
|
||||
ad_dir = os.path.join(base_dir, "ad_12345")
|
||||
yaml_path = os.path.join(ad_dir, "ad_12345.yaml")
|
||||
|
||||
# Configure mocks for directory checks
|
||||
mock_exists.return_value = False
|
||||
@@ -690,6 +690,6 @@ class TestAdExtractorDownload:
|
||||
# Get the actual call arguments
|
||||
actual_call = mock_save_dict.call_args
|
||||
assert actual_call is not None
|
||||
actual_path = actual_call[0][0].replace('/', os.path.sep)
|
||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
||||
assert actual_path == yaml_path
|
||||
assert actual_call[0][1] == mock_extract.return_value
|
||||
|
||||
@@ -13,7 +13,7 @@ from kleinanzeigen_bot.utils import i18n
|
||||
("fr_CA", ("fr", "CA", "UTF-8")), # Test with language + region, no encoding
|
||||
("pt_BR.iso8859-1", ("pt", "BR", "ISO8859-1")), # Test with language + region + encoding
|
||||
])
|
||||
def test_detect_locale(monkeypatch: MonkeyPatch, lang: str | None, expected: i18n.Locale) -> None:
|
||||
def test_detect_locale(monkeypatch:MonkeyPatch, lang:str | None, expected:i18n.Locale) -> None:
|
||||
"""
|
||||
Pytest test case to verify detect_system_language() behavior under various LANG values.
|
||||
"""
|
||||
@@ -49,7 +49,7 @@ def test_pluralize(
|
||||
noun:str,
|
||||
count:int,
|
||||
prefix_with_count:bool,
|
||||
expected: str
|
||||
expected:str
|
||||
) -> None:
|
||||
i18n.set_current_locale(i18n.Locale(lang, "US", "UTF_8"))
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -26,12 +26,12 @@ from ruamel.yaml import YAML
|
||||
from kleinanzeigen_bot import resources
|
||||
|
||||
# Messages that are intentionally not translated (internal/debug messages)
|
||||
EXCLUDED_MESSAGES: dict[str, set[str]] = {
|
||||
EXCLUDED_MESSAGES:dict[str, set[str]] = {
|
||||
"kleinanzeigen_bot/__init__.py": {"############################################"}
|
||||
}
|
||||
|
||||
# Special modules that are known to be needed even if not in messages_by_file
|
||||
KNOWN_NEEDED_MODULES = {'getopt.py'}
|
||||
KNOWN_NEEDED_MODULES = {"getopt.py"}
|
||||
|
||||
# Type aliases for better readability
|
||||
ModulePath = str
|
||||
@@ -45,12 +45,12 @@ MissingDict = dict[FunctionName, dict[Message, set[Message]]]
|
||||
@dataclass
|
||||
class MessageLocation:
|
||||
"""Represents the location of a message in the codebase."""
|
||||
module: str
|
||||
function: str
|
||||
message: str
|
||||
module:str
|
||||
function:str
|
||||
message:str
|
||||
|
||||
|
||||
def _get_function_name(node: ast.AST) -> str:
|
||||
def _get_function_name(node:ast.AST) -> str:
|
||||
"""
|
||||
Get the name of the function containing this AST node.
|
||||
This matches i18n.py's behavior which only uses the function name for translation lookups.
|
||||
@@ -63,14 +63,14 @@ def _get_function_name(node: ast.AST) -> str:
|
||||
The function name or "module" for module-level code
|
||||
"""
|
||||
|
||||
def find_parent_context(n: ast.AST) -> tuple[str | None, str | None]:
|
||||
def find_parent_context(n:ast.AST) -> tuple[str | None, str | None]:
|
||||
"""Find the containing class and function names."""
|
||||
class_name = None
|
||||
function_name = None
|
||||
current = n
|
||||
|
||||
while hasattr(current, '_parent'):
|
||||
current = getattr(current, '_parent')
|
||||
while hasattr(current, "_parent"):
|
||||
current = getattr(current, "_parent")
|
||||
if isinstance(current, ast.ClassDef) and not class_name:
|
||||
class_name = current.name
|
||||
elif isinstance(current, ast.FunctionDef) or isinstance(current, ast.AsyncFunctionDef) and not function_name:
|
||||
@@ -84,7 +84,7 @@ def _get_function_name(node: ast.AST) -> str:
|
||||
return "module" # For module-level code
|
||||
|
||||
|
||||
def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> MessageDict:
|
||||
def _extract_log_messages(file_path:str, exclude_debug:bool = False) -> MessageDict:
|
||||
"""
|
||||
Extract all translatable messages from a Python file with their function context.
|
||||
|
||||
@@ -94,27 +94,27 @@ def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> Message
|
||||
Returns:
|
||||
Dictionary mapping function names to their messages
|
||||
"""
|
||||
with open(file_path, 'r', encoding = 'utf-8') as file:
|
||||
with open(file_path, "r", encoding = "utf-8") as file:
|
||||
tree = ast.parse(file.read(), filename = file_path)
|
||||
|
||||
# Add parent references for context tracking
|
||||
for parent in ast.walk(tree):
|
||||
for child in ast.iter_child_nodes(parent):
|
||||
setattr(child, '_parent', parent)
|
||||
setattr(child, "_parent", parent)
|
||||
|
||||
messages: MessageDict = defaultdict(lambda: defaultdict(set))
|
||||
messages:MessageDict = defaultdict(lambda: defaultdict(set))
|
||||
|
||||
def add_message(function: str, msg: str) -> None:
|
||||
def add_message(function:str, msg:str) -> None:
|
||||
"""Add a message to the messages dictionary."""
|
||||
if function not in messages:
|
||||
messages[function] = defaultdict(set)
|
||||
if msg not in messages[function]:
|
||||
messages[function][msg] = {msg}
|
||||
|
||||
def extract_string_value(node: ast.AST) -> str | None:
|
||||
def extract_string_value(node:ast.AST) -> str | None:
|
||||
"""Safely extract string value from an AST node."""
|
||||
if isinstance(node, ast.Constant):
|
||||
value = getattr(node, 'value', None)
|
||||
value = getattr(node, "value", None)
|
||||
return value if isinstance(value, str) else None
|
||||
return None
|
||||
|
||||
@@ -127,24 +127,24 @@ def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> Message
|
||||
# Extract messages from various call types
|
||||
if (isinstance(node.func, ast.Attribute) and
|
||||
isinstance(node.func.value, ast.Name) and
|
||||
node.func.value.id in {'LOG', 'logger', 'logging'} and
|
||||
node.func.attr in {None if exclude_debug else 'debug', 'info', 'warning', 'error', 'exception', 'critical'}):
|
||||
node.func.value.id in {"LOG", "logger", "logging"} and
|
||||
node.func.attr in {None if exclude_debug else "debug", "info", "warning", "error", "exception", "critical"}):
|
||||
if node.args:
|
||||
msg = extract_string_value(node.args[0])
|
||||
if msg:
|
||||
add_message(function_name, msg)
|
||||
|
||||
# Handle gettext calls
|
||||
elif ((isinstance(node.func, ast.Name) and node.func.id == '_') or
|
||||
(isinstance(node.func, ast.Attribute) and node.func.attr == 'gettext')):
|
||||
elif ((isinstance(node.func, ast.Name) and node.func.id == "_") or
|
||||
(isinstance(node.func, ast.Attribute) and node.func.attr == "gettext")):
|
||||
if node.args:
|
||||
msg = extract_string_value(node.args[0])
|
||||
if msg:
|
||||
add_message(function_name, msg)
|
||||
|
||||
# Handle other translatable function calls
|
||||
elif isinstance(node.func, ast.Name) and node.func.id in {'ainput', 'pluralize', 'ensure'}:
|
||||
arg_index = 0 if node.func.id == 'ainput' else 1
|
||||
elif isinstance(node.func, ast.Name) and node.func.id in {"ainput", "pluralize", "ensure"}:
|
||||
arg_index = 0 if node.func.id == "ainput" else 1
|
||||
if len(node.args) > arg_index:
|
||||
msg = extract_string_value(node.args[arg_index])
|
||||
if msg:
|
||||
@@ -162,10 +162,10 @@ def _get_all_log_messages(exclude_debug:bool = False) -> dict[str, MessageDict]:
|
||||
Returns:
|
||||
Dictionary mapping module paths to their function messages
|
||||
"""
|
||||
src_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src', 'kleinanzeigen_bot')
|
||||
src_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "kleinanzeigen_bot")
|
||||
print(f"\nScanning for messages in directory: {src_dir}")
|
||||
|
||||
messages_by_file: dict[str, MessageDict] = {
|
||||
messages_by_file:dict[str, MessageDict] = {
|
||||
# Special case for getopt.py which is imported
|
||||
"getopt.py": {
|
||||
"do_longs": {
|
||||
@@ -187,15 +187,15 @@ def _get_all_log_messages(exclude_debug:bool = False) -> dict[str, MessageDict]:
|
||||
|
||||
for root, _, filenames in os.walk(src_dir):
|
||||
for filename in filenames:
|
||||
if filename.endswith('.py'):
|
||||
if filename.endswith(".py"):
|
||||
file_path = os.path.join(root, filename)
|
||||
relative_path = os.path.relpath(file_path, src_dir)
|
||||
if relative_path.startswith('resources/'):
|
||||
if relative_path.startswith("resources/"):
|
||||
continue
|
||||
messages = _extract_log_messages(file_path, exclude_debug)
|
||||
if messages:
|
||||
module_path = os.path.join('kleinanzeigen_bot', relative_path)
|
||||
module_path = module_path.replace(os.sep, '/')
|
||||
module_path = os.path.join("kleinanzeigen_bot", relative_path)
|
||||
module_path = module_path.replace(os.sep, "/")
|
||||
messages_by_file[module_path] = messages
|
||||
|
||||
return messages_by_file
|
||||
@@ -217,7 +217,7 @@ def _get_available_languages() -> list[str]:
|
||||
return sorted(languages)
|
||||
|
||||
|
||||
def _get_translations_for_language(lang: str) -> TranslationDict:
|
||||
def _get_translations_for_language(lang:str) -> TranslationDict:
|
||||
"""
|
||||
Get translations for a specific language from its YAML file.
|
||||
|
||||
@@ -227,7 +227,7 @@ def _get_translations_for_language(lang: str) -> TranslationDict:
|
||||
Returns:
|
||||
Dictionary containing all translations for the language
|
||||
"""
|
||||
yaml = YAML(typ = 'safe')
|
||||
yaml = YAML(typ = "safe")
|
||||
translation_file = f"translations.{lang}.yaml"
|
||||
print(f"Loading translations from {translation_file}")
|
||||
content = files(resources).joinpath(translation_file).read_text()
|
||||
@@ -235,10 +235,10 @@ def _get_translations_for_language(lang: str) -> TranslationDict:
|
||||
return translations
|
||||
|
||||
|
||||
def _find_translation(translations: TranslationDict,
|
||||
module: str,
|
||||
function: str,
|
||||
message: str) -> bool:
|
||||
def _find_translation(translations:TranslationDict,
|
||||
module:str,
|
||||
function:str,
|
||||
message:str) -> bool:
|
||||
"""
|
||||
Check if a translation exists for a given message in the exact location where i18n.py will look.
|
||||
This matches the lookup logic in i18n.py which uses dicts.safe_get().
|
||||
@@ -253,11 +253,11 @@ def _find_translation(translations: TranslationDict,
|
||||
True if translation exists in the correct location, False otherwise
|
||||
"""
|
||||
# Special case for getopt.py
|
||||
if module == 'getopt.py':
|
||||
if module == "getopt.py":
|
||||
return bool(translations.get(module, {}).get(function, {}).get(message))
|
||||
|
||||
# Add kleinanzeigen_bot/ prefix if not present
|
||||
module_path = f'kleinanzeigen_bot/{module}' if not module.startswith('kleinanzeigen_bot/') else module
|
||||
module_path = f'kleinanzeigen_bot/{module}' if not module.startswith("kleinanzeigen_bot/") else module
|
||||
|
||||
# Check if module exists in translations
|
||||
module_trans = translations.get(module_path, {})
|
||||
@@ -277,10 +277,10 @@ def _find_translation(translations: TranslationDict,
|
||||
return has_translation
|
||||
|
||||
|
||||
def _message_exists_in_code(code_messages: dict[str, MessageDict],
|
||||
module: str,
|
||||
function: str,
|
||||
message: str) -> bool:
|
||||
def _message_exists_in_code(code_messages:dict[str, MessageDict],
|
||||
module:str,
|
||||
function:str,
|
||||
message:str) -> bool:
|
||||
"""
|
||||
Check if a message exists in the code at the given location.
|
||||
This is the reverse of _find_translation - it checks if a translation's message
|
||||
@@ -296,11 +296,11 @@ def _message_exists_in_code(code_messages: dict[str, MessageDict],
|
||||
True if message exists in the code, False otherwise
|
||||
"""
|
||||
# Special case for getopt.py
|
||||
if module == 'getopt.py':
|
||||
if module == "getopt.py":
|
||||
return bool(code_messages.get(module, {}).get(function, {}).get(message))
|
||||
|
||||
# Remove kleinanzeigen_bot/ prefix if present for code message lookup
|
||||
module_path = module[len('kleinanzeigen_bot/'):] if module.startswith('kleinanzeigen_bot/') else module
|
||||
module_path = module[len("kleinanzeigen_bot/"):] if module.startswith("kleinanzeigen_bot/") else module
|
||||
module_path = f'kleinanzeigen_bot/{module_path}'
|
||||
|
||||
# Check if module exists in code messages
|
||||
@@ -318,7 +318,7 @@ def _message_exists_in_code(code_messages: dict[str, MessageDict],
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lang", _get_available_languages())
|
||||
def test_all_log_messages_have_translations(lang: str) -> None:
|
||||
def test_all_log_messages_have_translations(lang:str) -> None:
|
||||
"""
|
||||
Test that all translatable messages in the code have translations for each language.
|
||||
|
||||
@@ -345,7 +345,7 @@ def test_all_log_messages_have_translations(lang: str) -> None:
|
||||
def make_inner_dict() -> defaultdict[str, set[str]]:
|
||||
return defaultdict(set)
|
||||
|
||||
by_module: defaultdict[str, defaultdict[str, set[str]]] = defaultdict(make_inner_dict)
|
||||
by_module:defaultdict[str, defaultdict[str, set[str]]] = defaultdict(make_inner_dict)
|
||||
|
||||
for loc in missing_translations:
|
||||
assert isinstance(loc.module, str), "Module must be a string"
|
||||
@@ -364,7 +364,7 @@ def test_all_log_messages_have_translations(lang: str) -> None:
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lang", _get_available_languages())
|
||||
def test_no_obsolete_translations(lang: str) -> None:
|
||||
def test_no_obsolete_translations(lang:str) -> None:
|
||||
"""
|
||||
Test that all translations in each language YAML file are actually used in the code.
|
||||
|
||||
@@ -376,7 +376,7 @@ def test_no_obsolete_translations(lang: str) -> None:
|
||||
"""
|
||||
messages_by_file = _get_all_log_messages(exclude_debug = False)
|
||||
translations = _get_translations_for_language(lang)
|
||||
obsolete_items: list[tuple[str, str, str]] = []
|
||||
obsolete_items:list[tuple[str, str, str]] = []
|
||||
|
||||
for module, module_trans in translations.items():
|
||||
if not isinstance(module_trans, dict):
|
||||
@@ -402,7 +402,7 @@ def test_no_obsolete_translations(lang: str) -> None:
|
||||
obsolete_str = f"\nObsolete translations found for language [{lang}]:\n"
|
||||
|
||||
# Group by module and function for better readability
|
||||
by_module: defaultdict[str, defaultdict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
|
||||
by_module:defaultdict[str, defaultdict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
|
||||
|
||||
for module, function, message in obsolete_items:
|
||||
by_module[module][function].append(message)
|
||||
|
||||
Reference in New Issue
Block a user