mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
refact: apply consistent formatting
This commit is contained in:
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -20,8 +20,8 @@ Select the type(s) of change(s) included in this pull request:
|
|||||||
Before requesting a review, confirm the following:
|
Before requesting a review, confirm the following:
|
||||||
- [ ] I have reviewed my changes to ensure they meet the project's standards.
|
- [ ] I have reviewed my changes to ensure they meet the project's standards.
|
||||||
- [ ] I have tested my changes and ensured that all tests pass (`pdm run test`).
|
- [ ] I have tested my changes and ensured that all tests pass (`pdm run test`).
|
||||||
|
- [ ] I have formatted the code (`pdm run format`).
|
||||||
- [ ] I have verified that linting passes (`pdm run lint`).
|
- [ ] I have verified that linting passes (`pdm run lint`).
|
||||||
- [ ] I have run security scans and addressed any identified issues (`pdm run audit`).
|
|
||||||
- [ ] I have updated documentation where necessary.
|
- [ ] I have updated documentation where necessary.
|
||||||
|
|
||||||
By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
|
By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ app = "python -m kleinanzeigen_bot"
|
|||||||
compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean"
|
compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean"
|
||||||
compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build
|
compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build
|
||||||
debug = "python -m pdb -m kleinanzeigen_bot"
|
debug = "python -m pdb -m kleinanzeigen_bot"
|
||||||
format = "autopep8 --recursive --in-place src tests --verbose"
|
format = {shell = "autopep8 --recursive --in-place scripts src tests --verbose && python scripts/post_autopep8.py scripts src tests" }
|
||||||
lint = {shell = "ruff check && mypy && basedpyright" }
|
lint = {shell = "ruff check && mypy && basedpyright" }
|
||||||
fix = {shell = "ruff check --fix" }
|
fix = {shell = "ruff check --fix" }
|
||||||
test = "python -m pytest --capture=tee-sys -v"
|
test = "python -m pytest --capture=tee-sys -v"
|
||||||
@@ -113,7 +113,7 @@ aggressive = 3
|
|||||||
# https://docs.astral.sh/ruff/configuration/
|
# https://docs.astral.sh/ruff/configuration/
|
||||||
#####################
|
#####################
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
include = ["pyproject.toml", "src/**/*.py", "tests/**/*.py"]
|
include = ["pyproject.toml", "scripts/**/*.py", "src/**/*.py", "tests/**/*.py"]
|
||||||
line-length = 160
|
line-length = 160
|
||||||
indent-width = 4
|
indent-width = 4
|
||||||
target-version = "py310"
|
target-version = "py310"
|
||||||
@@ -208,14 +208,10 @@ ignore = [
|
|||||||
"TC006", # Add quotes to type expression in `typing.cast()`
|
"TC006", # Add quotes to type expression in `typing.cast()`
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.ruff.format]
|
|
||||||
quote-style = "double"
|
|
||||||
indent-style = "space"
|
|
||||||
line-ending = "native"
|
|
||||||
docstring-code-format = false
|
|
||||||
skip-magic-trailing-comma = false
|
|
||||||
|
|
||||||
[tool.ruff.lint.per-file-ignores]
|
[tool.ruff.lint.per-file-ignores]
|
||||||
|
"scripts/**/*.py" = [
|
||||||
|
"INP001", # File `...` is part of an implicit namespace package. Add an `__init__.py`.
|
||||||
|
]
|
||||||
"tests/**/*.py" = [
|
"tests/**/*.py" = [
|
||||||
"ARG",
|
"ARG",
|
||||||
"B",
|
"B",
|
||||||
@@ -247,7 +243,7 @@ max-statements = 150 # max. number of statements in function / method body (R091
|
|||||||
# https://mypy.readthedocs.io/en/stable/config_file.html
|
# https://mypy.readthedocs.io/en/stable/config_file.html
|
||||||
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
|
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
|
||||||
python_version = "3.10"
|
python_version = "3.10"
|
||||||
files = "src,tests"
|
files = "scripts,src,tests"
|
||||||
strict = true
|
strict = true
|
||||||
disallow_untyped_calls = false
|
disallow_untyped_calls = false
|
||||||
disallow_untyped_defs = true
|
disallow_untyped_defs = true
|
||||||
@@ -264,7 +260,7 @@ verbosity = 0
|
|||||||
#####################
|
#####################
|
||||||
[tool.basedpyright]
|
[tool.basedpyright]
|
||||||
# https://docs.basedpyright.com/latest/configuration/config-files/
|
# https://docs.basedpyright.com/latest/configuration/config-files/
|
||||||
include = ["src", "tests"]
|
include = ["scripts", "src", "tests"]
|
||||||
defineConstant = { DEBUG = false }
|
defineConstant = { DEBUG = false }
|
||||||
pythonVersion = "3.10"
|
pythonVersion = "3.10"
|
||||||
typeCheckingMode = "standard"
|
typeCheckingMode = "standard"
|
||||||
|
|||||||
317
scripts/post_autopep8.py
Normal file
317
scripts/post_autopep8.py
Normal file
@@ -0,0 +1,317 @@
|
|||||||
|
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
|
import ast, logging, re, sys # isort: skip
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Final, List, Protocol, Tuple
|
||||||
|
|
||||||
|
from typing_extensions import override
|
||||||
|
|
||||||
|
# Configure basic logging for this standalone script: INFO level, plain
# "LEVEL: message" lines on stderr (logging's default handler).
logging.basicConfig(level = logging.INFO, format = "%(levelname)s: %(message)s")
# Module-level logger used by format_file() and the CLI entry point below.
LOG:Final[logging.Logger] = logging.getLogger(__name__)
||||||
|
|
||||||
|
|
||||||
|
class FormatterRule(Protocol):
    """
    A code processor that can modify source lines based on the AST.

    Implementations receive the module AST parsed from the current text, the
    file's raw lines (with line endings preserved) and the file path, and
    return the (possibly modified) list of lines.
    """

    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        ...
|
|
||||||
|
|
||||||
|
class NoSpaceAfterColonInTypeAnnotationRule(FormatterRule):
    """
    Removes whitespace around the colon (:) preceding the type annotation in
    variable and function parameter declarations.

    This rule enforces `a:int` instead of `a: int` (or `a : int`).
    It is the opposite behavior of autopep8 rule E231.

    Example:
        # Before
        def foo(a: int, b : str) -> None:
            pass

        # After
        def foo(a:int, b:str) -> None:
            pass
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        # Collect (0-based line, column) of every annotation expression.
        ann_positions:List[Tuple[int, int]] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.arg) and node.annotation is not None:
                ann_positions.append((node.annotation.lineno - 1, node.annotation.col_offset))
            elif isinstance(node, ast.AnnAssign) and node.annotation is not None:
                ann = node.annotation
                ann_positions.append((ann.lineno - 1, ann.col_offset))

        if not ann_positions:
            return lines

        new_lines:List[str] = []
        for idx, line in enumerate(lines):
            if line.lstrip().startswith("#"):
                new_lines.append(line)
                continue

            chars = list(line)
            offsets = [col for (lin, col) in ann_positions if lin == idx]
            # Process right-to-left so earlier columns stay valid after deletions.
            for col in sorted(offsets, reverse = True):
                prefix = "".join(chars[:col])
                colon_idx = prefix.rfind(":")
                if colon_idx == -1:
                    # The colon is not on this line (multi-line declaration) - leave untouched.
                    continue
                # Remove whitespace after the colon.
                j = colon_idx + 1
                while j < len(chars) and chars[j].isspace():
                    del chars[j]
                # BUGFIX: also remove whitespace before the colon (e.g. `b : str` -> `b:str`),
                # as promised by the example in the docstring above. Previously only the
                # whitespace after the colon was stripped, leaving `b :str`.
                k = colon_idx - 1
                while k >= 0 and chars[k].isspace():
                    del chars[k]
                    k -= 1
            new_lines.append("".join(chars))

        return new_lines
||||||
|
|
||||||
|
|
||||||
|
class EqualSignSpacingInDefaultsAndNamedArgsRule(FormatterRule):
    """
    Ensures that the '=' sign in default values for function parameters and keyword arguments in function calls
    is surrounded by exactly one space on each side.

    This rule enforces `a:int = 3` instead of `a:int=3`, and `x = 42` instead of `x=42` or `x =42`.
    It is the opposite behavior of autopep8 rule E251.

    Example:
        # Before
        def foo(a:int=3, b :str= "bar"):
            pass

        foo(x=42, y = "hello")

        # After
        def foo(a:int = 3, b:str = "bar"):
            pass

        foo(x = 42, y = "hello")
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        # Collect (0-based line, column) of every default-value / keyword-value expression;
        # the '=' to normalize is the last one before that column.
        equals_positions:List[Tuple[int, int]] = []
        for node in ast.walk(tree):
            # --- Defaults in function definitions, async defs & lambdas ---
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)):
                # positional (and positional-only) defaults
                equals_positions.extend(
                    (d.lineno - 1, d.col_offset)
                    for d in node.args.defaults
                    if d is not None
                )
                # keyword-only defaults; entries are None for parameters without a default.
                # BUGFIX: lambdas can have keyword-only defaults too (e.g. `lambda *, x=1: x`),
                # so this is no longer restricted to def/async def.
                equals_positions.extend(
                    (d.lineno - 1, d.col_offset)
                    for d in node.args.kw_defaults
                    if d is not None
                )

            # --- Keyword arguments in calls ---
            if isinstance(node, ast.Call):
                equals_positions.extend(
                    (kw.value.lineno - 1, kw.value.col_offset)
                    for kw in node.keywords
                    if kw.arg is not None  # kw.arg is None for `**kwargs` unpacking - no '=' there
                )

        if not equals_positions:
            return lines

        new_lines:List[str] = []
        for line_idx, line in enumerate(lines):
            if line.lstrip().startswith("#"):
                new_lines.append(line)
                continue

            chars = list(line)
            equals_offsets = [col for (lineno, col) in equals_positions if lineno == line_idx]
            # Process right-to-left so earlier columns stay valid after edits.
            for col in sorted(equals_offsets, reverse = True):
                prefix = "".join(chars[:col])
                equal_sign_idx = prefix.rfind("=")
                if equal_sign_idx == -1:
                    # '=' not on this line (value wrapped to the next line) - leave untouched.
                    continue

                # remove spaces before '='
                left_index = equal_sign_idx - 1
                while left_index >= 0 and chars[left_index].isspace():
                    del chars[left_index]
                    equal_sign_idx -= 1
                    left_index -= 1

                # remove spaces after '='
                right_index = equal_sign_idx + 1
                while right_index < len(chars) and chars[right_index].isspace():
                    del chars[right_index]

                # insert exactly one space on each side of '='
                chars.insert(equal_sign_idx, " ")
                chars.insert(equal_sign_idx + 2, " ")
            new_lines.append("".join(chars))

        return new_lines
||||||
|
|
||||||
|
|
||||||
|
class PreferDoubleQuotesRule(FormatterRule):
    """
    Ensures string literals use double quotes unless the content contains a double quote.

    Example:
        # Before
        foo = 'hello'
        bar = 'a "quote" inside'

        # After
        foo = "hello"
        bar = 'a "quote" inside' # kept as-is, because it contains a double quote
    """

    @override
    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
        new_lines = lines.copy()

        # Track how much each line has shifted so far (rewriting a literal can
        # change the line length, invalidating later AST column offsets).
        line_shifts:dict[int, int] = dict.fromkeys(range(len(lines)), 0)

        # Build a parent map for f-string detection
        parent_map:dict[ast.AST, ast.AST] = {}
        for parent in ast.walk(tree):
            for child in ast.iter_child_nodes(parent):
                parent_map[child] = parent

        def is_in_fstring(node:ast.AST) -> bool:
            # Walk up the parent chain looking for an enclosing JoinedStr (f-string).
            p = parent_map.get(node)
            while p:
                if isinstance(p, ast.JoinedStr):
                    return True
                p = parent_map.get(p)
            return False

        # Regex to locate a single- or triple-quoted literal:
        # (?P<prefix>[rRbuUfF]*) optional string flags (r, b, u, f, etc.), case-insensitive
        # (?P<quote>'{3}|') the opening delimiter: either three single-quotes (''') or one ('),
        # but never two in a row (so we won't mis-interpret adjacent quotes)
        # (?P<content>.*?) the literal's content, non-greedy up to the next same delimiter
        # (?P=quote) the matching closing delimiter (same length as the opener)
        literal_re = re.compile(
            r"(?P<prefix>[rRbuUfF]*)(?P<quote>'{3}|')(?P<content>.*?)(?P=quote)",
            re.DOTALL,
        )

        for node in ast.walk(tree):
            # only handle simple string constants
            if not (isinstance(node, ast.Constant) and isinstance(node.value, str)):
                continue

            # skip anything inside an f-string, at any depth
            if is_in_fstring(node):
                continue

            starting_line_number = getattr(node, "lineno", None)
            starting_col_offset = getattr(node, "col_offset", None)
            if starting_line_number is None or starting_col_offset is None:
                continue

            start_line = starting_line_number - 1
            # NOTE(review): ast.walk is breadth-first, not guaranteed left-to-right
            # within a line; if a rightward literal on the same line were rewritten
            # first, the shift could misalign an earlier one, which is then safely
            # skipped by the character guard below - confirm this loss is acceptable.
            shift = line_shifts[start_line]
            raw = new_lines[start_line]
            # apply shift so we match against current edited line
            idx = starting_col_offset + shift
            if idx >= len(raw) or raw[idx] not in ("'", "r", "u", "b", "f", "R", "U", "B", "F"):
                continue

            # match literal at that column; `raw` is a single line, so a
            # triple-quoted literal spanning multiple lines finds no closing
            # delimiter here, fails to match, and is left untouched
            m = literal_re.match(raw[idx:])
            if not m:
                continue

            prefix = m.group("prefix")
            quote = m.group("quote")  # either "'" or "'''"
            content = m.group("content")  # what's inside

            # skip if content has a double-quote already
            if '"' in content:
                continue

            # build new literal with the same prefix, but double‐quote delimiter
            delim = '"' * len(quote)
            escaped = content.replace(delim, "\\" + delim)
            new_literal = f"{prefix}{delim}{escaped}{delim}"

            literal_len = m.end()  # how many chars we're replacing
            before = raw[:idx]
            after = raw[idx + literal_len:]
            new_lines[start_line] = before + new_literal + after

            # record shift delta for any further edits on this line
            line_shifts[start_line] += len(new_literal) - literal_len

        return new_lines
||||||
|
|
||||||
|
|
||||||
|
# Rules are applied in this order; each rule sees the output of the previous one.
FORMATTER_RULES:List[FormatterRule] = [
    rule_class() for rule_class in (
        NoSpaceAfterColonInTypeAnnotationRule,
        EqualSignSpacingInDefaultsAndNamedArgsRule,
        PreferDoubleQuotesRule,
    )
]
||||||
|
|
||||||
|
|
||||||
|
def format_file(path:Path) -> None:
    """
    Applies all FORMATTER_RULES to the given Python file and rewrites it in place.

    The file is written back only if the content actually changed and every
    intermediate result still parses; files that fail to parse (before or after
    any rule) are left untouched and an error is logged.

    @param path: path of the Python source file to format
    """
    # Read without newline conversion so original line endings survive the round-trip
    with path.open("r", encoding = "utf-8", newline = "") as rf:
        original_text = rf.read()

    # Initial parse
    try:
        tree = ast.parse(original_text)
    except SyntaxError as e:
        # BUGFIX: SyntaxError.lineno/offset can be None; "%d" would make the
        # logging call itself fail, so use "%s" for both
        LOG.error(
            "Syntax error parsing %s[%s:%s]: %r -> %s",
            path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
        )
        return

    lines = original_text.splitlines(keepends = True)
    formatted_text = original_text
    success = True
    for rule in FORMATTER_RULES:
        lines = rule.apply(tree, lines, path)
        formatted_text = "".join(lines)

        # Re-parse the updated text so the next rule sees up-to-date AST positions
        try:
            tree = ast.parse(formatted_text)
        except SyntaxError as e:
            LOG.error(
                "Syntax error after %s at %s[%s:%s]: %r -> %s",
                rule.__class__.__name__, path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
            )
            success = False
            break

    if success and formatted_text != original_text:
        with path.open("w", encoding = "utf-8", newline = "") as wf:
            wf.write(formatted_text)
        LOG.info("Formatted [%s].", path)
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Require at least one directory argument, otherwise print usage and bail out.
    if len(sys.argv) < 2:  # noqa: PLR2004 Magic value used in comparison
        script_path = Path(sys.argv[0])
        print(f"Usage: python {script_path} <directory1> [<directory2> ...]")
        sys.exit(1)

    # Format every *.py file found recursively under each given directory.
    for root in map(Path, sys.argv[1:]):
        if not root.exists():
            LOG.warning("Directory [%s] does not exist, skipping...", root)
            continue
        for py_file in root.rglob("*.py"):
            format_file(py_file)
||||||
@@ -83,11 +83,11 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
self.configure_file_logging()
|
self.configure_file_logging()
|
||||||
self.load_config()
|
self.load_config()
|
||||||
|
|
||||||
if not (self.ads_selector in {'all', 'new', 'due', 'changed'} or
|
if not (self.ads_selector in {"all", "new", "due", "changed"} or
|
||||||
any(selector in self.ads_selector.split(',') for selector in ('all', 'new', 'due', 'changed')) or
|
any(selector in self.ads_selector.split(",") for selector in ("all", "new", "due", "changed")) or
|
||||||
re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
|
re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
|
||||||
LOG.warning('You provided no ads selector. Defaulting to "due".')
|
LOG.warning('You provided no ads selector. Defaulting to "due".')
|
||||||
self.ads_selector = 'due'
|
self.ads_selector = "due"
|
||||||
|
|
||||||
if ads := self.load_ads():
|
if ads := self.load_ads():
|
||||||
await self.create_browser_session()
|
await self.create_browser_session()
|
||||||
@@ -111,9 +111,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
case "download":
|
case "download":
|
||||||
self.configure_file_logging()
|
self.configure_file_logging()
|
||||||
# ad IDs depends on selector
|
# ad IDs depends on selector
|
||||||
if not (self.ads_selector in {'all', 'new'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
|
if not (self.ads_selector in {"all", "new"} or re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
|
||||||
LOG.warning('You provided no ads selector. Defaulting to "new".')
|
LOG.warning('You provided no ads selector. Defaulting to "new".')
|
||||||
self.ads_selector = 'new'
|
self.ads_selector = "new"
|
||||||
self.load_config()
|
self.load_config()
|
||||||
await self.create_browser_session()
|
await self.create_browser_session()
|
||||||
await self.login()
|
await self.login()
|
||||||
@@ -265,7 +265,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
LOG.info("App version: %s", self.get_version())
|
LOG.info("App version: %s", self.get_version())
|
||||||
LOG.info("Python version: %s", sys.version)
|
LOG.info("Python version: %s", sys.version)
|
||||||
|
|
||||||
def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_file_relative: str) -> bool:
|
def __check_ad_republication(self, ad_cfg:dict[str, Any], ad_file_relative:str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if an ad needs to be republished based on republication interval.
|
Check if an ad needs to be republished based on republication interval.
|
||||||
Returns True if the ad should be republished based on the interval.
|
Returns True if the ad should be republished based on the interval.
|
||||||
@@ -295,7 +295,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def __check_ad_changed(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool:
|
def __check_ad_changed(self, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if an ad has been changed since last publication.
|
Check if an ad has been changed since last publication.
|
||||||
Returns True if the ad has been changed.
|
Returns True if the ad has been changed.
|
||||||
@@ -327,7 +327,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
data_root_dir = os.path.dirname(self.config_file_path)
|
data_root_dir = os.path.dirname(self.config_file_path)
|
||||||
for file_pattern in self.config["ad_files"]:
|
for file_pattern in self.config["ad_files"]:
|
||||||
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
|
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
|
||||||
if not str(ad_file).endswith('ad_fields.yaml'):
|
if not str(ad_file).endswith("ad_fields.yaml"):
|
||||||
ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file
|
ad_files[abspath(ad_file, relative_to = data_root_dir)] = ad_file
|
||||||
LOG.info(" -> found %s", pluralize("ad config file", ad_files))
|
LOG.info(" -> found %s", pluralize("ad config file", ad_files))
|
||||||
if not ad_files:
|
if not ad_files:
|
||||||
@@ -335,13 +335,13 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
ids = []
|
ids = []
|
||||||
use_specific_ads = False
|
use_specific_ads = False
|
||||||
selectors = self.ads_selector.split(',')
|
selectors = self.ads_selector.split(",")
|
||||||
|
|
||||||
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
|
if re.compile(r"\d+[,\d+]*").search(self.ads_selector):
|
||||||
ids = [int(n) for n in self.ads_selector.split(',')]
|
ids = [int(n) for n in self.ads_selector.split(",")]
|
||||||
use_specific_ads = True
|
use_specific_ads = True
|
||||||
LOG.info('Start fetch task for the ad(s) with id(s):')
|
LOG.info("Start fetch task for the ad(s) with id(s):")
|
||||||
LOG.info(' | '.join([str(id_) for id_ in ids]))
|
LOG.info(" | ".join([str(id_) for id_ in ids]))
|
||||||
|
|
||||||
ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml")
|
ad_fields = dicts.load_dict_from_module(resources, "ad_fields.yaml")
|
||||||
ads = []
|
ads = []
|
||||||
@@ -548,7 +548,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
async def is_logged_in(self) -> bool:
|
async def is_logged_in(self) -> bool:
|
||||||
try:
|
try:
|
||||||
user_info = await self.web_text(By.CLASS_NAME, "mr-medium")
|
user_info = await self.web_text(By.CLASS_NAME, "mr-medium")
|
||||||
if self.config['login']['username'].lower() in user_info.lower():
|
if self.config["login"]["username"].lower() in user_info.lower():
|
||||||
return True
|
return True
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
return False
|
return False
|
||||||
@@ -570,7 +570,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
LOG.info("DONE: Deleted %s", pluralize("ad", count))
|
LOG.info("DONE: Deleted %s", pluralize("ad", count))
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
|
|
||||||
async def delete_ad(self, ad_cfg: dict[str, Any], published_ads: list[dict[str, Any]], *, delete_old_ads_by_title: bool) -> bool:
|
async def delete_ad(self, ad_cfg:dict[str, Any], published_ads:list[dict[str, Any]], *, delete_old_ads_by_title:bool) -> bool:
|
||||||
LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])
|
LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])
|
||||||
|
|
||||||
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
|
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
|
||||||
@@ -627,7 +627,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
LOG.info("DONE: (Re-)published %s", pluralize("ad", count))
|
LOG.info("DONE: (Re-)published %s", pluralize("ad", count))
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
|
|
||||||
async def publish_ad(self, ad_file:str, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], published_ads: list[dict[str, Any]]) -> None:
|
async def publish_ad(self, ad_file:str, ad_cfg:dict[str, Any], ad_cfg_orig:dict[str, Any], published_ads:list[dict[str, Any]]) -> None:
|
||||||
"""
|
"""
|
||||||
@param ad_cfg: the effective ad config (i.e. with default values applied etc.)
|
@param ad_cfg: the effective ad config (i.e. with default values applied etc.)
|
||||||
@param ad_cfg_orig: the ad config as present in the YAML file
|
@param ad_cfg_orig: the ad config as present in the YAML file
|
||||||
@@ -657,7 +657,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
#############################
|
#############################
|
||||||
# set category
|
# set category
|
||||||
#############################
|
#############################
|
||||||
await self.__set_category(ad_cfg['category'], ad_file)
|
await self.__set_category(ad_cfg["category"], ad_file)
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set special attributes
|
# set special attributes
|
||||||
@@ -674,7 +674,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
try:
|
try:
|
||||||
await self.web_select(By.XPATH, "//select[contains(@id, '.versand_s')]", shipping_value)
|
await self.web_select(By.XPATH, "//select[contains(@id, '.versand_s')]", shipping_value)
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg['shipping_type'])
|
LOG.warning("Failed to set shipping attribute for type '%s'!", ad_cfg["shipping_type"])
|
||||||
else:
|
else:
|
||||||
await self.__set_shipping(ad_cfg)
|
await self.__set_shipping(ad_cfg)
|
||||||
|
|
||||||
@@ -698,9 +698,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
if ad_cfg["shipping_type"] == "SHIPPING":
|
if ad_cfg["shipping_type"] == "SHIPPING":
|
||||||
if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}:
|
if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}:
|
||||||
if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED):
|
if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED):
|
||||||
await self.web_click(By.ID, 'radio-buy-now-yes')
|
await self.web_click(By.ID, "radio-buy-now-yes")
|
||||||
elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED):
|
elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED):
|
||||||
await self.web_click(By.ID, 'radio-buy-now-no')
|
await self.web_click(By.ID, "radio-buy-now-no")
|
||||||
except TimeoutError as ex:
|
except TimeoutError as ex:
|
||||||
LOG.debug(ex, exc_info = True)
|
LOG.debug(ex, exc_info = True)
|
||||||
|
|
||||||
@@ -832,7 +832,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
dicts.save_dict(ad_file, ad_cfg_orig)
|
dicts.save_dict(ad_file, ad_cfg_orig)
|
||||||
|
|
||||||
async def __set_condition(self, condition_value: str) -> None:
|
async def __set_condition(self, condition_value:str) -> None:
|
||||||
condition_mapping = {
|
condition_mapping = {
|
||||||
"new_with_tag": "Neu mit Etikett",
|
"new_with_tag": "Neu mit Etikett",
|
||||||
"new": "Neu",
|
"new": "Neu",
|
||||||
@@ -862,7 +862,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
except TimeoutError as ex:
|
except TimeoutError as ex:
|
||||||
raise TimeoutError(_("Unable to close condition dialog!")) from ex
|
raise TimeoutError(_("Unable to close condition dialog!")) from ex
|
||||||
|
|
||||||
async def __set_category(self, category: str | None, ad_file:str) -> None:
|
async def __set_category(self, category:str | None, ad_file:str) -> None:
|
||||||
# click on something to trigger automatic category detection
|
# click on something to trigger automatic category detection
|
||||||
await self.web_click(By.ID, "pstad-descrptn")
|
await self.web_click(By.ID, "pstad-descrptn")
|
||||||
|
|
||||||
@@ -884,9 +884,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
else:
|
else:
|
||||||
ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed")
|
ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed")
|
||||||
|
|
||||||
async def __set_special_attributes(self, ad_cfg: dict[str, Any]) -> None:
|
async def __set_special_attributes(self, ad_cfg:dict[str, Any]) -> None:
|
||||||
if ad_cfg["special_attributes"]:
|
if ad_cfg["special_attributes"]:
|
||||||
LOG.debug('Found %i special attributes', len(ad_cfg["special_attributes"]))
|
LOG.debug("Found %i special attributes", len(ad_cfg["special_attributes"]))
|
||||||
for special_attribute_key, special_attribute_value in ad_cfg["special_attributes"].items():
|
for special_attribute_key, special_attribute_value in ad_cfg["special_attributes"].items():
|
||||||
|
|
||||||
if special_attribute_key == "condition_s":
|
if special_attribute_key == "condition_s":
|
||||||
@@ -911,10 +911,10 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
elem_id = special_attr_elem.attrs.id
|
elem_id = special_attr_elem.attrs.id
|
||||||
if special_attr_elem.local_name == 'select':
|
if special_attr_elem.local_name == "select":
|
||||||
LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key)
|
LOG.debug("Attribute field '%s' seems to be a select...", special_attribute_key)
|
||||||
await self.web_select(By.ID, elem_id, special_attribute_value)
|
await self.web_select(By.ID, elem_id, special_attribute_value)
|
||||||
elif special_attr_elem.attrs.type == 'checkbox':
|
elif special_attr_elem.attrs.type == "checkbox":
|
||||||
LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key)
|
LOG.debug("Attribute field '%s' seems to be a checkbox...", special_attribute_key)
|
||||||
await self.web_click(By.ID, elem_id)
|
await self.web_click(By.ID, elem_id)
|
||||||
else:
|
else:
|
||||||
@@ -925,7 +925,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex
|
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex
|
||||||
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value)
|
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value)
|
||||||
|
|
||||||
async def __set_shipping(self, ad_cfg: dict[str, Any]) -> None:
|
async def __set_shipping(self, ad_cfg:dict[str, Any]) -> None:
|
||||||
if ad_cfg["shipping_type"] == "PICKUP":
|
if ad_cfg["shipping_type"] == "PICKUP":
|
||||||
try:
|
try:
|
||||||
await self.web_click(By.XPATH,
|
await self.web_click(By.XPATH,
|
||||||
@@ -960,7 +960,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
LOG.debug(ex, exc_info = True)
|
LOG.debug(ex, exc_info = True)
|
||||||
raise TimeoutError(_("Unable to close shipping dialog!")) from ex
|
raise TimeoutError(_("Unable to close shipping dialog!")) from ex
|
||||||
|
|
||||||
async def __set_shipping_options(self, ad_cfg: dict[str, Any]) -> None:
|
async def __set_shipping_options(self, ad_cfg:dict[str, Any]) -> None:
|
||||||
shipping_options_mapping = {
|
shipping_options_mapping = {
|
||||||
"DHL_2": ("Klein", "Paket 2 kg"),
|
"DHL_2": ("Klein", "Paket 2 kg"),
|
||||||
"Hermes_Päckchen": ("Klein", "Päckchen"),
|
"Hermes_Päckchen": ("Klein", "Päckchen"),
|
||||||
@@ -980,7 +980,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
except KeyError as ex:
|
except KeyError as ex:
|
||||||
raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg['shipping_options']}") from ex
|
raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg['shipping_options']}") from ex
|
||||||
|
|
||||||
shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict=False)
|
shipping_sizes, shipping_packages = zip(*mapped_shipping_options, strict = False)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
shipping_size, = set(shipping_sizes)
|
shipping_size, = set(shipping_sizes)
|
||||||
@@ -1025,7 +1025,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
except TimeoutError as ex:
|
except TimeoutError as ex:
|
||||||
raise TimeoutError(_("Unable to close shipping dialog!")) from ex
|
raise TimeoutError(_("Unable to close shipping dialog!")) from ex
|
||||||
|
|
||||||
async def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
|
async def __upload_images(self, ad_cfg:dict[str, Any]) -> None:
|
||||||
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
|
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
|
||||||
image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]")
|
image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]")
|
||||||
|
|
||||||
@@ -1036,7 +1036,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
async def assert_free_ad_limit_not_reached(self) -> None:
|
async def assert_free_ad_limit_not_reached(self) -> None:
|
||||||
try:
|
try:
|
||||||
await self.web_find(By.XPATH, '/html/body/div[1]/form/fieldset[6]/div[1]/header', timeout = 2)
|
await self.web_find(By.XPATH, "/html/body/div[1]/form/fieldset[6]/div[1]/header", timeout = 2)
|
||||||
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
|
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
pass
|
pass
|
||||||
@@ -1050,13 +1050,13 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
ad_extractor = extract.AdExtractor(self.browser, self.config)
|
ad_extractor = extract.AdExtractor(self.browser, self.config)
|
||||||
|
|
||||||
# use relevant download routine
|
# use relevant download routine
|
||||||
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
|
if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes
|
||||||
LOG.info('Scanning your ad overview...')
|
LOG.info("Scanning your ad overview...")
|
||||||
own_ad_urls = await ad_extractor.extract_own_ads_urls()
|
own_ad_urls = await ad_extractor.extract_own_ads_urls()
|
||||||
LOG.info('%s found.', pluralize("ad", len(own_ad_urls)))
|
LOG.info("%s found.", pluralize("ad", len(own_ad_urls)))
|
||||||
|
|
||||||
if self.ads_selector == 'all': # download all of your adds
|
if self.ads_selector == "all": # download all of your adds
|
||||||
LOG.info('Starting download of all ads...')
|
LOG.info("Starting download of all ads...")
|
||||||
|
|
||||||
success_count = 0
|
success_count = 0
|
||||||
# call download function for each ad page
|
# call download function for each ad page
|
||||||
@@ -1067,12 +1067,12 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
success_count += 1
|
success_count += 1
|
||||||
LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls))
|
LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls))
|
||||||
|
|
||||||
elif self.ads_selector == 'new': # download only unsaved ads
|
elif self.ads_selector == "new": # download only unsaved ads
|
||||||
# check which ads already saved
|
# check which ads already saved
|
||||||
saved_ad_ids = []
|
saved_ad_ids = []
|
||||||
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
|
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
|
||||||
for ad in ads:
|
for ad in ads:
|
||||||
ad_id = int(ad[2]['id'])
|
ad_id = int(ad[2]["id"])
|
||||||
saved_ad_ids.append(ad_id)
|
saved_ad_ids.append(ad_id)
|
||||||
|
|
||||||
# determine ad IDs from links
|
# determine ad IDs from links
|
||||||
@@ -1083,28 +1083,28 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
for ad_url, ad_id in ad_id_by_url.items():
|
for ad_url, ad_id in ad_id_by_url.items():
|
||||||
# check if ad with ID already saved
|
# check if ad with ID already saved
|
||||||
if ad_id in saved_ad_ids:
|
if ad_id in saved_ad_ids:
|
||||||
LOG.info('The ad with id %d has already been saved.', ad_id)
|
LOG.info("The ad with id %d has already been saved.", ad_id)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if await ad_extractor.naviagte_to_ad_page(ad_url):
|
if await ad_extractor.naviagte_to_ad_page(ad_url):
|
||||||
await ad_extractor.download_ad(ad_id)
|
await ad_extractor.download_ad(ad_id)
|
||||||
new_count += 1
|
new_count += 1
|
||||||
LOG.info('%s were downloaded from your profile.', pluralize("new ad", new_count))
|
LOG.info("%s were downloaded from your profile.", pluralize("new ad", new_count))
|
||||||
|
|
||||||
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
|
elif re.compile(r"\d+[,\d+]*").search(self.ads_selector): # download ad(s) with specific id(s)
|
||||||
ids = [int(n) for n in self.ads_selector.split(',')]
|
ids = [int(n) for n in self.ads_selector.split(",")]
|
||||||
LOG.info('Starting download of ad(s) with the id(s):')
|
LOG.info("Starting download of ad(s) with the id(s):")
|
||||||
LOG.info(' | '.join([str(ad_id) for ad_id in ids]))
|
LOG.info(" | ".join([str(ad_id) for ad_id in ids]))
|
||||||
|
|
||||||
for ad_id in ids: # call download routine for every id
|
for ad_id in ids: # call download routine for every id
|
||||||
exists = await ad_extractor.naviagte_to_ad_page(ad_id)
|
exists = await ad_extractor.naviagte_to_ad_page(ad_id)
|
||||||
if exists:
|
if exists:
|
||||||
await ad_extractor.download_ad(ad_id)
|
await ad_extractor.download_ad(ad_id)
|
||||||
LOG.info('Downloaded ad with id %d', ad_id)
|
LOG.info("Downloaded ad with id %d", ad_id)
|
||||||
else:
|
else:
|
||||||
LOG.error('The page with the id %d does not exist!', ad_id)
|
LOG.error("The page with the id %d does not exist!", ad_id)
|
||||||
|
|
||||||
def __get_description_with_affixes(self, ad_cfg: dict[str, Any]) -> str:
|
def __get_description_with_affixes(self, ad_cfg:dict[str, Any]) -> str:
|
||||||
"""Get the complete description with prefix and suffix applied.
|
"""Get the complete description with prefix and suffix applied.
|
||||||
|
|
||||||
Precedence (highest to lowest):
|
Precedence (highest to lowest):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from .utils import dicts
|
|||||||
MAX_DESCRIPTION_LENGTH:Final[int] = 4000
|
MAX_DESCRIPTION_LENGTH:Final[int] = 4000
|
||||||
|
|
||||||
|
|
||||||
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
def calculate_content_hash(ad_cfg:dict[str, Any]) -> str:
|
||||||
"""Calculate a hash for user-modifiable fields of the ad."""
|
"""Calculate a hash for user-modifiable fields of the ad."""
|
||||||
|
|
||||||
# Relevant fields for the hash
|
# Relevant fields for the hash
|
||||||
@@ -40,7 +40,7 @@ def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
|||||||
return hashlib.sha256(content_str.encode()).hexdigest()
|
return hashlib.sha256(content_str.encode()).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def get_description_affixes(config: dict[str, Any], *, prefix: bool = True) -> str:
|
def get_description_affixes(config:dict[str, Any], *, prefix:bool = True) -> str:
|
||||||
"""Get prefix or suffix for description with proper precedence.
|
"""Get prefix or suffix for description with proper precedence.
|
||||||
|
|
||||||
This function handles both the new flattened format and legacy nested format:
|
This function handles both the new flattened format and legacy nested format:
|
||||||
|
|||||||
@@ -36,22 +36,22 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# create sub-directory for ad(s) to download (if necessary):
|
# create sub-directory for ad(s) to download (if necessary):
|
||||||
relative_directory = 'downloaded-ads'
|
relative_directory = "downloaded-ads"
|
||||||
# make sure configured base directory exists
|
# make sure configured base directory exists
|
||||||
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
|
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
|
||||||
os.mkdir(relative_directory)
|
os.mkdir(relative_directory)
|
||||||
LOG.info('Created ads directory at ./%s.', relative_directory)
|
LOG.info("Created ads directory at ./%s.", relative_directory)
|
||||||
|
|
||||||
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
|
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
|
||||||
if os.path.exists(new_base_dir):
|
if os.path.exists(new_base_dir):
|
||||||
LOG.info('Deleting current folder of ad %s...', ad_id)
|
LOG.info("Deleting current folder of ad %s...", ad_id)
|
||||||
shutil.rmtree(new_base_dir)
|
shutil.rmtree(new_base_dir)
|
||||||
os.mkdir(new_base_dir)
|
os.mkdir(new_base_dir)
|
||||||
LOG.info('New directory for ad created at %s.', new_base_dir)
|
LOG.info("New directory for ad created at %s.", new_base_dir)
|
||||||
|
|
||||||
# call extraction function
|
# call extraction function
|
||||||
info = await self._extract_ad_page_info(new_base_dir, ad_id)
|
info = await self._extract_ad_page_info(new_base_dir, ad_id)
|
||||||
ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml'
|
ad_file_path = new_base_dir + "/" + f'ad_{ad_id}.yaml'
|
||||||
dicts.save_dict(ad_file_path, info)
|
dicts.save_dict(ad_file_path, info)
|
||||||
|
|
||||||
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
|
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
|
||||||
@@ -67,18 +67,18 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
img_paths = []
|
img_paths = []
|
||||||
try:
|
try:
|
||||||
# download all images from box
|
# download all images from box
|
||||||
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
|
image_box = await self.web_find(By.CLASS_NAME, "galleryimage-large")
|
||||||
|
|
||||||
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
|
n_images = len(await self.web_find_all(By.CSS_SELECTOR, ".galleryimage-element[data-ix]", parent = image_box))
|
||||||
LOG.info('Found %s.', i18n.pluralize("image", n_images))
|
LOG.info("Found %s.", i18n.pluralize("image", n_images))
|
||||||
|
|
||||||
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
|
img_element:Element = await self.web_find(By.CSS_SELECTOR, "div:nth-child(1) > img", parent = image_box)
|
||||||
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
|
img_fn_prefix = "ad_" + str(ad_id) + "__img"
|
||||||
|
|
||||||
img_nr = 1
|
img_nr = 1
|
||||||
dl_counter = 0
|
dl_counter = 0
|
||||||
while img_nr <= n_images: # scrolling + downloading
|
while img_nr <= n_images: # scrolling + downloading
|
||||||
current_img_url = img_element.attrs['src'] # URL of the image
|
current_img_url = img_element.attrs["src"] # URL of the image
|
||||||
if current_img_url is None:
|
if current_img_url is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -86,43 +86,43 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
content_type = response.info().get_content_type()
|
content_type = response.info().get_content_type()
|
||||||
file_ending = mimetypes.guess_extension(content_type)
|
file_ending = mimetypes.guess_extension(content_type)
|
||||||
img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}"
|
img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}"
|
||||||
with open(img_path, 'wb') as f:
|
with open(img_path, "wb") as f:
|
||||||
shutil.copyfileobj(response, f)
|
shutil.copyfileobj(response, f)
|
||||||
dl_counter += 1
|
dl_counter += 1
|
||||||
img_paths.append(img_path.rsplit('/', maxsplit = 1)[-1])
|
img_paths.append(img_path.rsplit("/", maxsplit = 1)[-1])
|
||||||
|
|
||||||
# navigate to next image (if exists)
|
# navigate to next image (if exists)
|
||||||
if img_nr < n_images:
|
if img_nr < n_images:
|
||||||
try:
|
try:
|
||||||
# click next button, wait, and re-establish reference
|
# click next button, wait, and re-establish reference
|
||||||
await (await self.web_find(By.CLASS_NAME, 'galleryimage--navigation--next')).click()
|
await (await self.web_find(By.CLASS_NAME, "galleryimage--navigation--next")).click()
|
||||||
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
|
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
|
||||||
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
|
img_element = await self.web_find(By.TAG_NAME, "img", parent = new_div)
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.')
|
LOG.error("NEXT button in image gallery somehow missing, aborting image fetching.")
|
||||||
break
|
break
|
||||||
img_nr += 1
|
img_nr += 1
|
||||||
LOG.info('Downloaded %s.', i18n.pluralize("image", dl_counter))
|
LOG.info("Downloaded %s.", i18n.pluralize("image", dl_counter))
|
||||||
|
|
||||||
except TimeoutError: # some ads do not require images
|
except TimeoutError: # some ads do not require images
|
||||||
LOG.warning('No image area found. Continuing without downloading images.')
|
LOG.warning("No image area found. Continuing without downloading images.")
|
||||||
|
|
||||||
return img_paths
|
return img_paths
|
||||||
|
|
||||||
def extract_ad_id_from_ad_url(self, url: str) -> int:
|
def extract_ad_id_from_ad_url(self, url:str) -> int:
|
||||||
"""
|
"""
|
||||||
Extracts the ID of an ad, given by its reference link.
|
Extracts the ID of an ad, given by its reference link.
|
||||||
|
|
||||||
:param url: the URL to the ad page
|
:param url: the URL to the ad page
|
||||||
:return: the ad ID, a (ten-digit) integer number
|
:return: the ad ID, a (ten-digit) integer number
|
||||||
"""
|
"""
|
||||||
num_part = url.split('/')[-1] # suffix
|
num_part = url.split("/")[-1] # suffix
|
||||||
id_part = num_part.split('-')[0]
|
id_part = num_part.split("-")[0]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
path = url.split('?', 1)[0] # Remove query string if present
|
path = url.split("?", 1)[0] # Remove query string if present
|
||||||
last_segment = path.rstrip('/').split('/')[-1] # Get last path component
|
last_segment = path.rstrip("/").split("/")[-1] # Get last path component
|
||||||
id_part = last_segment.split('-')[0] # Extract part before first hyphen
|
id_part = last_segment.split("-")[0] # Extract part before first hyphen
|
||||||
return int(id_part)
|
return int(id_part)
|
||||||
except (IndexError, ValueError) as ex:
|
except (IndexError, ValueError) as ex:
|
||||||
LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex)
|
LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex)
|
||||||
@@ -135,41 +135,41 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
:return: the links to your ad pages
|
:return: the links to your ad pages
|
||||||
"""
|
"""
|
||||||
# navigate to "your ads" page
|
# navigate to "your ads" page
|
||||||
await self.web_open('https://www.kleinanzeigen.de/m-meine-anzeigen.html')
|
await self.web_open("https://www.kleinanzeigen.de/m-meine-anzeigen.html")
|
||||||
await self.web_sleep(2000, 3000) # Consider replacing with explicit waits later
|
await self.web_sleep(2000, 3000) # Consider replacing with explicit waits later
|
||||||
|
|
||||||
# Try to find the main ad list container first
|
# Try to find the main ad list container first
|
||||||
try:
|
try:
|
||||||
ad_list_container = await self.web_find(By.ID, 'my-manageitems-adlist')
|
ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
LOG.warning('Ad list container #my-manageitems-adlist not found. Maybe no ads present?')
|
LOG.warning("Ad list container #my-manageitems-adlist not found. Maybe no ads present?")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# --- Pagination handling ---
|
# --- Pagination handling ---
|
||||||
multi_page = False
|
multi_page = False
|
||||||
try:
|
try:
|
||||||
# Correct selector: Use uppercase '.Pagination'
|
# Correct selector: Use uppercase '.Pagination'
|
||||||
pagination_section = await self.web_find(By.CSS_SELECTOR, '.Pagination', timeout=10) # Increased timeout slightly
|
pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = 10) # Increased timeout slightly
|
||||||
# Correct selector: Use 'aria-label'
|
# Correct selector: Use 'aria-label'
|
||||||
# Also check if the button is actually present AND potentially enabled (though enabled check isn't strictly necessary here, only for clicking later)
|
# Also check if the button is actually present AND potentially enabled (though enabled check isn't strictly necessary here, only for clicking later)
|
||||||
next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section)
|
next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
|
||||||
if next_buttons:
|
if next_buttons:
|
||||||
# Check if at least one 'Nächste' button is not disabled (optional but good practice)
|
# Check if at least one 'Nächste' button is not disabled (optional but good practice)
|
||||||
enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get('disabled')]
|
enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get("disabled")]
|
||||||
if enabled_next_buttons:
|
if enabled_next_buttons:
|
||||||
multi_page = True
|
multi_page = True
|
||||||
LOG.info('Multiple ad pages detected.')
|
LOG.info("Multiple ad pages detected.")
|
||||||
else:
|
else:
|
||||||
LOG.info('Next button found but is disabled. Assuming single effective page.')
|
LOG.info("Next button found but is disabled. Assuming single effective page.")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
LOG.info('No "Naechste" button found within pagination. Assuming single page.')
|
LOG.info('No "Naechste" button found within pagination. Assuming single page.')
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
# This will now correctly trigger only if the '.Pagination' div itself is not found
|
# This will now correctly trigger only if the '.Pagination' div itself is not found
|
||||||
LOG.info('No pagination controls found. Assuming single page.')
|
LOG.info("No pagination controls found. Assuming single page.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.exception("Error during pagination detection: %s", e)
|
LOG.exception("Error during pagination detection: %s", e)
|
||||||
LOG.info('Assuming single page due to error during pagination check.')
|
LOG.info("Assuming single page due to error during pagination check.")
|
||||||
# --- End Pagination Handling ---
|
# --- End Pagination Handling ---
|
||||||
|
|
||||||
refs:list[str] = []
|
refs:list[str] = []
|
||||||
@@ -182,8 +182,8 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
# Re-find the ad list container on the current page/state
|
# Re-find the ad list container on the current page/state
|
||||||
try:
|
try:
|
||||||
ad_list_container = await self.web_find(By.ID, 'my-manageitems-adlist')
|
ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
|
||||||
list_items = await self.web_find_all(By.CLASS_NAME, 'cardbox', parent=ad_list_container)
|
list_items = await self.web_find_all(By.CLASS_NAME, "cardbox", parent = ad_list_container)
|
||||||
LOG.info("Found %s ad items on page %s.", len(list_items), current_page)
|
LOG.info("Found %s ad items on page %s.", len(list_items), current_page)
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
LOG.warning("Could not find ad list container or items on page %s.", current_page)
|
LOG.warning("Could not find ad list container or items on page %s.", current_page)
|
||||||
@@ -192,7 +192,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
# Extract references using the CORRECTED selector
|
# Extract references using the CORRECTED selector
|
||||||
try:
|
try:
|
||||||
page_refs = [
|
page_refs = [
|
||||||
(await self.web_find(By.CSS_SELECTOR, 'div.manageitems-item-ad h3 a.text-onSurface', parent=li)).attrs['href']
|
(await self.web_find(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = li)).attrs["href"]
|
||||||
for li in list_items
|
for li in list_items
|
||||||
]
|
]
|
||||||
refs.extend(page_refs)
|
refs.extend(page_refs)
|
||||||
@@ -207,12 +207,12 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
# --- Navigate to next page ---
|
# --- Navigate to next page ---
|
||||||
try:
|
try:
|
||||||
# Find the pagination section again (scope might have changed after scroll/wait)
|
# Find the pagination section again (scope might have changed after scroll/wait)
|
||||||
pagination_section = await self.web_find(By.CSS_SELECTOR, '.Pagination', timeout=5)
|
pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = 5)
|
||||||
# Find the "Next" button using the correct aria-label selector and ensure it's not disabled
|
# Find the "Next" button using the correct aria-label selector and ensure it's not disabled
|
||||||
next_button_element = None
|
next_button_element = None
|
||||||
possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section)
|
possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
|
||||||
for btn in possible_next_buttons:
|
for btn in possible_next_buttons:
|
||||||
if not btn.attrs.get('disabled'): # Check if the button is enabled
|
if not btn.attrs.get("disabled"): # Check if the button is enabled
|
||||||
next_button_element = btn
|
next_button_element = btn
|
||||||
break # Found an enabled next button
|
break # Found an enabled next button
|
||||||
|
|
||||||
@@ -235,7 +235,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
# --- End Navigation ---
|
# --- End Navigation ---
|
||||||
|
|
||||||
if not refs:
|
if not refs:
|
||||||
LOG.warning('No ad URLs were extracted.')
|
LOG.warning("No ad URLs were extracted.")
|
||||||
|
|
||||||
return refs
|
return refs
|
||||||
|
|
||||||
@@ -246,27 +246,27 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
"""
|
"""
|
||||||
if reflect.is_integer(id_or_url):
|
if reflect.is_integer(id_or_url):
|
||||||
# navigate to start page, otherwise page can be None!
|
# navigate to start page, otherwise page can be None!
|
||||||
await self.web_open('https://www.kleinanzeigen.de/')
|
await self.web_open("https://www.kleinanzeigen.de/")
|
||||||
# enter the ad ID into the search bar
|
# enter the ad ID into the search bar
|
||||||
await self.web_input(By.ID, "site-search-query", id_or_url)
|
await self.web_input(By.ID, "site-search-query", id_or_url)
|
||||||
# navigate to ad page and wait
|
# navigate to ad page and wait
|
||||||
await self.web_check(By.ID, 'site-search-submit', Is.CLICKABLE)
|
await self.web_check(By.ID, "site-search-submit", Is.CLICKABLE)
|
||||||
submit_button = await self.web_find(By.ID, 'site-search-submit')
|
submit_button = await self.web_find(By.ID, "site-search-submit")
|
||||||
await submit_button.click()
|
await submit_button.click()
|
||||||
else:
|
else:
|
||||||
await self.web_open(str(id_or_url)) # navigate to URL directly given
|
await self.web_open(str(id_or_url)) # navigate to URL directly given
|
||||||
await self.web_sleep()
|
await self.web_sleep()
|
||||||
|
|
||||||
# handle the case that invalid ad ID given
|
# handle the case that invalid ad ID given
|
||||||
if self.page.url.endswith('k0'):
|
if self.page.url.endswith("k0"):
|
||||||
LOG.error('There is no ad under the given ID.')
|
LOG.error("There is no ad under the given ID.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# close (warning) popup, if given
|
# close (warning) popup, if given
|
||||||
try:
|
try:
|
||||||
await self.web_find(By.ID, 'vap-ovrly-secure')
|
await self.web_find(By.ID, "vap-ovrly-secure")
|
||||||
LOG.warning('A popup appeared!')
|
LOG.warning("A popup appeared!")
|
||||||
await self.web_click(By.CLASS_NAME, 'mfp-close')
|
await self.web_click(By.CLASS_NAME, "mfp-close")
|
||||||
await self.web_sleep()
|
await self.web_sleep()
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
pass
|
pass
|
||||||
@@ -280,22 +280,22 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
:param ad_id: the ad ID, already extracted by a calling function
|
:param ad_id: the ad ID, already extracted by a calling function
|
||||||
:return: a dictionary with the keys as given in an ad YAML, and their respective values
|
:return: a dictionary with the keys as given in an ad YAML, and their respective values
|
||||||
"""
|
"""
|
||||||
info:dict[str, Any] = {'active': True}
|
info:dict[str, Any] = {"active": True}
|
||||||
|
|
||||||
# extract basic info
|
# extract basic info
|
||||||
info['type'] = 'OFFER' if 's-anzeige' in self.page.url else 'WANTED'
|
info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
|
||||||
title:str = await self.web_text(By.ID, 'viewad-title')
|
title:str = await self.web_text(By.ID, "viewad-title")
|
||||||
LOG.info('Extracting information from ad with title "%s"', title)
|
LOG.info('Extracting information from ad with title "%s"', title)
|
||||||
|
|
||||||
info['category'] = await self._extract_category_from_ad_page()
|
info["category"] = await self._extract_category_from_ad_page()
|
||||||
info['title'] = title
|
info["title"] = title
|
||||||
|
|
||||||
# Get raw description text
|
# Get raw description text
|
||||||
raw_description = (await self.web_text(By.ID, 'viewad-description-text')).strip()
|
raw_description = (await self.web_text(By.ID, "viewad-description-text")).strip()
|
||||||
|
|
||||||
# Get prefix and suffix from config
|
# Get prefix and suffix from config
|
||||||
prefix = get_description_affixes(self.config, prefix=True)
|
prefix = get_description_affixes(self.config, prefix = True)
|
||||||
suffix = get_description_affixes(self.config, prefix=False)
|
suffix = get_description_affixes(self.config, prefix = False)
|
||||||
|
|
||||||
# Remove prefix and suffix if present
|
# Remove prefix and suffix if present
|
||||||
description_text = raw_description
|
description_text = raw_description
|
||||||
@@ -304,38 +304,38 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
if suffix and description_text.endswith(suffix.strip()):
|
if suffix and description_text.endswith(suffix.strip()):
|
||||||
description_text = description_text[:-len(suffix.strip())]
|
description_text = description_text[:-len(suffix.strip())]
|
||||||
|
|
||||||
info['description'] = description_text.strip()
|
info["description"] = description_text.strip()
|
||||||
|
|
||||||
info['special_attributes'] = await self._extract_special_attributes_from_ad_page()
|
info["special_attributes"] = await self._extract_special_attributes_from_ad_page()
|
||||||
if "art_s" in info['special_attributes']:
|
if "art_s" in info["special_attributes"]:
|
||||||
# change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
|
# change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
|
||||||
info['category'] = f"{info['category']}/{info['special_attributes']['art_s']}"
|
info["category"] = f"{info['category']}/{info['special_attributes']['art_s']}"
|
||||||
del info['special_attributes']['art_s']
|
del info["special_attributes"]["art_s"]
|
||||||
if "schaden_s" in info['special_attributes']:
|
if "schaden_s" in info["special_attributes"]:
|
||||||
# change f to 'nein' and 't' to 'ja'
|
# change f to 'nein' and 't' to 'ja'
|
||||||
info['special_attributes']['schaden_s'] = info['special_attributes']['schaden_s'].translate(str.maketrans({'t': 'ja', 'f': 'nein'}))
|
info["special_attributes"]["schaden_s"] = info["special_attributes"]["schaden_s"].translate(str.maketrans({"t": "ja", "f": "nein"}))
|
||||||
info['price'], info['price_type'] = await self._extract_pricing_info_from_ad_page()
|
info["price"], info["price_type"] = await self._extract_pricing_info_from_ad_page()
|
||||||
info['shipping_type'], info['shipping_costs'], info['shipping_options'] = await self._extract_shipping_info_from_ad_page()
|
info["shipping_type"], info["shipping_costs"], info["shipping_options"] = await self._extract_shipping_info_from_ad_page()
|
||||||
info['sell_directly'] = await self._extract_sell_directly_from_ad_page()
|
info["sell_directly"] = await self._extract_sell_directly_from_ad_page()
|
||||||
info['images'] = await self._download_images_from_ad_page(directory, ad_id)
|
info["images"] = await self._download_images_from_ad_page(directory, ad_id)
|
||||||
info['contact'] = await self._extract_contact_from_ad_page()
|
info["contact"] = await self._extract_contact_from_ad_page()
|
||||||
info['id'] = ad_id
|
info["id"] = ad_id
|
||||||
|
|
||||||
try: # try different locations known for creation date element
|
try: # try different locations known for creation date element
|
||||||
creation_date = await self.web_text(By.XPATH,
|
creation_date = await self.web_text(By.XPATH,
|
||||||
'/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span')
|
"/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
creation_date = await self.web_text(By.CSS_SELECTOR, '#viewad-extra-info > div:nth-child(1) > span:nth-child(2)')
|
creation_date = await self.web_text(By.CSS_SELECTOR, "#viewad-extra-info > div:nth-child(1) > span:nth-child(2)")
|
||||||
|
|
||||||
# convert creation date to ISO format
|
# convert creation date to ISO format
|
||||||
created_parts = creation_date.split('.')
|
created_parts = creation_date.split(".")
|
||||||
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00'
|
creation_date = created_parts[2] + "-" + created_parts[1] + "-" + created_parts[0] + " 00:00:00"
|
||||||
creation_date = datetime.fromisoformat(creation_date).isoformat()
|
creation_date = datetime.fromisoformat(creation_date).isoformat()
|
||||||
info['created_on'] = creation_date
|
info["created_on"] = creation_date
|
||||||
info['updated_on'] = None # will be set later on
|
info["updated_on"] = None # will be set later on
|
||||||
|
|
||||||
# Calculate the initial hash for the downloaded ad
|
# Calculate the initial hash for the downloaded ad
|
||||||
info['content_hash'] = calculate_content_hash(info)
|
info["content_hash"] = calculate_content_hash(info)
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
@@ -346,12 +346,12 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
:return: a category string of form abc/def, where a-f are digits
|
:return: a category string of form abc/def, where a-f are digits
|
||||||
"""
|
"""
|
||||||
category_line = await self.web_find(By.ID, 'vap-brdcrmb')
|
category_line = await self.web_find(By.ID, "vap-brdcrmb")
|
||||||
category_first_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line)
|
category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
|
||||||
category_second_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line)
|
category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
|
||||||
cat_num_first = category_first_part.attrs['href'].split('/')[-1][1:]
|
cat_num_first = category_first_part.attrs["href"].split("/")[-1][1:]
|
||||||
cat_num_second = category_second_part.attrs['href'].split('/')[-1][1:]
|
cat_num_second = category_second_part.attrs["href"].split("/")[-1][1:]
|
||||||
category:str = cat_num_first + '/' + cat_num_second
|
category:str = cat_num_first + "/" + cat_num_second
|
||||||
|
|
||||||
return category
|
return category
|
||||||
|
|
||||||
@@ -368,7 +368,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"]
|
special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"]
|
||||||
|
|
||||||
special_attributes = dict(item.split(":") for item in special_attributes_str.split("|") if ":" in item)
|
special_attributes = dict(item.split(":") for item in special_attributes_str.split("|") if ":" in item)
|
||||||
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s') and k != "versand_s"}
|
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith(".versand_s") and k != "versand_s"}
|
||||||
return special_attributes
|
return special_attributes
|
||||||
|
|
||||||
async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
|
async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
|
||||||
@@ -378,24 +378,24 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
:return: the price of the offer (optional); and the pricing type
|
:return: the price of the offer (optional); and the pricing type
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
price_str:str = await self.web_text(By.ID, 'viewad-price')
|
price_str:str = await self.web_text(By.ID, "viewad-price")
|
||||||
price:int | None = None
|
price:int | None = None
|
||||||
match price_str.split()[-1]:
|
match price_str.split()[-1]:
|
||||||
case '€':
|
case "€":
|
||||||
price_type = 'FIXED'
|
price_type = "FIXED"
|
||||||
# replace('.', '') is to remove the thousands separator before parsing as int
|
# replace('.', '') is to remove the thousands separator before parsing as int
|
||||||
price = int(price_str.replace('.', '').split()[0])
|
price = int(price_str.replace(".", "").split()[0])
|
||||||
case 'VB':
|
case "VB":
|
||||||
price_type = 'NEGOTIABLE'
|
price_type = "NEGOTIABLE"
|
||||||
if price_str != "VB": # can be either 'X € VB', or just 'VB'
|
if price_str != "VB": # can be either 'X € VB', or just 'VB'
|
||||||
price = int(price_str.replace('.', '').split()[0])
|
price = int(price_str.replace(".", "").split()[0])
|
||||||
case 'verschenken':
|
case "verschenken":
|
||||||
price_type = 'GIVE_AWAY'
|
price_type = "GIVE_AWAY"
|
||||||
case _:
|
case _:
|
||||||
price_type = 'NOT_APPLICABLE'
|
price_type = "NOT_APPLICABLE"
|
||||||
return price, price_type
|
return price, price_type
|
||||||
except TimeoutError: # no 'commercial' ad, has no pricing box etc.
|
except TimeoutError: # no 'commercial' ad, has no pricing box etc.
|
||||||
return None, 'NOT_APPLICABLE'
|
return None, "NOT_APPLICABLE"
|
||||||
|
|
||||||
async def _extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
|
async def _extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
|
||||||
"""
|
"""
|
||||||
@@ -403,17 +403,17 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
:return: the shipping type, and the shipping price (optional)
|
:return: the shipping type, and the shipping price (optional)
|
||||||
"""
|
"""
|
||||||
ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None
|
ship_type, ship_costs, shipping_options = "NOT_APPLICABLE", None, None
|
||||||
try:
|
try:
|
||||||
shipping_text = await self.web_text(By.CLASS_NAME, 'boxedarticle--details--shipping')
|
shipping_text = await self.web_text(By.CLASS_NAME, "boxedarticle--details--shipping")
|
||||||
# e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung'
|
# e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung'
|
||||||
if shipping_text == 'Nur Abholung':
|
if shipping_text == "Nur Abholung":
|
||||||
ship_type = 'PICKUP'
|
ship_type = "PICKUP"
|
||||||
elif shipping_text == 'Versand möglich':
|
elif shipping_text == "Versand möglich":
|
||||||
ship_type = 'SHIPPING'
|
ship_type = "SHIPPING"
|
||||||
elif '€' in shipping_text:
|
elif "€" in shipping_text:
|
||||||
shipping_price_parts = shipping_text.split(' ')
|
shipping_price_parts = shipping_text.split(" ")
|
||||||
ship_type = 'SHIPPING'
|
ship_type = "SHIPPING"
|
||||||
ship_costs = float(misc.parse_decimal(shipping_price_parts[-2]))
|
ship_costs = float(misc.parse_decimal(shipping_price_parts[-2]))
|
||||||
|
|
||||||
# reading shipping option from kleinanzeigen
|
# reading shipping option from kleinanzeigen
|
||||||
@@ -425,7 +425,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
internal_shipping_opt = [x for x in shipping_costs if x["priceInEuroCent"] == ship_costs * 100]
|
internal_shipping_opt = [x for x in shipping_costs if x["priceInEuroCent"] == ship_costs * 100]
|
||||||
|
|
||||||
if not internal_shipping_opt:
|
if not internal_shipping_opt:
|
||||||
return 'NOT_APPLICABLE', ship_costs, shipping_options
|
return "NOT_APPLICABLE", ship_costs, shipping_options
|
||||||
|
|
||||||
# map to internal shipping identifiers used by kleinanzeigen-bot
|
# map to internal shipping identifiers used by kleinanzeigen-bot
|
||||||
shipping_option_mapping = {
|
shipping_option_mapping = {
|
||||||
@@ -440,13 +440,13 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
"HERMES_004": "Hermes_L"
|
"HERMES_004": "Hermes_L"
|
||||||
}
|
}
|
||||||
|
|
||||||
shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]['id'])
|
shipping_option = shipping_option_mapping.get(internal_shipping_opt[0]["id"])
|
||||||
if not shipping_option:
|
if not shipping_option:
|
||||||
return 'NOT_APPLICABLE', ship_costs, shipping_options
|
return "NOT_APPLICABLE", ship_costs, shipping_options
|
||||||
|
|
||||||
shipping_options = [shipping_option]
|
shipping_options = [shipping_option]
|
||||||
except TimeoutError: # no pricing box -> no shipping given
|
except TimeoutError: # no pricing box -> no shipping given
|
||||||
ship_type = 'NOT_APPLICABLE'
|
ship_type = "NOT_APPLICABLE"
|
||||||
|
|
||||||
return ship_type, ship_costs, shipping_options
|
return ship_type, ship_costs, shipping_options
|
||||||
|
|
||||||
@@ -457,7 +457,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
:return: a boolean indicating whether the sell directly option is active (optional)
|
:return: a boolean indicating whether the sell directly option is active (optional)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
buy_now_is_active:bool = 'Direkt kaufen' in (await self.web_text(By.ID, 'payment-buttons-sidebar'))
|
buy_now_is_active:bool = "Direkt kaufen" in (await self.web_text(By.ID, "payment-buttons-sidebar"))
|
||||||
return buy_now_is_active
|
return buy_now_is_active
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
return None
|
return None
|
||||||
@@ -469,34 +469,34 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
:return: a dictionary containing the address parts with their corresponding values
|
:return: a dictionary containing the address parts with their corresponding values
|
||||||
"""
|
"""
|
||||||
contact:dict[str, (str | None)] = {}
|
contact:dict[str, (str | None)] = {}
|
||||||
address_text = await self.web_text(By.ID, 'viewad-locality')
|
address_text = await self.web_text(By.ID, "viewad-locality")
|
||||||
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
|
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
|
||||||
try:
|
try:
|
||||||
street = (await self.web_text(By.ID, 'street-address'))[:-1] # trailing comma
|
street = (await self.web_text(By.ID, "street-address"))[:-1] # trailing comma
|
||||||
contact['street'] = street
|
contact["street"] = street
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
LOG.info('No street given in the contact.')
|
LOG.info("No street given in the contact.")
|
||||||
|
|
||||||
(zipcode, location) = address_text.split(" ", 1)
|
(zipcode, location) = address_text.split(" ", 1)
|
||||||
contact['zipcode'] = zipcode # e.g. 19372
|
contact["zipcode"] = zipcode # e.g. 19372
|
||||||
contact['location'] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
|
contact["location"] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
|
||||||
|
|
||||||
contact_person_element:Element = await self.web_find(By.ID, 'viewad-contact')
|
contact_person_element:Element = await self.web_find(By.ID, "viewad-contact")
|
||||||
name_element = await self.web_find(By.CLASS_NAME, 'iconlist-text', parent = contact_person_element)
|
name_element = await self.web_find(By.CLASS_NAME, "iconlist-text", parent = contact_person_element)
|
||||||
try:
|
try:
|
||||||
name = await self.web_text(By.TAG_NAME, 'a', parent = name_element)
|
name = await self.web_text(By.TAG_NAME, "a", parent = name_element)
|
||||||
except TimeoutError: # edge case: name without link
|
except TimeoutError: # edge case: name without link
|
||||||
name = await self.web_text(By.TAG_NAME, 'span', parent = name_element)
|
name = await self.web_text(By.TAG_NAME, "span", parent = name_element)
|
||||||
contact['name'] = name
|
contact["name"] = name
|
||||||
|
|
||||||
if 'street' not in contact:
|
if "street" not in contact:
|
||||||
contact['street'] = None
|
contact["street"] = None
|
||||||
try: # phone number is unusual for non-professional sellers today
|
try: # phone number is unusual for non-professional sellers today
|
||||||
phone_element = await self.web_find(By.ID, 'viewad-contact-phone')
|
phone_element = await self.web_find(By.ID, "viewad-contact-phone")
|
||||||
phone_number = await self.web_text(By.TAG_NAME, 'a', parent = phone_element)
|
phone_number = await self.web_text(By.TAG_NAME, "a", parent = phone_element)
|
||||||
contact['phone'] = ''.join(phone_number.replace('-', ' ').split(' ')).replace('+49(0)', '0')
|
contact["phone"] = "".join(phone_number.replace("-", " ").split(" ")).replace("+49(0)", "0")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
contact['phone'] = None # phone seems to be a deprecated feature (for non-professional users)
|
contact["phone"] = None # phone seems to be a deprecated feature (for non-professional users)
|
||||||
# also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/
|
# also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/
|
||||||
|
|
||||||
return contact
|
return contact
|
||||||
|
|||||||
@@ -96,7 +96,7 @@ def save_dict(filepath:str, content:dict[str, Any]) -> None:
|
|||||||
yaml.indent(mapping = 2, sequence = 4, offset = 2)
|
yaml.indent(mapping = 2, sequence = 4, offset = 2)
|
||||||
yaml.representer.add_representer(str, # use YAML | block style for multi-line strings
|
yaml.representer.add_representer(str, # use YAML | block style for multi-line strings
|
||||||
lambda dumper, data:
|
lambda dumper, data:
|
||||||
dumper.represent_scalar('tag:yaml.org,2002:str', data, style = '|' if '\n' in data else None)
|
dumper.represent_scalar("tag:yaml.org,2002:str", data, style = "|" if "\n" in data else None)
|
||||||
)
|
)
|
||||||
yaml.allow_duplicate_keys = False
|
yaml.allow_duplicate_keys = False
|
||||||
yaml.explicit_start = False
|
yaml.explicit_start = False
|
||||||
|
|||||||
@@ -3,14 +3,14 @@
|
|||||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
import sys, traceback # isort: skip
|
import sys, traceback # isort: skip
|
||||||
from types import FrameType, TracebackType
|
from types import FrameType, TracebackType
|
||||||
from typing import Any, Final
|
from typing import Final
|
||||||
|
|
||||||
from . import loggers
|
from . import loggers
|
||||||
|
|
||||||
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
|
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def on_exception(ex_type: type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> None:
|
def on_exception(ex_type:type[BaseException] | None, ex_value:BaseException | None, ex_traceback:TracebackType | None) -> None:
|
||||||
if ex_type is None or ex_value is None:
|
if ex_type is None or ex_value is None:
|
||||||
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s", ex_type, ex_value)
|
LOG.error("Unknown exception occurred (missing exception info): ex_type=%s, ex_value=%s", ex_type, ex_value)
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -11,6 +11,6 @@ class KleinanzeigenBotError(RuntimeError):
|
|||||||
class CaptchaEncountered(KleinanzeigenBotError):
|
class CaptchaEncountered(KleinanzeigenBotError):
|
||||||
"""Raised when a Captcha was detected and auto-restart is enabled."""
|
"""Raised when a Captcha was detected and auto-restart is enabled."""
|
||||||
|
|
||||||
def __init__(self, restart_delay: timedelta) -> None:
|
def __init__(self, restart_delay:timedelta) -> None:
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.restart_delay = restart_delay
|
self.restart_delay = restart_delay
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ class Locale(NamedTuple):
|
|||||||
return f"{self.language}{region_part}{encoding_part}"
|
return f"{self.language}{region_part}{encoding_part}"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def of(locale_string: str) -> 'Locale':
|
def of(locale_string:str) -> "Locale":
|
||||||
"""
|
"""
|
||||||
>>> Locale.of("en_US.UTF-8")
|
>>> Locale.of("en_US.UTF-8")
|
||||||
Locale(language='en', region='US', encoding='UTF-8')
|
Locale(language='en', region='US', encoding='UTF-8')
|
||||||
@@ -86,11 +86,11 @@ def _detect_locale() -> Locale:
|
|||||||
return Locale.of(lang) if lang else Locale("en", "US", "UTF-8")
|
return Locale.of(lang) if lang else Locale("en", "US", "UTF-8")
|
||||||
|
|
||||||
|
|
||||||
_CURRENT_LOCALE: Locale = _detect_locale()
|
_CURRENT_LOCALE:Locale = _detect_locale()
|
||||||
_TRANSLATIONS: dict[str, Any] | None = None
|
_TRANSLATIONS:dict[str, Any] | None = None
|
||||||
|
|
||||||
|
|
||||||
def translate(text:object, caller: inspect.FrameInfo | None) -> str:
|
def translate(text:object, caller:inspect.FrameInfo | None) -> str:
|
||||||
text = str(text)
|
text = str(text)
|
||||||
if not caller:
|
if not caller:
|
||||||
return text
|
return text
|
||||||
@@ -105,7 +105,7 @@ def translate(text:object, caller: inspect.FrameInfo | None) -> str:
|
|||||||
if not _TRANSLATIONS:
|
if not _TRANSLATIONS:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
module_name = caller.frame.f_globals.get('__name__') # pylint: disable=redefined-outer-name
|
module_name = caller.frame.f_globals.get("__name__") # pylint: disable=redefined-outer-name
|
||||||
file_basename = os.path.splitext(os.path.basename(caller.filename))[0]
|
file_basename = os.path.splitext(os.path.basename(caller.filename))[0]
|
||||||
if module_name and module_name.endswith(f".{file_basename}"):
|
if module_name and module_name.endswith(f".{file_basename}"):
|
||||||
module_name = module_name[:-(len(file_basename) + 1)]
|
module_name = module_name[:-(len(file_basename) + 1)]
|
||||||
@@ -124,9 +124,9 @@ gettext.gettext = lambda message: translate(_original_gettext(message), reflect.
|
|||||||
for module_name, module in sys.modules.items():
|
for module_name, module in sys.modules.items():
|
||||||
if module is None or module_name in sys.builtin_module_names:
|
if module is None or module_name in sys.builtin_module_names:
|
||||||
continue
|
continue
|
||||||
if hasattr(module, '_') and module._ is _original_gettext:
|
if hasattr(module, "_") and module._ is _original_gettext:
|
||||||
module._ = gettext.gettext # type: ignore[attr-defined]
|
module._ = gettext.gettext # type: ignore[attr-defined]
|
||||||
if hasattr(module, 'gettext') and module.gettext is _original_gettext:
|
if hasattr(module, "gettext") and module.gettext is _original_gettext:
|
||||||
module.gettext = gettext.gettext # type: ignore[attr-defined]
|
module.gettext = gettext.gettext # type: ignore[attr-defined]
|
||||||
|
|
||||||
|
|
||||||
@@ -190,8 +190,8 @@ def pluralize(noun:str, count:int | Sized, *, prefix_with_count:bool = True) ->
|
|||||||
# English
|
# English
|
||||||
if len(noun) < 2: # noqa: PLR2004 Magic value used in comparison
|
if len(noun) < 2: # noqa: PLR2004 Magic value used in comparison
|
||||||
return f"{prefix}{noun}s"
|
return f"{prefix}{noun}s"
|
||||||
if noun.endswith(('s', 'sh', 'ch', 'x', 'z')):
|
if noun.endswith(("s", "sh", "ch", "x", "z")):
|
||||||
return f"{prefix}{noun}es"
|
return f"{prefix}{noun}es"
|
||||||
if noun.endswith('y') and noun[-2].lower() not in "aeiou":
|
if noun.endswith("y") and noun[-2].lower() not in "aeiou":
|
||||||
return f"{prefix}{noun[:-1]}ies"
|
return f"{prefix}{noun[:-1]}ies"
|
||||||
return f"{prefix}{noun}s"
|
return f"{prefix}{noun}s"
|
||||||
|
|||||||
@@ -28,11 +28,11 @@ LOG_ROOT:Final[logging.Logger] = logging.getLogger()
|
|||||||
|
|
||||||
class _MaxLevelFilter(logging.Filter):
|
class _MaxLevelFilter(logging.Filter):
|
||||||
|
|
||||||
def __init__(self, level: int) -> None:
|
def __init__(self, level:int) -> None:
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.level = level
|
self.level = level
|
||||||
|
|
||||||
def filter(self, record: logging.LogRecord) -> bool:
|
def filter(self, record:logging.LogRecord) -> bool:
|
||||||
return record.levelno <= self.level
|
return record.levelno <= self.level
|
||||||
|
|
||||||
|
|
||||||
@@ -104,7 +104,7 @@ def configure_console_logging() -> None:
|
|||||||
class LogFileHandle:
|
class LogFileHandle:
|
||||||
"""Encapsulates a log file handler with close and status methods."""
|
"""Encapsulates a log file handler with close and status methods."""
|
||||||
|
|
||||||
def __init__(self, file_path: str, handler: RotatingFileHandler, logger: logging.Logger) -> None:
|
def __init__(self, file_path:str, handler:RotatingFileHandler, logger:logging.Logger) -> None:
|
||||||
self.file_path = file_path
|
self.file_path = file_path
|
||||||
self._handler:RotatingFileHandler | None = handler
|
self._handler:RotatingFileHandler | None = handler
|
||||||
self._logger = logger
|
self._logger = logger
|
||||||
@@ -146,14 +146,14 @@ def flush_all_handlers() -> None:
|
|||||||
handler.flush()
|
handler.flush()
|
||||||
|
|
||||||
|
|
||||||
def get_logger(name: str | None = None) -> logging.Logger:
|
def get_logger(name:str | None = None) -> logging.Logger:
|
||||||
"""
|
"""
|
||||||
Returns a localized logger
|
Returns a localized logger
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class TranslatingLogger(logging.Logger):
|
class TranslatingLogger(logging.Logger):
|
||||||
|
|
||||||
def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
|
def _log(self, level:int, msg:object, *args:Any, **kwargs:Any) -> None:
|
||||||
if level != DEBUG: # debug messages should not be translated
|
if level != DEBUG: # debug messages should not be translated
|
||||||
msg = i18n.translate(msg, reflect.get_caller(2))
|
msg = i18n.translate(msg, reflect.get_caller(2))
|
||||||
super()._log(level, msg, *args, **kwargs)
|
super()._log(level, msg, *args, **kwargs)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from typing import Any, TypeVar
|
|||||||
from . import i18n
|
from . import i18n
|
||||||
|
|
||||||
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
|
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
|
||||||
T = TypeVar('T')
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
|
def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout:float = 5, poll_requency:float = 0.5) -> None:
|
||||||
@@ -49,7 +49,7 @@ def is_frozen() -> bool:
|
|||||||
return getattr(sys, "frozen", False)
|
return getattr(sys, "frozen", False)
|
||||||
|
|
||||||
|
|
||||||
async def ainput(prompt: str) -> str:
|
async def ainput(prompt:str) -> str:
|
||||||
return await asyncio.to_thread(input, f'{prompt} ')
|
return await asyncio.to_thread(input, f'{prompt} ')
|
||||||
|
|
||||||
|
|
||||||
@@ -84,10 +84,10 @@ def parse_decimal(number:float | int | str) -> decimal.Decimal:
|
|||||||
|
|
||||||
|
|
||||||
def parse_datetime(
|
def parse_datetime(
|
||||||
date: datetime | str | None,
|
date:datetime | str | None,
|
||||||
*,
|
*,
|
||||||
add_timezone_if_missing: bool = True,
|
add_timezone_if_missing:bool = True,
|
||||||
use_local_timezone: bool = True
|
use_local_timezone:bool = True
|
||||||
) -> datetime | None:
|
) -> datetime | None:
|
||||||
"""
|
"""
|
||||||
Parses a datetime object or ISO-formatted string.
|
Parses a datetime object or ISO-formatted string.
|
||||||
@@ -152,22 +152,22 @@ def parse_duration(text:str) -> timedelta:
|
|||||||
>>> parse_duration("invalid input")
|
>>> parse_duration("invalid input")
|
||||||
datetime.timedelta(0)
|
datetime.timedelta(0)
|
||||||
"""
|
"""
|
||||||
pattern = re.compile(r'(\d+)\s*([dhms])')
|
pattern = re.compile(r"(\d+)\s*([dhms])")
|
||||||
parts = pattern.findall(text.lower())
|
parts = pattern.findall(text.lower())
|
||||||
kwargs: dict[str, int] = {}
|
kwargs:dict[str, int] = {}
|
||||||
for value, unit in parts:
|
for value, unit in parts:
|
||||||
if unit == 'd':
|
if unit == "d":
|
||||||
kwargs['days'] = kwargs.get('days', 0) + int(value)
|
kwargs["days"] = kwargs.get("days", 0) + int(value)
|
||||||
elif unit == 'h':
|
elif unit == "h":
|
||||||
kwargs['hours'] = kwargs.get('hours', 0) + int(value)
|
kwargs["hours"] = kwargs.get("hours", 0) + int(value)
|
||||||
elif unit == 'm':
|
elif unit == "m":
|
||||||
kwargs['minutes'] = kwargs.get('minutes', 0) + int(value)
|
kwargs["minutes"] = kwargs.get("minutes", 0) + int(value)
|
||||||
elif unit == 's':
|
elif unit == "s":
|
||||||
kwargs['seconds'] = kwargs.get('seconds', 0) + int(value)
|
kwargs["seconds"] = kwargs.get("seconds", 0) + int(value)
|
||||||
return timedelta(**kwargs)
|
return timedelta(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def format_timedelta(td: timedelta) -> str:
|
def format_timedelta(td:timedelta) -> str:
|
||||||
"""
|
"""
|
||||||
Formats a timedelta into a human-readable string using the pluralize utility.
|
Formats a timedelta into a human-readable string using the pluralize utility.
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import inspect
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
|
def get_caller(depth:int = 1) -> inspect.FrameInfo | None:
|
||||||
stack = inspect.stack()
|
stack = inspect.stack()
|
||||||
try:
|
try:
|
||||||
for frame in stack[depth + 1:]:
|
for frame in stack[depth + 1:]:
|
||||||
|
|||||||
@@ -165,7 +165,7 @@ class WebScrapingMixin:
|
|||||||
prefs_file = os.path.join(profile_dir, "Preferences")
|
prefs_file = os.path.join(profile_dir, "Preferences")
|
||||||
if not os.path.exists(prefs_file):
|
if not os.path.exists(prefs_file):
|
||||||
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
|
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
|
||||||
with open(prefs_file, "w", encoding = 'UTF-8') as fd:
|
with open(prefs_file, "w", encoding = "UTF-8") as fd:
|
||||||
json.dump({
|
json.dump({
|
||||||
"credentials_enable_service": False,
|
"credentials_enable_service": False,
|
||||||
"enable_do_not_track": True,
|
"enable_do_not_track": True,
|
||||||
@@ -234,16 +234,16 @@ class WebScrapingMixin:
|
|||||||
|
|
||||||
case "Windows":
|
case "Windows":
|
||||||
browser_paths = [
|
browser_paths = [
|
||||||
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
|
os.environ.get("PROGRAMFILES", "C:\\Program Files") + r"\Microsoft\Edge\Application\msedge.exe",
|
||||||
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',
|
os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)") + r"\Microsoft\Edge\Application\msedge.exe",
|
||||||
|
|
||||||
os.environ["PROGRAMFILES"] + r'\Chromium\Application\chrome.exe',
|
os.environ["PROGRAMFILES"] + r"\Chromium\Application\chrome.exe",
|
||||||
os.environ["PROGRAMFILES(X86)"] + r'\Chromium\Application\chrome.exe',
|
os.environ["PROGRAMFILES(X86)"] + r"\Chromium\Application\chrome.exe",
|
||||||
os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe',
|
os.environ["LOCALAPPDATA"] + r"\Chromium\Application\chrome.exe",
|
||||||
|
|
||||||
os.environ["PROGRAMFILES"] + r'\Chrome\Application\chrome.exe',
|
os.environ["PROGRAMFILES"] + r"\Chrome\Application\chrome.exe",
|
||||||
os.environ["PROGRAMFILES(X86)"] + r'\Chrome\Application\chrome.exe',
|
os.environ["PROGRAMFILES(X86)"] + r"\Chrome\Application\chrome.exe",
|
||||||
os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe',
|
os.environ["LOCALAPPDATA"] + r"\Chrome\Application\chrome.exe",
|
||||||
|
|
||||||
shutil.which("msedge.exe"),
|
shutil.which("msedge.exe"),
|
||||||
shutil.which("chromium.exe"),
|
shutil.which("chromium.exe"),
|
||||||
@@ -259,8 +259,8 @@ class WebScrapingMixin:
|
|||||||
|
|
||||||
raise AssertionError(_("Installed browser could not be detected"))
|
raise AssertionError(_("Installed browser could not be detected"))
|
||||||
|
|
||||||
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
|
async def web_await(self, condition:Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
|
||||||
timeout:int | float = 5, timeout_error_message: str = "") -> T:
|
timeout:int | float = 5, timeout_error_message:str = "") -> T:
|
||||||
"""
|
"""
|
||||||
Blocks/waits until the given condition is met.
|
Blocks/waits until the given condition is met.
|
||||||
|
|
||||||
@@ -523,7 +523,7 @@ class WebScrapingMixin:
|
|||||||
return response
|
return response
|
||||||
# pylint: enable=dangerous-default-value
|
# pylint: enable=dangerous-default-value
|
||||||
|
|
||||||
async def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10_000, *, scroll_back_top: bool = False) -> None:
|
async def web_scroll_page_down(self, scroll_length:int = 10, scroll_speed:int = 10_000, *, scroll_back_top:bool = False) -> None:
|
||||||
"""
|
"""
|
||||||
Smoothly scrolls the current web page down.
|
Smoothly scrolls the current web page down.
|
||||||
|
|
||||||
@@ -532,7 +532,7 @@ class WebScrapingMixin:
|
|||||||
:param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
|
:param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
|
||||||
"""
|
"""
|
||||||
current_y_pos = 0
|
current_y_pos = 0
|
||||||
bottom_y_pos: int = await self.web_execute('document.body.scrollHeight') # get bottom position
|
bottom_y_pos:int = await self.web_execute("document.body.scrollHeight") # get bottom position
|
||||||
while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached
|
while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached
|
||||||
current_y_pos += scroll_length
|
current_y_pos += scroll_length
|
||||||
await self.web_execute(f'window.scrollTo(0, {current_y_pos})') # scroll one step
|
await self.web_execute(f'window.scrollTo(0, {current_y_pos})') # scroll one step
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
"""
|
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
|
||||||
SPDX-FileCopyrightText: © Jens Bergmann and contributors
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
|
||||||
"""
|
|
||||||
import os
|
import os
|
||||||
from typing import Any, Final
|
from typing import Any, Final
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
@@ -21,7 +19,7 @@ LOG.setLevel(loggers.DEBUG)
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def test_data_dir(tmp_path: str) -> str:
|
def test_data_dir(tmp_path:str) -> str:
|
||||||
"""Provides a temporary directory for test data.
|
"""Provides a temporary directory for test data.
|
||||||
|
|
||||||
This fixture uses pytest's built-in tmp_path fixture to create a temporary
|
This fixture uses pytest's built-in tmp_path fixture to create a temporary
|
||||||
@@ -41,33 +39,33 @@ def sample_config() -> dict[str, Any]:
|
|||||||
- Publishing settings
|
- Publishing settings
|
||||||
"""
|
"""
|
||||||
return {
|
return {
|
||||||
'login': {
|
"login": {
|
||||||
'username': 'testuser',
|
"username": "testuser",
|
||||||
'password': 'testpass'
|
"password": "testpass"
|
||||||
},
|
},
|
||||||
'browser': {
|
"browser": {
|
||||||
'arguments': [],
|
"arguments": [],
|
||||||
'binary_location': None,
|
"binary_location": None,
|
||||||
'extensions': [],
|
"extensions": [],
|
||||||
'use_private_window': True,
|
"use_private_window": True,
|
||||||
'user_data_dir': None,
|
"user_data_dir": None,
|
||||||
'profile_name': None
|
"profile_name": None
|
||||||
},
|
},
|
||||||
'ad_defaults': {
|
"ad_defaults": {
|
||||||
'description': {
|
"description": {
|
||||||
'prefix': 'Test Prefix',
|
"prefix": "Test Prefix",
|
||||||
'suffix': 'Test Suffix'
|
"suffix": "Test Suffix"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'publishing': {
|
"publishing": {
|
||||||
'delete_old_ads': 'BEFORE_PUBLISH',
|
"delete_old_ads": "BEFORE_PUBLISH",
|
||||||
'delete_old_ads_by_title': False
|
"delete_old_ads_by_title": False
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def test_bot(sample_config: dict[str, Any]) -> KleinanzeigenBot:
|
def test_bot(sample_config:dict[str, Any]) -> KleinanzeigenBot:
|
||||||
"""Provides a fresh KleinanzeigenBot instance for all test classes.
|
"""Provides a fresh KleinanzeigenBot instance for all test classes.
|
||||||
|
|
||||||
Dependencies:
|
Dependencies:
|
||||||
@@ -89,7 +87,7 @@ def browser_mock() -> MagicMock:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def log_file_path(test_data_dir: str) -> str:
|
def log_file_path(test_data_dir:str) -> str:
|
||||||
"""Provides a temporary path for log files.
|
"""Provides a temporary path for log files.
|
||||||
|
|
||||||
Dependencies:
|
Dependencies:
|
||||||
@@ -99,7 +97,7 @@ def log_file_path(test_data_dir: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def test_extractor(browser_mock: MagicMock, sample_config: dict[str, Any]) -> AdExtractor:
|
def test_extractor(browser_mock:MagicMock, sample_config:dict[str, Any]) -> AdExtractor:
|
||||||
"""Provides a fresh AdExtractor instance for testing.
|
"""Provides a fresh AdExtractor instance for testing.
|
||||||
|
|
||||||
Dependencies:
|
Dependencies:
|
||||||
|
|||||||
@@ -122,9 +122,9 @@ def test_calculate_content_hash_with_none_values() -> None:
|
|||||||
)
|
)
|
||||||
])
|
])
|
||||||
def test_get_description_affixes(
|
def test_get_description_affixes(
|
||||||
config: dict[str, Any],
|
config:dict[str, Any],
|
||||||
prefix: bool,
|
prefix:bool,
|
||||||
expected: str
|
expected:str
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test get_description_affixes function with various inputs."""
|
"""Test get_description_affixes function with various inputs."""
|
||||||
result = ads.get_description_affixes(config, prefix = prefix)
|
result = ads.get_description_affixes(config, prefix = prefix)
|
||||||
@@ -157,7 +157,7 @@ def test_get_description_affixes(
|
|||||||
""
|
""
|
||||||
)
|
)
|
||||||
])
|
])
|
||||||
def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool, expected: str) -> None:
|
def test_get_description_affixes_edge_cases(config:dict[str, Any], prefix:bool, expected:str) -> None:
|
||||||
"""Test edge cases for description affix handling."""
|
"""Test edge cases for description affix handling."""
|
||||||
assert ads.get_description_affixes(config, prefix = prefix) == expected
|
assert ads.get_description_affixes(config, prefix = prefix) == expected
|
||||||
|
|
||||||
@@ -170,7 +170,7 @@ def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool
|
|||||||
(3.14, ""), # Test with a float
|
(3.14, ""), # Test with a float
|
||||||
(set(), ""), # Test with an empty set
|
(set(), ""), # Test with an empty set
|
||||||
])
|
])
|
||||||
def test_get_description_affixes_edge_cases_non_dict(config: Any, expected: str) -> None:
|
def test_get_description_affixes_edge_cases_non_dict(config:Any, expected:str) -> None:
|
||||||
"""Test get_description_affixes function with non-dict inputs."""
|
"""Test get_description_affixes function with non-dict inputs."""
|
||||||
result = ads.get_description_affixes(config, prefix=True)
|
result = ads.get_description_affixes(config, prefix = True)
|
||||||
assert result == expected
|
assert result == expected
|
||||||
|
|||||||
@@ -12,21 +12,21 @@ class TestKleinanzeigenBot:
|
|||||||
def bot(self) -> KleinanzeigenBot:
|
def bot(self) -> KleinanzeigenBot:
|
||||||
return KleinanzeigenBot()
|
return KleinanzeigenBot()
|
||||||
|
|
||||||
def test_parse_args_help(self, bot: KleinanzeigenBot) -> None:
|
def test_parse_args_help(self, bot:KleinanzeigenBot) -> None:
|
||||||
"""Test parsing of help command"""
|
"""Test parsing of help command"""
|
||||||
bot.parse_args(["app", "help"])
|
bot.parse_args(["app", "help"])
|
||||||
assert bot.command == "help"
|
assert bot.command == "help"
|
||||||
assert bot.ads_selector == "due"
|
assert bot.ads_selector == "due"
|
||||||
assert not bot.keep_old_ads
|
assert not bot.keep_old_ads
|
||||||
|
|
||||||
def test_parse_args_publish(self, bot: KleinanzeigenBot) -> None:
|
def test_parse_args_publish(self, bot:KleinanzeigenBot) -> None:
|
||||||
"""Test parsing of publish command with options"""
|
"""Test parsing of publish command with options"""
|
||||||
bot.parse_args(["app", "publish", "--ads=all", "--keep-old"])
|
bot.parse_args(["app", "publish", "--ads=all", "--keep-old"])
|
||||||
assert bot.command == "publish"
|
assert bot.command == "publish"
|
||||||
assert bot.ads_selector == "all"
|
assert bot.ads_selector == "all"
|
||||||
assert bot.keep_old_ads
|
assert bot.keep_old_ads
|
||||||
|
|
||||||
def test_get_version(self, bot: KleinanzeigenBot) -> None:
|
def test_get_version(self, bot:KleinanzeigenBot) -> None:
|
||||||
"""Test version retrieval"""
|
"""Test version retrieval"""
|
||||||
version = bot.get_version()
|
version = bot.get_version()
|
||||||
assert isinstance(version, str)
|
assert isinstance(version, str)
|
||||||
|
|||||||
@@ -12,31 +12,31 @@ from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
|
|||||||
|
|
||||||
|
|
||||||
class _DimensionsDict(TypedDict):
|
class _DimensionsDict(TypedDict):
|
||||||
dimension108: str
|
dimension108:str
|
||||||
|
|
||||||
|
|
||||||
class _UniversalAnalyticsOptsDict(TypedDict):
|
class _UniversalAnalyticsOptsDict(TypedDict):
|
||||||
dimensions: _DimensionsDict
|
dimensions:_DimensionsDict
|
||||||
|
|
||||||
|
|
||||||
class _BelenConfDict(TypedDict):
|
class _BelenConfDict(TypedDict):
|
||||||
universalAnalyticsOpts: _UniversalAnalyticsOptsDict
|
universalAnalyticsOpts:_UniversalAnalyticsOptsDict
|
||||||
|
|
||||||
|
|
||||||
class _SpecialAttributesDict(TypedDict, total = False):
|
class _SpecialAttributesDict(TypedDict, total = False):
|
||||||
art_s: str
|
art_s:str
|
||||||
condition_s: str
|
condition_s:str
|
||||||
|
|
||||||
|
|
||||||
class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never used
|
class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never used
|
||||||
belen_conf: _BelenConfDict
|
belen_conf:_BelenConfDict
|
||||||
expected: _SpecialAttributesDict
|
expected:_SpecialAttributesDict
|
||||||
|
|
||||||
|
|
||||||
class TestAdExtractorBasics:
|
class TestAdExtractorBasics:
|
||||||
"""Basic synchronous tests for AdExtractor."""
|
"""Basic synchronous tests for AdExtractor."""
|
||||||
|
|
||||||
def test_constructor(self, browser_mock: MagicMock, sample_config: dict[str, Any]) -> None:
|
def test_constructor(self, browser_mock:MagicMock, sample_config:dict[str, Any]) -> None:
|
||||||
"""Test the constructor of AdExtractor"""
|
"""Test the constructor of AdExtractor"""
|
||||||
extractor = AdExtractor(browser_mock, sample_config)
|
extractor = AdExtractor(browser_mock, sample_config)
|
||||||
assert extractor.browser == browser_mock
|
assert extractor.browser == browser_mock
|
||||||
@@ -48,7 +48,7 @@ class TestAdExtractorBasics:
|
|||||||
("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1),
|
("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1),
|
||||||
("https://www.kleinanzeigen.de/invalid-url", -1),
|
("https://www.kleinanzeigen.de/invalid-url", -1),
|
||||||
])
|
])
|
||||||
def test_extract_ad_id_from_ad_url(self, test_extractor: AdExtractor, url: str, expected_id: int) -> None:
|
def test_extract_ad_id_from_ad_url(self, test_extractor:AdExtractor, url:str, expected_id:int) -> None:
|
||||||
"""Test extraction of ad ID from different URL formats."""
|
"""Test extraction of ad ID from different URL formats."""
|
||||||
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
|
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
|
||||||
|
|
||||||
@@ -66,19 +66,19 @@ class TestAdExtractorPricing:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_pricing_info(
|
async def test_extract_pricing_info(
|
||||||
self, test_extractor: AdExtractor, price_text: str, expected_price: int | None, expected_type: str
|
self, test_extractor:AdExtractor, price_text:str, expected_price:int | None, expected_type:str
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test price extraction with different formats"""
|
"""Test price extraction with different formats"""
|
||||||
with patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = price_text):
|
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = price_text):
|
||||||
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
|
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
|
||||||
assert price == expected_price
|
assert price == expected_price
|
||||||
assert price_type == expected_type
|
assert price_type == expected_type
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_pricing_info_timeout(self, test_extractor: AdExtractor) -> None:
|
async def test_extract_pricing_info_timeout(self, test_extractor:AdExtractor) -> None:
|
||||||
"""Test price extraction when element is not found"""
|
"""Test price extraction when element is not found"""
|
||||||
with patch.object(test_extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError):
|
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||||
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
|
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
|
||||||
assert price is None
|
assert price is None
|
||||||
assert price_type == "NOT_APPLICABLE"
|
assert price_type == "NOT_APPLICABLE"
|
||||||
@@ -95,15 +95,15 @@ class TestAdExtractorShipping:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_shipping_info(
|
async def test_extract_shipping_info(
|
||||||
self, test_extractor: AdExtractor, shipping_text: str, expected_type: str, expected_cost: float | None
|
self, test_extractor:AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test shipping info extraction with different text formats."""
|
"""Test shipping info extraction with different text formats."""
|
||||||
with patch.object(test_extractor, 'page', MagicMock()), \
|
with patch.object(test_extractor, "page", MagicMock()), \
|
||||||
patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = shipping_text), \
|
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = shipping_text), \
|
||||||
patch.object(test_extractor, 'web_request', new_callable = AsyncMock) as mock_web_request:
|
patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||||
|
|
||||||
if expected_cost:
|
if expected_cost:
|
||||||
shipping_response: dict[str, Any] = {
|
shipping_response:dict[str, Any] = {
|
||||||
"data": {
|
"data": {
|
||||||
"shippingOptionsResponse": {
|
"shippingOptionsResponse": {
|
||||||
"options": [
|
"options": [
|
||||||
@@ -125,7 +125,7 @@ class TestAdExtractorShipping:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_shipping_info_with_options(self, test_extractor: AdExtractor) -> None:
|
async def test_extract_shipping_info_with_options(self, test_extractor:AdExtractor) -> None:
|
||||||
"""Test shipping info extraction with shipping options."""
|
"""Test shipping info extraction with shipping options."""
|
||||||
shipping_response = {
|
shipping_response = {
|
||||||
"content": json.dumps({
|
"content": json.dumps({
|
||||||
@@ -139,9 +139,9 @@ class TestAdExtractorShipping:
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
with patch.object(test_extractor, 'page', MagicMock()), \
|
with patch.object(test_extractor, "page", MagicMock()), \
|
||||||
patch.object(test_extractor, 'web_text', new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \
|
patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = "+ Versand ab 5,49 €"), \
|
||||||
patch.object(test_extractor, 'web_request', new_callable = AsyncMock, return_value = shipping_response):
|
patch.object(test_extractor, "web_request", new_callable = AsyncMock, return_value = shipping_response):
|
||||||
|
|
||||||
shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page()
|
shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page()
|
||||||
|
|
||||||
@@ -154,21 +154,21 @@ class TestAdExtractorNavigation:
|
|||||||
"""Tests for navigation related functionality."""
|
"""Tests for navigation related functionality."""
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_navigate_to_ad_page_with_url(self, test_extractor: AdExtractor) -> None:
|
async def test_navigate_to_ad_page_with_url(self, test_extractor:AdExtractor) -> None:
|
||||||
"""Test navigation to ad page using a URL."""
|
"""Test navigation to ad page using a URL."""
|
||||||
page_mock = AsyncMock()
|
page_mock = AsyncMock()
|
||||||
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
|
|
||||||
with patch.object(test_extractor, 'page', page_mock), \
|
with patch.object(test_extractor, "page", page_mock), \
|
||||||
patch.object(test_extractor, 'web_open', new_callable = AsyncMock) as mock_web_open, \
|
patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \
|
||||||
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError):
|
patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||||
|
|
||||||
result = await test_extractor.naviagte_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345")
|
result = await test_extractor.naviagte_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/test/12345")
|
||||||
assert result is True
|
assert result is True
|
||||||
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345")
|
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345")
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_navigate_to_ad_page_with_id(self, test_extractor: AdExtractor) -> None:
|
async def test_navigate_to_ad_page_with_id(self, test_extractor:AdExtractor) -> None:
|
||||||
"""Test navigation to ad page using an ID."""
|
"""Test navigation to ad page using an ID."""
|
||||||
page_mock = AsyncMock()
|
page_mock = AsyncMock()
|
||||||
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
@@ -186,7 +186,7 @@ class TestAdExtractorNavigation:
|
|||||||
popup_close_mock.click = AsyncMock()
|
popup_close_mock.click = AsyncMock()
|
||||||
popup_close_mock.apply = AsyncMock(return_value = True)
|
popup_close_mock.apply = AsyncMock(return_value = True)
|
||||||
|
|
||||||
def find_mock(selector_type: By, selector_value: str, **_: Any) -> Element | None:
|
def find_mock(selector_type:By, selector_value:str, **_:Any) -> Element | None:
|
||||||
if selector_type == By.ID and selector_value == "site-search-query":
|
if selector_type == By.ID and selector_value == "site-search-query":
|
||||||
return input_mock
|
return input_mock
|
||||||
if selector_type == By.ID and selector_value == "site-search-submit":
|
if selector_type == By.ID and selector_value == "site-search-submit":
|
||||||
@@ -195,20 +195,20 @@ class TestAdExtractorNavigation:
|
|||||||
return popup_close_mock
|
return popup_close_mock
|
||||||
return None
|
return None
|
||||||
|
|
||||||
with patch.object(test_extractor, 'page', page_mock), \
|
with patch.object(test_extractor, "page", page_mock), \
|
||||||
patch.object(test_extractor, 'web_open', new_callable = AsyncMock) as mock_web_open, \
|
patch.object(test_extractor, "web_open", new_callable = AsyncMock) as mock_web_open, \
|
||||||
patch.object(test_extractor, 'web_input', new_callable = AsyncMock), \
|
patch.object(test_extractor, "web_input", new_callable = AsyncMock), \
|
||||||
patch.object(test_extractor, 'web_check', new_callable = AsyncMock, return_value = True), \
|
patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True), \
|
||||||
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, side_effect = find_mock):
|
patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = find_mock):
|
||||||
|
|
||||||
result = await test_extractor.naviagte_to_ad_page(12345)
|
result = await test_extractor.naviagte_to_ad_page(12345)
|
||||||
assert result is True
|
assert result is True
|
||||||
mock_web_open.assert_called_with('https://www.kleinanzeigen.de/')
|
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/")
|
||||||
submit_button_mock.click.assert_awaited_once()
|
submit_button_mock.click.assert_awaited_once()
|
||||||
popup_close_mock.click.assert_awaited_once()
|
popup_close_mock.click.assert_awaited_once()
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_navigate_to_ad_page_with_popup(self, test_extractor: AdExtractor) -> None:
|
async def test_navigate_to_ad_page_with_popup(self, test_extractor:AdExtractor) -> None:
|
||||||
"""Test navigation to ad page with popup handling."""
|
"""Test navigation to ad page with popup handling."""
|
||||||
page_mock = AsyncMock()
|
page_mock = AsyncMock()
|
||||||
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
@@ -218,18 +218,18 @@ class TestAdExtractorNavigation:
|
|||||||
input_mock.send_keys = AsyncMock()
|
input_mock.send_keys = AsyncMock()
|
||||||
input_mock.apply = AsyncMock(return_value = True)
|
input_mock.apply = AsyncMock(return_value = True)
|
||||||
|
|
||||||
with patch.object(test_extractor, 'page', page_mock), \
|
with patch.object(test_extractor, "page", page_mock), \
|
||||||
patch.object(test_extractor, 'web_open', new_callable = AsyncMock), \
|
patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
|
||||||
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, return_value = input_mock), \
|
patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock), \
|
||||||
patch.object(test_extractor, 'web_click', new_callable = AsyncMock) as mock_web_click, \
|
patch.object(test_extractor, "web_click", new_callable = AsyncMock) as mock_web_click, \
|
||||||
patch.object(test_extractor, 'web_check', new_callable = AsyncMock, return_value = True):
|
patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True):
|
||||||
|
|
||||||
result = await test_extractor.naviagte_to_ad_page(12345)
|
result = await test_extractor.naviagte_to_ad_page(12345)
|
||||||
assert result is True
|
assert result is True
|
||||||
mock_web_click.assert_called_with(By.CLASS_NAME, 'mfp-close')
|
mock_web_click.assert_called_with(By.CLASS_NAME, "mfp-close")
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_navigate_to_ad_page_invalid_id(self, test_extractor: AdExtractor) -> None:
|
async def test_navigate_to_ad_page_invalid_id(self, test_extractor:AdExtractor) -> None:
|
||||||
"""Test navigation to ad page with invalid ID."""
|
"""Test navigation to ad page with invalid ID."""
|
||||||
page_mock = AsyncMock()
|
page_mock = AsyncMock()
|
||||||
page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0"
|
page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0"
|
||||||
@@ -240,22 +240,22 @@ class TestAdExtractorNavigation:
|
|||||||
input_mock.apply = AsyncMock(return_value = True)
|
input_mock.apply = AsyncMock(return_value = True)
|
||||||
input_mock.attrs = {}
|
input_mock.attrs = {}
|
||||||
|
|
||||||
with patch.object(test_extractor, 'page', page_mock), \
|
with patch.object(test_extractor, "page", page_mock), \
|
||||||
patch.object(test_extractor, 'web_open', new_callable = AsyncMock), \
|
patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
|
||||||
patch.object(test_extractor, 'web_find', new_callable = AsyncMock, return_value = input_mock):
|
patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = input_mock):
|
||||||
|
|
||||||
result = await test_extractor.naviagte_to_ad_page(99999)
|
result = await test_extractor.naviagte_to_ad_page(99999)
|
||||||
assert result is False
|
assert result is False
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_extract_own_ads_urls(self, test_extractor: AdExtractor) -> None:
|
async def test_extract_own_ads_urls(self, test_extractor:AdExtractor) -> None:
|
||||||
"""Test extraction of own ads URLs - basic test."""
|
"""Test extraction of own ads URLs - basic test."""
|
||||||
with patch.object(test_extractor, 'web_open', new_callable=AsyncMock), \
|
with patch.object(test_extractor, "web_open", new_callable = AsyncMock), \
|
||||||
patch.object(test_extractor, 'web_sleep', new_callable=AsyncMock), \
|
patch.object(test_extractor, "web_sleep", new_callable = AsyncMock), \
|
||||||
patch.object(test_extractor, 'web_find', new_callable=AsyncMock) as mock_web_find, \
|
patch.object(test_extractor, "web_find", new_callable = AsyncMock) as mock_web_find, \
|
||||||
patch.object(test_extractor, 'web_find_all', new_callable=AsyncMock) as mock_web_find_all, \
|
patch.object(test_extractor, "web_find_all", new_callable = AsyncMock) as mock_web_find_all, \
|
||||||
patch.object(test_extractor, 'web_scroll_page_down', new_callable=AsyncMock), \
|
patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock), \
|
||||||
patch.object(test_extractor, 'web_execute', new_callable=AsyncMock):
|
patch.object(test_extractor, "web_execute", new_callable = AsyncMock):
|
||||||
|
|
||||||
# --- Setup mock objects for DOM elements ---
|
# --- Setup mock objects for DOM elements ---
|
||||||
# Mocks needed for the actual execution flow
|
# Mocks needed for the actual execution flow
|
||||||
@@ -263,7 +263,7 @@ class TestAdExtractorNavigation:
|
|||||||
pagination_section_mock = MagicMock()
|
pagination_section_mock = MagicMock()
|
||||||
cardbox_mock = MagicMock() # Represents the <li> element
|
cardbox_mock = MagicMock() # Represents the <li> element
|
||||||
link_mock = MagicMock() # Represents the <a> element
|
link_mock = MagicMock() # Represents the <a> element
|
||||||
link_mock.attrs = {'href': '/s-anzeige/test/12345'} # Configure the desired output
|
link_mock.attrs = {"href": "/s-anzeige/test/12345"} # Configure the desired output
|
||||||
|
|
||||||
# Mocks for elements potentially checked but maybe not strictly needed for output
|
# Mocks for elements potentially checked but maybe not strictly needed for output
|
||||||
# (depending on how robust the mocking is)
|
# (depending on how robust the mocking is)
|
||||||
@@ -287,7 +287,7 @@ class TestAdExtractorNavigation:
|
|||||||
# 2. Find all '.cardbox' elements (inside loop)
|
# 2. Find all '.cardbox' elements (inside loop)
|
||||||
mock_web_find_all.side_effect = [
|
mock_web_find_all.side_effect = [
|
||||||
[], # Call 1: find 'button[aria-label="Nächste"]' -> No next button = single page
|
[], # Call 1: find 'button[aria-label="Nächste"]' -> No next button = single page
|
||||||
[cardbox_mock] # Call 2: find .cardbox -> One ad item
|
[cardbox_mock] # Call 2: find .cardbox -> One ad item
|
||||||
# Add more mocks here if pagination navigation calls web_find_all
|
# Add more mocks here if pagination navigation calls web_find_all
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -295,20 +295,20 @@ class TestAdExtractorNavigation:
|
|||||||
refs = await test_extractor.extract_own_ads_urls()
|
refs = await test_extractor.extract_own_ads_urls()
|
||||||
|
|
||||||
# --- Assertions ---
|
# --- Assertions ---
|
||||||
assert refs == ['/s-anzeige/test/12345'] # Now it should match
|
assert refs == ["/s-anzeige/test/12345"] # Now it should match
|
||||||
|
|
||||||
# Optional: Verify calls were made as expected
|
# Optional: Verify calls were made as expected
|
||||||
mock_web_find.assert_has_calls([
|
mock_web_find.assert_has_calls([
|
||||||
call(By.ID, 'my-manageitems-adlist'),
|
call(By.ID, "my-manageitems-adlist"),
|
||||||
call(By.CSS_SELECTOR, '.Pagination', timeout=10),
|
call(By.CSS_SELECTOR, ".Pagination", timeout = 10),
|
||||||
call(By.ID, 'my-manageitems-adlist'),
|
call(By.ID, "my-manageitems-adlist"),
|
||||||
call(By.CSS_SELECTOR, 'div.manageitems-item-ad h3 a.text-onSurface', parent=cardbox_mock),
|
call(By.CSS_SELECTOR, "div.manageitems-item-ad h3 a.text-onSurface", parent = cardbox_mock),
|
||||||
], any_order=False) # Check order if important
|
], any_order = False) # Check order if important
|
||||||
|
|
||||||
mock_web_find_all.assert_has_calls([
|
mock_web_find_all.assert_has_calls([
|
||||||
call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent=pagination_section_mock),
|
call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section_mock),
|
||||||
call(By.CLASS_NAME, 'cardbox', parent=ad_list_container_mock),
|
call(By.CLASS_NAME, "cardbox", parent = ad_list_container_mock),
|
||||||
], any_order=False)
|
], any_order = False)
|
||||||
|
|
||||||
|
|
||||||
class TestAdExtractorContent:
|
class TestAdExtractorContent:
|
||||||
@@ -318,14 +318,14 @@ class TestAdExtractorContent:
|
|||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def extractor_with_config(self) -> AdExtractor:
|
def extractor_with_config(self) -> AdExtractor:
|
||||||
"""Create extractor with specific config for testing prefix/suffix handling."""
|
"""Create extractor with specific config for testing prefix/suffix handling."""
|
||||||
browser_mock = MagicMock(spec=Browser)
|
browser_mock = MagicMock(spec = Browser)
|
||||||
return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests
|
return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_extract_description_with_affixes(
|
async def test_extract_description_with_affixes(
|
||||||
self,
|
self,
|
||||||
test_extractor: AdExtractor,
|
test_extractor:AdExtractor,
|
||||||
description_test_cases: list[tuple[dict[str, Any], str, str]]
|
description_test_cases:list[tuple[dict[str, Any], str, str]]
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test extraction of description with various prefix/suffix configurations."""
|
"""Test extraction of description with various prefix/suffix configurations."""
|
||||||
# Mock the page
|
# Mock the page
|
||||||
@@ -337,18 +337,18 @@ class TestAdExtractorContent:
|
|||||||
test_extractor.config = config
|
test_extractor.config = config
|
||||||
|
|
||||||
with patch.multiple(test_extractor,
|
with patch.multiple(test_extractor,
|
||||||
web_text=AsyncMock(side_effect=[
|
web_text = AsyncMock(side_effect = [
|
||||||
"Test Title", # Title
|
"Test Title", # Title
|
||||||
raw_description, # Raw description (without affixes)
|
raw_description, # Raw description (without affixes)
|
||||||
"03.02.2025" # Creation date
|
"03.02.2025" # Creation date
|
||||||
]),
|
]),
|
||||||
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
|
||||||
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
|
||||||
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
|
||||||
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
|
||||||
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
|
||||||
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
_download_images_from_ad_page = AsyncMock(return_value = []),
|
||||||
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
_extract_contact_from_ad_page = AsyncMock(return_value = {})
|
||||||
):
|
):
|
||||||
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||||
assert info["description"] == raw_description
|
assert info["description"] == raw_description
|
||||||
@@ -356,7 +356,7 @@ class TestAdExtractorContent:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_extract_description_with_affixes_timeout(
|
async def test_extract_description_with_affixes_timeout(
|
||||||
self,
|
self,
|
||||||
test_extractor: AdExtractor
|
test_extractor:AdExtractor
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test handling of timeout when extracting description."""
|
"""Test handling of timeout when extracting description."""
|
||||||
# Mock the page
|
# Mock the page
|
||||||
@@ -365,18 +365,18 @@ class TestAdExtractorContent:
|
|||||||
test_extractor.page = page_mock
|
test_extractor.page = page_mock
|
||||||
|
|
||||||
with patch.multiple(test_extractor,
|
with patch.multiple(test_extractor,
|
||||||
web_text=AsyncMock(side_effect=[
|
web_text = AsyncMock(side_effect = [
|
||||||
"Test Title", # Title succeeds
|
"Test Title", # Title succeeds
|
||||||
TimeoutError("Timeout"), # Description times out
|
TimeoutError("Timeout"), # Description times out
|
||||||
"03.02.2025" # Date succeeds
|
"03.02.2025" # Date succeeds
|
||||||
]),
|
]),
|
||||||
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
|
||||||
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
|
||||||
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
|
||||||
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
|
||||||
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
|
||||||
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
_download_images_from_ad_page = AsyncMock(return_value = []),
|
||||||
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
_extract_contact_from_ad_page = AsyncMock(return_value = {})
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||||
@@ -388,7 +388,7 @@ class TestAdExtractorContent:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_extract_description_with_affixes_no_affixes(
|
async def test_extract_description_with_affixes_no_affixes(
|
||||||
self,
|
self,
|
||||||
test_extractor: AdExtractor
|
test_extractor:AdExtractor
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test extraction of description without any affixes in config."""
|
"""Test extraction of description without any affixes in config."""
|
||||||
# Mock the page
|
# Mock the page
|
||||||
@@ -399,24 +399,24 @@ class TestAdExtractorContent:
|
|||||||
raw_description = "Original Description"
|
raw_description = "Original Description"
|
||||||
|
|
||||||
with patch.multiple(test_extractor,
|
with patch.multiple(test_extractor,
|
||||||
web_text=AsyncMock(side_effect=[
|
web_text = AsyncMock(side_effect = [
|
||||||
"Test Title", # Title
|
"Test Title", # Title
|
||||||
raw_description, # Description without affixes
|
raw_description, # Description without affixes
|
||||||
"03.02.2025" # Creation date
|
"03.02.2025" # Creation date
|
||||||
]),
|
]),
|
||||||
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
_extract_category_from_ad_page = AsyncMock(return_value = "160"),
|
||||||
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
_extract_special_attributes_from_ad_page = AsyncMock(return_value = {}),
|
||||||
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
_extract_pricing_info_from_ad_page = AsyncMock(return_value = (None, "NOT_APPLICABLE")),
|
||||||
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
_extract_shipping_info_from_ad_page = AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
|
||||||
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
_extract_sell_directly_from_ad_page = AsyncMock(return_value = False),
|
||||||
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
_download_images_from_ad_page = AsyncMock(return_value = []),
|
||||||
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
_extract_contact_from_ad_page = AsyncMock(return_value = {})
|
||||||
):
|
):
|
||||||
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||||
assert info["description"] == raw_description
|
assert info["description"] == raw_description
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_extract_sell_directly(self, test_extractor: AdExtractor) -> None:
|
async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None:
|
||||||
"""Test extraction of sell directly option."""
|
"""Test extraction of sell directly option."""
|
||||||
test_cases = [
|
test_cases = [
|
||||||
("Direkt kaufen", True),
|
("Direkt kaufen", True),
|
||||||
@@ -424,11 +424,11 @@ class TestAdExtractorContent:
|
|||||||
]
|
]
|
||||||
|
|
||||||
for text, expected in test_cases:
|
for text, expected in test_cases:
|
||||||
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, return_value=text):
|
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = text):
|
||||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||||
assert result is expected
|
assert result is expected
|
||||||
|
|
||||||
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError):
|
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||||
assert result is None
|
assert result is None
|
||||||
|
|
||||||
@@ -451,15 +451,15 @@ class TestAdExtractorCategory:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_category(self, extractor: AdExtractor) -> None:
|
async def test_extract_category(self, extractor:AdExtractor) -> None:
|
||||||
"""Test category extraction from breadcrumb."""
|
"""Test category extraction from breadcrumb."""
|
||||||
category_line = MagicMock()
|
category_line = MagicMock()
|
||||||
first_part = MagicMock()
|
first_part = MagicMock()
|
||||||
first_part.attrs = {'href': '/s-familie-kind-baby/c17'}
|
first_part.attrs = {"href": "/s-familie-kind-baby/c17"}
|
||||||
second_part = MagicMock()
|
second_part = MagicMock()
|
||||||
second_part.attrs = {'href': '/s-spielzeug/c23'}
|
second_part.attrs = {"href": "/s-spielzeug/c23"}
|
||||||
|
|
||||||
with patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
|
with patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
|
||||||
mock_web_find.side_effect = [
|
mock_web_find.side_effect = [
|
||||||
category_line,
|
category_line,
|
||||||
first_part,
|
first_part,
|
||||||
@@ -469,15 +469,15 @@ class TestAdExtractorCategory:
|
|||||||
result = await extractor._extract_category_from_ad_page()
|
result = await extractor._extract_category_from_ad_page()
|
||||||
assert result == "17/23"
|
assert result == "17/23"
|
||||||
|
|
||||||
mock_web_find.assert_any_call(By.ID, 'vap-brdcrmb')
|
mock_web_find.assert_any_call(By.ID, "vap-brdcrmb")
|
||||||
mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line)
|
mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
|
||||||
mock_web_find.assert_any_call(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line)
|
mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_special_attributes_empty(self, extractor: AdExtractor) -> None:
|
async def test_extract_special_attributes_empty(self, extractor:AdExtractor) -> None:
|
||||||
"""Test extraction of special attributes when empty."""
|
"""Test extraction of special attributes when empty."""
|
||||||
with patch.object(extractor, 'web_execute', new_callable = AsyncMock) as mock_web_execute:
|
with patch.object(extractor, "web_execute", new_callable = AsyncMock) as mock_web_execute:
|
||||||
mock_web_execute.return_value = {
|
mock_web_execute.return_value = {
|
||||||
"universalAnalyticsOpts": {
|
"universalAnalyticsOpts": {
|
||||||
"dimensions": {
|
"dimensions": {
|
||||||
@@ -507,11 +507,11 @@ class TestAdExtractorContact:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_contact_info(self, extractor: AdExtractor) -> None:
|
async def test_extract_contact_info(self, extractor:AdExtractor) -> None:
|
||||||
"""Test extraction of contact information."""
|
"""Test extraction of contact information."""
|
||||||
with patch.object(extractor, 'page', MagicMock()), \
|
with patch.object(extractor, "page", MagicMock()), \
|
||||||
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \
|
patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \
|
||||||
patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
|
patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
|
||||||
|
|
||||||
mock_web_text.side_effect = [
|
mock_web_text.side_effect = [
|
||||||
"12345 Berlin - Mitte",
|
"12345 Berlin - Mitte",
|
||||||
@@ -535,22 +535,22 @@ class TestAdExtractorContact:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_contact_info_timeout(self, extractor: AdExtractor) -> None:
|
async def test_extract_contact_info_timeout(self, extractor:AdExtractor) -> None:
|
||||||
"""Test contact info extraction when elements are not found."""
|
"""Test contact info extraction when elements are not found."""
|
||||||
with patch.object(extractor, 'page', MagicMock()), \
|
with patch.object(extractor, "page", MagicMock()), \
|
||||||
patch.object(extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError()), \
|
patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError()), \
|
||||||
patch.object(extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError()), \
|
patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError()), \
|
||||||
pytest.raises(TimeoutError):
|
pytest.raises(TimeoutError):
|
||||||
|
|
||||||
await extractor._extract_contact_from_ad_page()
|
await extractor._extract_contact_from_ad_page()
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_extract_contact_info_with_phone(self, extractor: AdExtractor) -> None:
|
async def test_extract_contact_info_with_phone(self, extractor:AdExtractor) -> None:
|
||||||
"""Test extraction of contact information including phone number."""
|
"""Test extraction of contact information including phone number."""
|
||||||
with patch.object(extractor, 'page', MagicMock()), \
|
with patch.object(extractor, "page", MagicMock()), \
|
||||||
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \
|
patch.object(extractor, "web_text", new_callable = AsyncMock) as mock_web_text, \
|
||||||
patch.object(extractor, 'web_find', new_callable = AsyncMock) as mock_web_find:
|
patch.object(extractor, "web_find", new_callable = AsyncMock) as mock_web_find:
|
||||||
|
|
||||||
mock_web_text.side_effect = [
|
mock_web_text.side_effect = [
|
||||||
"12345 Berlin - Mitte",
|
"12345 Berlin - Mitte",
|
||||||
@@ -588,19 +588,19 @@ class TestAdExtractorDownload:
|
|||||||
return AdExtractor(browser_mock, config_mock)
|
return AdExtractor(browser_mock, config_mock)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_download_ad_existing_directory(self, extractor: AdExtractor) -> None:
|
async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None:
|
||||||
"""Test downloading an ad when the directory already exists."""
|
"""Test downloading an ad when the directory already exists."""
|
||||||
with patch('os.path.exists') as mock_exists, \
|
with patch("os.path.exists") as mock_exists, \
|
||||||
patch('os.path.isdir') as mock_isdir, \
|
patch("os.path.isdir") as mock_isdir, \
|
||||||
patch('os.makedirs') as mock_makedirs, \
|
patch("os.makedirs") as mock_makedirs, \
|
||||||
patch('os.mkdir') as mock_mkdir, \
|
patch("os.mkdir") as mock_mkdir, \
|
||||||
patch('shutil.rmtree') as mock_rmtree, \
|
patch("shutil.rmtree") as mock_rmtree, \
|
||||||
patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \
|
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||||
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract:
|
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
|
||||||
|
|
||||||
base_dir = 'downloaded-ads'
|
base_dir = "downloaded-ads"
|
||||||
ad_dir = os.path.join(base_dir, 'ad_12345')
|
ad_dir = os.path.join(base_dir, "ad_12345")
|
||||||
yaml_path = os.path.join(ad_dir, 'ad_12345.yaml')
|
yaml_path = os.path.join(ad_dir, "ad_12345.yaml")
|
||||||
|
|
||||||
# Configure mocks for directory checks
|
# Configure mocks for directory checks
|
||||||
existing_paths = {base_dir, ad_dir}
|
existing_paths = {base_dir, ad_dir}
|
||||||
@@ -632,32 +632,32 @@ class TestAdExtractorDownload:
|
|||||||
# Workaround for hard-coded path in download_ad
|
# Workaround for hard-coded path in download_ad
|
||||||
actual_call = mock_save_dict.call_args
|
actual_call = mock_save_dict.call_args
|
||||||
assert actual_call is not None
|
assert actual_call is not None
|
||||||
actual_path = actual_call[0][0].replace('/', os.path.sep)
|
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
||||||
assert actual_path == yaml_path
|
assert actual_path == yaml_path
|
||||||
assert actual_call[0][1] == mock_extract.return_value
|
assert actual_call[0][1] == mock_extract.return_value
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
async def test_download_images_no_images(self, extractor: AdExtractor) -> None:
|
async def test_download_images_no_images(self, extractor:AdExtractor) -> None:
|
||||||
"""Test image download when no images are found."""
|
"""Test image download when no images are found."""
|
||||||
with patch.object(extractor, 'web_find', new_callable = AsyncMock, side_effect = TimeoutError):
|
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||||
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
|
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
|
||||||
assert len(image_paths) == 0
|
assert len(image_paths) == 0
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_download_ad(self, extractor: AdExtractor) -> None:
|
async def test_download_ad(self, extractor:AdExtractor) -> None:
|
||||||
"""Test downloading an entire ad."""
|
"""Test downloading an entire ad."""
|
||||||
with patch('os.path.exists') as mock_exists, \
|
with patch("os.path.exists") as mock_exists, \
|
||||||
patch('os.path.isdir') as mock_isdir, \
|
patch("os.path.isdir") as mock_isdir, \
|
||||||
patch('os.makedirs') as mock_makedirs, \
|
patch("os.makedirs") as mock_makedirs, \
|
||||||
patch('os.mkdir') as mock_mkdir, \
|
patch("os.mkdir") as mock_mkdir, \
|
||||||
patch('shutil.rmtree') as mock_rmtree, \
|
patch("shutil.rmtree") as mock_rmtree, \
|
||||||
patch('kleinanzeigen_bot.extract.dicts.save_dict', autospec = True) as mock_save_dict, \
|
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||||
patch.object(extractor, '_extract_ad_page_info', new_callable = AsyncMock) as mock_extract:
|
patch.object(extractor, "_extract_ad_page_info", new_callable = AsyncMock) as mock_extract:
|
||||||
|
|
||||||
base_dir = 'downloaded-ads'
|
base_dir = "downloaded-ads"
|
||||||
ad_dir = os.path.join(base_dir, 'ad_12345')
|
ad_dir = os.path.join(base_dir, "ad_12345")
|
||||||
yaml_path = os.path.join(ad_dir, 'ad_12345.yaml')
|
yaml_path = os.path.join(ad_dir, "ad_12345.yaml")
|
||||||
|
|
||||||
# Configure mocks for directory checks
|
# Configure mocks for directory checks
|
||||||
mock_exists.return_value = False
|
mock_exists.return_value = False
|
||||||
@@ -690,6 +690,6 @@ class TestAdExtractorDownload:
|
|||||||
# Get the actual call arguments
|
# Get the actual call arguments
|
||||||
actual_call = mock_save_dict.call_args
|
actual_call = mock_save_dict.call_args
|
||||||
assert actual_call is not None
|
assert actual_call is not None
|
||||||
actual_path = actual_call[0][0].replace('/', os.path.sep)
|
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
||||||
assert actual_path == yaml_path
|
assert actual_path == yaml_path
|
||||||
assert actual_call[0][1] == mock_extract.return_value
|
assert actual_call[0][1] == mock_extract.return_value
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from kleinanzeigen_bot.utils import i18n
|
|||||||
("fr_CA", ("fr", "CA", "UTF-8")), # Test with language + region, no encoding
|
("fr_CA", ("fr", "CA", "UTF-8")), # Test with language + region, no encoding
|
||||||
("pt_BR.iso8859-1", ("pt", "BR", "ISO8859-1")), # Test with language + region + encoding
|
("pt_BR.iso8859-1", ("pt", "BR", "ISO8859-1")), # Test with language + region + encoding
|
||||||
])
|
])
|
||||||
def test_detect_locale(monkeypatch: MonkeyPatch, lang: str | None, expected: i18n.Locale) -> None:
|
def test_detect_locale(monkeypatch:MonkeyPatch, lang:str | None, expected:i18n.Locale) -> None:
|
||||||
"""
|
"""
|
||||||
Pytest test case to verify detect_system_language() behavior under various LANG values.
|
Pytest test case to verify detect_system_language() behavior under various LANG values.
|
||||||
"""
|
"""
|
||||||
@@ -49,7 +49,7 @@ def test_pluralize(
|
|||||||
noun:str,
|
noun:str,
|
||||||
count:int,
|
count:int,
|
||||||
prefix_with_count:bool,
|
prefix_with_count:bool,
|
||||||
expected: str
|
expected:str
|
||||||
) -> None:
|
) -> None:
|
||||||
i18n.set_current_locale(i18n.Locale(lang, "US", "UTF_8"))
|
i18n.set_current_locale(i18n.Locale(lang, "US", "UTF_8"))
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -26,12 +26,12 @@ from ruamel.yaml import YAML
|
|||||||
from kleinanzeigen_bot import resources
|
from kleinanzeigen_bot import resources
|
||||||
|
|
||||||
# Messages that are intentionally not translated (internal/debug messages)
|
# Messages that are intentionally not translated (internal/debug messages)
|
||||||
EXCLUDED_MESSAGES: dict[str, set[str]] = {
|
EXCLUDED_MESSAGES:dict[str, set[str]] = {
|
||||||
"kleinanzeigen_bot/__init__.py": {"############################################"}
|
"kleinanzeigen_bot/__init__.py": {"############################################"}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Special modules that are known to be needed even if not in messages_by_file
|
# Special modules that are known to be needed even if not in messages_by_file
|
||||||
KNOWN_NEEDED_MODULES = {'getopt.py'}
|
KNOWN_NEEDED_MODULES = {"getopt.py"}
|
||||||
|
|
||||||
# Type aliases for better readability
|
# Type aliases for better readability
|
||||||
ModulePath = str
|
ModulePath = str
|
||||||
@@ -45,12 +45,12 @@ MissingDict = dict[FunctionName, dict[Message, set[Message]]]
|
|||||||
@dataclass
|
@dataclass
|
||||||
class MessageLocation:
|
class MessageLocation:
|
||||||
"""Represents the location of a message in the codebase."""
|
"""Represents the location of a message in the codebase."""
|
||||||
module: str
|
module:str
|
||||||
function: str
|
function:str
|
||||||
message: str
|
message:str
|
||||||
|
|
||||||
|
|
||||||
def _get_function_name(node: ast.AST) -> str:
|
def _get_function_name(node:ast.AST) -> str:
|
||||||
"""
|
"""
|
||||||
Get the name of the function containing this AST node.
|
Get the name of the function containing this AST node.
|
||||||
This matches i18n.py's behavior which only uses the function name for translation lookups.
|
This matches i18n.py's behavior which only uses the function name for translation lookups.
|
||||||
@@ -63,14 +63,14 @@ def _get_function_name(node: ast.AST) -> str:
|
|||||||
The function name or "module" for module-level code
|
The function name or "module" for module-level code
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def find_parent_context(n: ast.AST) -> tuple[str | None, str | None]:
|
def find_parent_context(n:ast.AST) -> tuple[str | None, str | None]:
|
||||||
"""Find the containing class and function names."""
|
"""Find the containing class and function names."""
|
||||||
class_name = None
|
class_name = None
|
||||||
function_name = None
|
function_name = None
|
||||||
current = n
|
current = n
|
||||||
|
|
||||||
while hasattr(current, '_parent'):
|
while hasattr(current, "_parent"):
|
||||||
current = getattr(current, '_parent')
|
current = getattr(current, "_parent")
|
||||||
if isinstance(current, ast.ClassDef) and not class_name:
|
if isinstance(current, ast.ClassDef) and not class_name:
|
||||||
class_name = current.name
|
class_name = current.name
|
||||||
elif isinstance(current, ast.FunctionDef) or isinstance(current, ast.AsyncFunctionDef) and not function_name:
|
elif isinstance(current, ast.FunctionDef) or isinstance(current, ast.AsyncFunctionDef) and not function_name:
|
||||||
@@ -84,7 +84,7 @@ def _get_function_name(node: ast.AST) -> str:
|
|||||||
return "module" # For module-level code
|
return "module" # For module-level code
|
||||||
|
|
||||||
|
|
||||||
def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> MessageDict:
|
def _extract_log_messages(file_path:str, exclude_debug:bool = False) -> MessageDict:
|
||||||
"""
|
"""
|
||||||
Extract all translatable messages from a Python file with their function context.
|
Extract all translatable messages from a Python file with their function context.
|
||||||
|
|
||||||
@@ -94,27 +94,27 @@ def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> Message
|
|||||||
Returns:
|
Returns:
|
||||||
Dictionary mapping function names to their messages
|
Dictionary mapping function names to their messages
|
||||||
"""
|
"""
|
||||||
with open(file_path, 'r', encoding = 'utf-8') as file:
|
with open(file_path, "r", encoding = "utf-8") as file:
|
||||||
tree = ast.parse(file.read(), filename = file_path)
|
tree = ast.parse(file.read(), filename = file_path)
|
||||||
|
|
||||||
# Add parent references for context tracking
|
# Add parent references for context tracking
|
||||||
for parent in ast.walk(tree):
|
for parent in ast.walk(tree):
|
||||||
for child in ast.iter_child_nodes(parent):
|
for child in ast.iter_child_nodes(parent):
|
||||||
setattr(child, '_parent', parent)
|
setattr(child, "_parent", parent)
|
||||||
|
|
||||||
messages: MessageDict = defaultdict(lambda: defaultdict(set))
|
messages:MessageDict = defaultdict(lambda: defaultdict(set))
|
||||||
|
|
||||||
def add_message(function: str, msg: str) -> None:
|
def add_message(function:str, msg:str) -> None:
|
||||||
"""Add a message to the messages dictionary."""
|
"""Add a message to the messages dictionary."""
|
||||||
if function not in messages:
|
if function not in messages:
|
||||||
messages[function] = defaultdict(set)
|
messages[function] = defaultdict(set)
|
||||||
if msg not in messages[function]:
|
if msg not in messages[function]:
|
||||||
messages[function][msg] = {msg}
|
messages[function][msg] = {msg}
|
||||||
|
|
||||||
def extract_string_value(node: ast.AST) -> str | None:
|
def extract_string_value(node:ast.AST) -> str | None:
|
||||||
"""Safely extract string value from an AST node."""
|
"""Safely extract string value from an AST node."""
|
||||||
if isinstance(node, ast.Constant):
|
if isinstance(node, ast.Constant):
|
||||||
value = getattr(node, 'value', None)
|
value = getattr(node, "value", None)
|
||||||
return value if isinstance(value, str) else None
|
return value if isinstance(value, str) else None
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -127,24 +127,24 @@ def _extract_log_messages(file_path: str, exclude_debug:bool = False) -> Message
|
|||||||
# Extract messages from various call types
|
# Extract messages from various call types
|
||||||
if (isinstance(node.func, ast.Attribute) and
|
if (isinstance(node.func, ast.Attribute) and
|
||||||
isinstance(node.func.value, ast.Name) and
|
isinstance(node.func.value, ast.Name) and
|
||||||
node.func.value.id in {'LOG', 'logger', 'logging'} and
|
node.func.value.id in {"LOG", "logger", "logging"} and
|
||||||
node.func.attr in {None if exclude_debug else 'debug', 'info', 'warning', 'error', 'exception', 'critical'}):
|
node.func.attr in {None if exclude_debug else "debug", "info", "warning", "error", "exception", "critical"}):
|
||||||
if node.args:
|
if node.args:
|
||||||
msg = extract_string_value(node.args[0])
|
msg = extract_string_value(node.args[0])
|
||||||
if msg:
|
if msg:
|
||||||
add_message(function_name, msg)
|
add_message(function_name, msg)
|
||||||
|
|
||||||
# Handle gettext calls
|
# Handle gettext calls
|
||||||
elif ((isinstance(node.func, ast.Name) and node.func.id == '_') or
|
elif ((isinstance(node.func, ast.Name) and node.func.id == "_") or
|
||||||
(isinstance(node.func, ast.Attribute) and node.func.attr == 'gettext')):
|
(isinstance(node.func, ast.Attribute) and node.func.attr == "gettext")):
|
||||||
if node.args:
|
if node.args:
|
||||||
msg = extract_string_value(node.args[0])
|
msg = extract_string_value(node.args[0])
|
||||||
if msg:
|
if msg:
|
||||||
add_message(function_name, msg)
|
add_message(function_name, msg)
|
||||||
|
|
||||||
# Handle other translatable function calls
|
# Handle other translatable function calls
|
||||||
elif isinstance(node.func, ast.Name) and node.func.id in {'ainput', 'pluralize', 'ensure'}:
|
elif isinstance(node.func, ast.Name) and node.func.id in {"ainput", "pluralize", "ensure"}:
|
||||||
arg_index = 0 if node.func.id == 'ainput' else 1
|
arg_index = 0 if node.func.id == "ainput" else 1
|
||||||
if len(node.args) > arg_index:
|
if len(node.args) > arg_index:
|
||||||
msg = extract_string_value(node.args[arg_index])
|
msg = extract_string_value(node.args[arg_index])
|
||||||
if msg:
|
if msg:
|
||||||
@@ -162,10 +162,10 @@ def _get_all_log_messages(exclude_debug:bool = False) -> dict[str, MessageDict]:
|
|||||||
Returns:
|
Returns:
|
||||||
Dictionary mapping module paths to their function messages
|
Dictionary mapping module paths to their function messages
|
||||||
"""
|
"""
|
||||||
src_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'src', 'kleinanzeigen_bot')
|
src_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "src", "kleinanzeigen_bot")
|
||||||
print(f"\nScanning for messages in directory: {src_dir}")
|
print(f"\nScanning for messages in directory: {src_dir}")
|
||||||
|
|
||||||
messages_by_file: dict[str, MessageDict] = {
|
messages_by_file:dict[str, MessageDict] = {
|
||||||
# Special case for getopt.py which is imported
|
# Special case for getopt.py which is imported
|
||||||
"getopt.py": {
|
"getopt.py": {
|
||||||
"do_longs": {
|
"do_longs": {
|
||||||
@@ -187,15 +187,15 @@ def _get_all_log_messages(exclude_debug:bool = False) -> dict[str, MessageDict]:
|
|||||||
|
|
||||||
for root, _, filenames in os.walk(src_dir):
|
for root, _, filenames in os.walk(src_dir):
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
if filename.endswith('.py'):
|
if filename.endswith(".py"):
|
||||||
file_path = os.path.join(root, filename)
|
file_path = os.path.join(root, filename)
|
||||||
relative_path = os.path.relpath(file_path, src_dir)
|
relative_path = os.path.relpath(file_path, src_dir)
|
||||||
if relative_path.startswith('resources/'):
|
if relative_path.startswith("resources/"):
|
||||||
continue
|
continue
|
||||||
messages = _extract_log_messages(file_path, exclude_debug)
|
messages = _extract_log_messages(file_path, exclude_debug)
|
||||||
if messages:
|
if messages:
|
||||||
module_path = os.path.join('kleinanzeigen_bot', relative_path)
|
module_path = os.path.join("kleinanzeigen_bot", relative_path)
|
||||||
module_path = module_path.replace(os.sep, '/')
|
module_path = module_path.replace(os.sep, "/")
|
||||||
messages_by_file[module_path] = messages
|
messages_by_file[module_path] = messages
|
||||||
|
|
||||||
return messages_by_file
|
return messages_by_file
|
||||||
@@ -217,7 +217,7 @@ def _get_available_languages() -> list[str]:
|
|||||||
return sorted(languages)
|
return sorted(languages)
|
||||||
|
|
||||||
|
|
||||||
def _get_translations_for_language(lang: str) -> TranslationDict:
|
def _get_translations_for_language(lang:str) -> TranslationDict:
|
||||||
"""
|
"""
|
||||||
Get translations for a specific language from its YAML file.
|
Get translations for a specific language from its YAML file.
|
||||||
|
|
||||||
@@ -227,7 +227,7 @@ def _get_translations_for_language(lang: str) -> TranslationDict:
|
|||||||
Returns:
|
Returns:
|
||||||
Dictionary containing all translations for the language
|
Dictionary containing all translations for the language
|
||||||
"""
|
"""
|
||||||
yaml = YAML(typ = 'safe')
|
yaml = YAML(typ = "safe")
|
||||||
translation_file = f"translations.{lang}.yaml"
|
translation_file = f"translations.{lang}.yaml"
|
||||||
print(f"Loading translations from {translation_file}")
|
print(f"Loading translations from {translation_file}")
|
||||||
content = files(resources).joinpath(translation_file).read_text()
|
content = files(resources).joinpath(translation_file).read_text()
|
||||||
@@ -235,10 +235,10 @@ def _get_translations_for_language(lang: str) -> TranslationDict:
|
|||||||
return translations
|
return translations
|
||||||
|
|
||||||
|
|
||||||
def _find_translation(translations: TranslationDict,
|
def _find_translation(translations:TranslationDict,
|
||||||
module: str,
|
module:str,
|
||||||
function: str,
|
function:str,
|
||||||
message: str) -> bool:
|
message:str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if a translation exists for a given message in the exact location where i18n.py will look.
|
Check if a translation exists for a given message in the exact location where i18n.py will look.
|
||||||
This matches the lookup logic in i18n.py which uses dicts.safe_get().
|
This matches the lookup logic in i18n.py which uses dicts.safe_get().
|
||||||
@@ -253,11 +253,11 @@ def _find_translation(translations: TranslationDict,
|
|||||||
True if translation exists in the correct location, False otherwise
|
True if translation exists in the correct location, False otherwise
|
||||||
"""
|
"""
|
||||||
# Special case for getopt.py
|
# Special case for getopt.py
|
||||||
if module == 'getopt.py':
|
if module == "getopt.py":
|
||||||
return bool(translations.get(module, {}).get(function, {}).get(message))
|
return bool(translations.get(module, {}).get(function, {}).get(message))
|
||||||
|
|
||||||
# Add kleinanzeigen_bot/ prefix if not present
|
# Add kleinanzeigen_bot/ prefix if not present
|
||||||
module_path = f'kleinanzeigen_bot/{module}' if not module.startswith('kleinanzeigen_bot/') else module
|
module_path = f'kleinanzeigen_bot/{module}' if not module.startswith("kleinanzeigen_bot/") else module
|
||||||
|
|
||||||
# Check if module exists in translations
|
# Check if module exists in translations
|
||||||
module_trans = translations.get(module_path, {})
|
module_trans = translations.get(module_path, {})
|
||||||
@@ -277,10 +277,10 @@ def _find_translation(translations: TranslationDict,
|
|||||||
return has_translation
|
return has_translation
|
||||||
|
|
||||||
|
|
||||||
def _message_exists_in_code(code_messages: dict[str, MessageDict],
|
def _message_exists_in_code(code_messages:dict[str, MessageDict],
|
||||||
module: str,
|
module:str,
|
||||||
function: str,
|
function:str,
|
||||||
message: str) -> bool:
|
message:str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if a message exists in the code at the given location.
|
Check if a message exists in the code at the given location.
|
||||||
This is the reverse of _find_translation - it checks if a translation's message
|
This is the reverse of _find_translation - it checks if a translation's message
|
||||||
@@ -296,11 +296,11 @@ def _message_exists_in_code(code_messages: dict[str, MessageDict],
|
|||||||
True if message exists in the code, False otherwise
|
True if message exists in the code, False otherwise
|
||||||
"""
|
"""
|
||||||
# Special case for getopt.py
|
# Special case for getopt.py
|
||||||
if module == 'getopt.py':
|
if module == "getopt.py":
|
||||||
return bool(code_messages.get(module, {}).get(function, {}).get(message))
|
return bool(code_messages.get(module, {}).get(function, {}).get(message))
|
||||||
|
|
||||||
# Remove kleinanzeigen_bot/ prefix if present for code message lookup
|
# Remove kleinanzeigen_bot/ prefix if present for code message lookup
|
||||||
module_path = module[len('kleinanzeigen_bot/'):] if module.startswith('kleinanzeigen_bot/') else module
|
module_path = module[len("kleinanzeigen_bot/"):] if module.startswith("kleinanzeigen_bot/") else module
|
||||||
module_path = f'kleinanzeigen_bot/{module_path}'
|
module_path = f'kleinanzeigen_bot/{module_path}'
|
||||||
|
|
||||||
# Check if module exists in code messages
|
# Check if module exists in code messages
|
||||||
@@ -318,7 +318,7 @@ def _message_exists_in_code(code_messages: dict[str, MessageDict],
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("lang", _get_available_languages())
|
@pytest.mark.parametrize("lang", _get_available_languages())
|
||||||
def test_all_log_messages_have_translations(lang: str) -> None:
|
def test_all_log_messages_have_translations(lang:str) -> None:
|
||||||
"""
|
"""
|
||||||
Test that all translatable messages in the code have translations for each language.
|
Test that all translatable messages in the code have translations for each language.
|
||||||
|
|
||||||
@@ -345,7 +345,7 @@ def test_all_log_messages_have_translations(lang: str) -> None:
|
|||||||
def make_inner_dict() -> defaultdict[str, set[str]]:
|
def make_inner_dict() -> defaultdict[str, set[str]]:
|
||||||
return defaultdict(set)
|
return defaultdict(set)
|
||||||
|
|
||||||
by_module: defaultdict[str, defaultdict[str, set[str]]] = defaultdict(make_inner_dict)
|
by_module:defaultdict[str, defaultdict[str, set[str]]] = defaultdict(make_inner_dict)
|
||||||
|
|
||||||
for loc in missing_translations:
|
for loc in missing_translations:
|
||||||
assert isinstance(loc.module, str), "Module must be a string"
|
assert isinstance(loc.module, str), "Module must be a string"
|
||||||
@@ -364,7 +364,7 @@ def test_all_log_messages_have_translations(lang: str) -> None:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("lang", _get_available_languages())
|
@pytest.mark.parametrize("lang", _get_available_languages())
|
||||||
def test_no_obsolete_translations(lang: str) -> None:
|
def test_no_obsolete_translations(lang:str) -> None:
|
||||||
"""
|
"""
|
||||||
Test that all translations in each language YAML file are actually used in the code.
|
Test that all translations in each language YAML file are actually used in the code.
|
||||||
|
|
||||||
@@ -376,7 +376,7 @@ def test_no_obsolete_translations(lang: str) -> None:
|
|||||||
"""
|
"""
|
||||||
messages_by_file = _get_all_log_messages(exclude_debug = False)
|
messages_by_file = _get_all_log_messages(exclude_debug = False)
|
||||||
translations = _get_translations_for_language(lang)
|
translations = _get_translations_for_language(lang)
|
||||||
obsolete_items: list[tuple[str, str, str]] = []
|
obsolete_items:list[tuple[str, str, str]] = []
|
||||||
|
|
||||||
for module, module_trans in translations.items():
|
for module, module_trans in translations.items():
|
||||||
if not isinstance(module_trans, dict):
|
if not isinstance(module_trans, dict):
|
||||||
@@ -402,7 +402,7 @@ def test_no_obsolete_translations(lang: str) -> None:
|
|||||||
obsolete_str = f"\nObsolete translations found for language [{lang}]:\n"
|
obsolete_str = f"\nObsolete translations found for language [{lang}]:\n"
|
||||||
|
|
||||||
# Group by module and function for better readability
|
# Group by module and function for better readability
|
||||||
by_module: defaultdict[str, defaultdict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
|
by_module:defaultdict[str, defaultdict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
|
||||||
|
|
||||||
for module, function, message in obsolete_items:
|
for module, function, message in obsolete_items:
|
||||||
by_module[module][function].append(message)
|
by_module[module][function].append(message)
|
||||||
|
|||||||
Reference in New Issue
Block a user