mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
2404 lines
115 KiB
Python
2404 lines
115 KiB
Python
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||
import atexit, asyncio, enum, json, os, re, signal, sys, textwrap # isort: skip
|
||
import getopt # pylint: disable=deprecated-module
|
||
import urllib.parse as urllib_parse
|
||
from datetime import datetime
|
||
from gettext import gettext as _
|
||
from pathlib import Path
|
||
from typing import Any, Final, Sequence, cast
|
||
|
||
import certifi, colorama, nodriver # isort: skip
|
||
from nodriver.core.connection import ProtocolException
|
||
from ruamel.yaml import YAML
|
||
from wcmatch import glob
|
||
|
||
from . import extract, resources
|
||
from ._version import __version__
|
||
from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial, Contact, calculate_auto_price, calculate_auto_price_with_trace
|
||
from .model.config_model import Config
|
||
from .update_checker import UpdateChecker
|
||
from .utils import diagnostics, dicts, error_handlers, loggers, misc, xdg_paths
|
||
from .utils.exceptions import CaptchaEncountered
|
||
from .utils.files import abspath
|
||
from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale
|
||
from .utils.misc import ainput, ensure, is_frozen
|
||
from .utils.timing_collector import TimingCollector
|
||
from .utils.web_scraping_mixin import By, Element, Is, WebScrapingMixin
|
||
|
||
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
|
||
|
||
# Module-level logger. Starts at INFO; parse_args() raises it to DEBUG for -v/--verbose.
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
LOG.setLevel(loggers.INFO)

# NOTE(review): presumably the retry limit of the publish logic - not referenced in this part of the file.
PUBLISH_MAX_RETRIES:Final[int] = 3
# Matches a comma-separated list of purely numeric IDs such as "1" or "1,2,3" (no spaces, no empty items).
_NUMERIC_IDS_RE:Final[re.Pattern[str]] = re.compile(r"^\d+(,\d+)*$")
# (locator type, locator value) pairs; NOTE(review): judging by the name, used to detect a logged-in user - confirm at call site.
_LOGIN_DETECTION_SELECTORS:Final[list[tuple["By", str]]] = [
    (By.CLASS_NAME, "mr-medium"),
    (By.ID, "user-email"),
]
# NOTE(review): appears to hold one label per entry of _LOGIN_DETECTION_SELECTORS, in the same order - confirm.
_LOGIN_DETECTION_SELECTOR_LABELS:Final[tuple[str, ...]] = ("user_info_primary", "user_info_secondary")

# Executed at import time.
colorama.just_fix_windows_console()
|
||
|
||
|
||
def _format_login_detection_selectors(selectors:Sequence[tuple["By", str]]) -> str:
    """Render selector pairs as ``TYPE_NAME=value`` items joined by ", ".

    :param selectors: Pairs of (locator type, locator value); the type's ``name`` attribute is used
    :return: The joined representation, or an empty string for an empty sequence
    """
    rendered = [f"{kind.name}={value}" for kind, value in selectors]
    return ", ".join(rendered)
|
||
|
||
|
||
class AdUpdateStrategy(enum.Enum):
    """Closed set of ad update strategies."""

    REPLACE = 1
    MODIFY = 2
|
||
|
||
|
||
class LoginState(enum.Enum):
    """Tri-state login status: logged in, logged out, or not determinable."""

    LOGGED_IN = 1
    LOGGED_OUT = 2
    UNKNOWN = 3
|
||
|
||
|
||
def _repost_cycle_ready(
    ad_cfg:Ad,
    ad_file_relative:str,
    repost_state:tuple[int, int, int, int] | None = None,
) -> bool:
    """
    Tell whether the repost-based delay allows another price reduction.

    Logs the reason at INFO level whenever the answer is False.

    :param ad_cfg: The ad configuration
    :param ad_file_relative: Relative path to the ad file for logging
    :param repost_state: Optional precomputed tuple as returned by _repost_delay_state();
        computed from ad_cfg when omitted
    :return: True if a further reduction cycle may be applied, False otherwise
    """
    state = repost_state or _repost_delay_state(ad_cfg)
    total_reposts, delay_reposts, applied_cycles, eligible_cycles = state

    # Still inside the configured number of reposts that must pass before the first reduction.
    if total_reposts <= delay_reposts:
        outstanding = delay_reposts + 1 - total_reposts
        LOG.info(
            "Auto price reduction delayed for [%s]: waiting %s more reposts (completed %s, applied %s reductions)",
            ad_file_relative,
            max(outstanding, 1),  # Clamp to 1 to avoid showing "0 more reposts" when at threshold
            total_reposts,
            applied_cycles,
        )
        return False

    # Every eligible repost has already been matched by a reduction.
    if applied_cycles >= eligible_cycles:
        LOG.info(
            "Auto price reduction already applied for [%s]: %s reductions match %s eligible reposts", ad_file_relative, applied_cycles, eligible_cycles
        )
        return False

    return True
|
||
|
||
|
||
def _day_delay_elapsed(
    ad_cfg:Ad,
    ad_file_relative:str,
    day_delay_state:tuple[bool, int | None, datetime | None] | None = None,
) -> bool:
    """
    Tell whether the configured day delay permits a price reduction.

    Logs the reason at INFO level when the delay blocks the reduction.

    :param ad_cfg: The ad configuration
    :param ad_file_relative: Relative path to the ad file for logging
    :param day_delay_state: Optional precomputed tuple as returned by _day_delay_state();
        computed from ad_cfg when omitted
    :return: True if the delay has elapsed (or no delay is configured), False otherwise
    """
    delay_days = ad_cfg.auto_price_reduction.delay_days
    state = day_delay_state or _day_delay_state(ad_cfg)
    is_ready, elapsed_days, reference = state

    # A delay of zero days never blocks.
    if delay_days == 0:
        return True

    if not reference:
        LOG.info("Auto price reduction delayed for [%s]: waiting %s days but publish timestamp missing", ad_file_relative, delay_days)
        return False

    if is_ready or elapsed_days is None:
        return True

    LOG.info("Auto price reduction delayed for [%s]: waiting %s days (elapsed %s)", ad_file_relative, delay_days, elapsed_days)
    return False
|
||
|
||
|
||
def _repost_delay_state(ad_cfg:Ad) -> tuple[int, int, int, int]:
    """Collect the counters that drive the repost-based reduction delay.

    Missing (falsy) ``repost_count`` / ``price_reduction_count`` values are treated as 0.

    Returns:
        tuple[int, int, int, int]:
            (total_reposts, delay_reposts, applied_cycles, eligible_cycles) where
            eligible_cycles is ``total_reposts - delay_reposts`` clamped at 0
    """
    total = ad_cfg.repost_count or 0
    delay = ad_cfg.auto_price_reduction.delay_reposts
    applied = ad_cfg.price_reduction_count or 0
    eligible = max(total - delay, 0)
    return total, delay, applied, eligible
|
||
|
||
|
||
def _day_delay_state(ad_cfg:Ad) -> tuple[bool, int | None, datetime | None]:
    """Evaluate the day-based reduction delay for an ad.

    The reference timestamp is ``updated_on`` if set, otherwise ``created_on``.

    Returns:
        tuple[bool, int | None, datetime | None]:
            (ready_flag, elapsed_days_or_none, reference_timestamp_or_none);
            ``(True, 0, reference)`` when no delay is configured and
            ``(False, None, None)`` when a delay is configured but no timestamp exists
    """
    delay_days = ad_cfg.auto_price_reduction.delay_days
    # Use getattr to support lightweight test doubles without these attributes.
    reference = getattr(ad_cfg, "updated_on", None)
    if not reference:
        reference = getattr(ad_cfg, "created_on", None)

    if delay_days == 0:
        return True, 0, reference

    if not reference:
        return False, None, None

    # Note: .days truncates to whole days (e.g., 1.9 days -> 1 day)
    # This is intentional: delays count complete 24-hour periods since publish
    # Both misc.now() and stored timestamps use UTC (via misc.now()), ensuring consistent calculations
    age = misc.now() - reference
    elapsed_days = age.days
    return elapsed_days >= delay_days, elapsed_days, reference
|
||
|
||
|
||
def _relative_ad_path(ad_file:str, config_file_path:str) -> str:
    """Express ad_file relative to the directory containing the config file.

    :param ad_file: Path of the ad file
    :param config_file_path: Path of the config file; its parent directory is the base
    :return: The relative path, or ad_file unchanged if it is not located below that directory
    """
    config_dir = Path(config_file_path).parent
    try:
        relative = Path(ad_file).relative_to(config_dir)
    except ValueError:
        return ad_file
    return str(relative)
|
||
|
||
|
||
def apply_auto_price_reduction(ad_cfg:Ad, _ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> None:
    """
    Advance the ad by one automatic price-reduction cycle if configuration and delays allow it.

    ad_cfg is modified in-place: when a cycle is applied, ``price_reduction_count`` is set to the
    new cycle number and ``price`` is replaced if the calculated price differs from the current one.
    Nothing is changed when the feature is disabled, no price is set, min_price equals the price,
    or the repost/day delay is not yet satisfied.

    :param ad_cfg: The ad configuration to potentially modify
    :param _ad_cfg_orig: The original ad configuration (unused, kept for compatibility)
    :param ad_file_relative: Relative path to the ad file for logging
    """
    if not ad_cfg.auto_price_reduction.enabled:
        LOG.debug("Auto price reduction: not configured for [%s]", ad_file_relative)
        return

    current_price = ad_cfg.price
    if current_price is None:
        LOG.warning("Auto price reduction is enabled for [%s] but no price is configured.", ad_file_relative)
        return

    configured_floor = ad_cfg.auto_price_reduction.min_price
    if configured_floor is not None and configured_floor == current_price:
        LOG.warning("Auto price reduction is enabled for [%s] but min_price equals price (%s) - no reductions will occur.", ad_file_relative, current_price)
        return

    # Compute both delay states once and hand them to the checks below, so logging and decision agree.
    repost_state = _repost_delay_state(ad_cfg)
    day_delay_state = _day_delay_state(ad_cfg)
    total_reposts, delay_reposts, applied_cycles, eligible_cycles = repost_state
    _, elapsed_days, reference = day_delay_state
    delay_days = ad_cfg.auto_price_reduction.delay_days
    elapsed_display = str(elapsed_days) if elapsed_days is not None else "missing"
    reference_display = reference.isoformat(timespec = "seconds") if reference is not None else "missing"

    repost_ok = _repost_cycle_ready(ad_cfg, ad_file_relative, repost_state = repost_state)
    if not repost_ok:
        if total_reposts <= delay_reposts:
            next_repost = delay_reposts + 1
        else:
            next_repost = delay_reposts + applied_cycles + 1
        LOG.debug(
            "Auto price reduction decision for [%s]: skipped (repost delay). next reduction earliest at repost >= %s and day delay %s/%s days."
            " repost_count=%s eligible_cycles=%s applied_cycles=%s reference=%s",
            ad_file_relative,
            next_repost,
            elapsed_display,
            delay_days,
            total_reposts,
            eligible_cycles,
            applied_cycles,
            reference_display,
        )
        return

    days_ok = _day_delay_elapsed(ad_cfg, ad_file_relative, day_delay_state = day_delay_state)
    if not days_ok:
        LOG.debug(
            "Auto price reduction decision for [%s]: skipped (day delay). next reduction earliest when elapsed_days >= %s."
            " elapsed_days=%s repost_count=%s eligible_cycles=%s applied_cycles=%s reference=%s",
            ad_file_relative,
            delay_days,
            elapsed_display,
            total_reposts,
            eligible_cycles,
            applied_cycles,
            reference_display,
        )
        return

    LOG.debug(
        "Auto price reduction decision for [%s]: applying now (eligible_cycles=%s, applied_cycles=%s, elapsed_days=%s/%s).",
        ad_file_relative,
        eligible_cycles,
        applied_cycles,
        elapsed_display,
        delay_days,
    )

    target_cycle = applied_cycles + 1

    # The traced calculation is only used when debug logging is active; otherwise the plain one runs.
    if loggers.is_debug(LOG):
        new_price, trace_steps, price_floor = calculate_auto_price_with_trace(
            base_price = current_price,
            auto_price_reduction = ad_cfg.auto_price_reduction,
            target_reduction_cycle = target_cycle,
        )
        LOG.debug(
            "Auto price reduction trace for [%s]: strategy=%s amount=%s floor=%s target_cycle=%s base_price=%s",
            ad_file_relative,
            ad_cfg.auto_price_reduction.strategy,
            ad_cfg.auto_price_reduction.amount,
            price_floor,
            target_cycle,
            current_price,
        )
        for trace_step in trace_steps:
            LOG.debug(
                " -> cycle=%s before=%s reduction=%s after_rounding=%s floor_applied=%s",
                trace_step.cycle,
                trace_step.price_before,
                trace_step.reduction_value,
                trace_step.price_after_rounding,
                trace_step.floor_applied,
            )
    else:
        new_price = calculate_auto_price(
            base_price = current_price,
            auto_price_reduction = ad_cfg.auto_price_reduction,
            target_reduction_cycle = target_cycle,
        )

    if new_price is None:
        return

    if new_price == current_price:
        # Still increment counter so small fractional reductions can accumulate over multiple cycles
        ad_cfg.price_reduction_count = target_cycle
        LOG.info("Auto price reduction kept price %s after attempting %s reduction cycles", new_price, target_cycle)
        return

    LOG.info("Auto price reduction applied: %s -> %s after %s reduction cycles", current_price, new_price, target_cycle)
    ad_cfg.price = new_price
    ad_cfg.price_reduction_count = target_cycle
    # Note: price_reduction_count is persisted to ad_cfg_orig only after successful publish
|
||
|
||
|
||
class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||
def __init__(self) -> None:
    """Initialise the bot with default paths, default command/selector values and empty runtime state."""
    # workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/295
    # see https://github.com/pyinstaller/pyinstaller/issues/7229#issuecomment-1309383026
    os.environ["SSL_CERT_FILE"] = certifi.where()

    super().__init__()

    self.root_url = "https://www.kleinanzeigen.de"

    # Declared only; the value is assigned by load_config().
    self.config:Config
    # Default config location; replaced by --config (parse_args) and by _resolve_workspace().
    self.config_file_path = abspath("config.yaml")
    # Stays None until _resolve_workspace() has run for a command that needs a workspace.
    self.workspace:xdg_paths.Workspace | None = None
    # Raw CLI values of --config / --workspace-mode, kept for workspace resolution.
    self._config_arg:str | None = None
    self._workspace_mode_arg:xdg_paths.InstallationMode | None = None

    # Filled by load_config().
    self.categories:dict[str, str] = {}

    # Handle of the active file log; set by configure_file_logging().
    self.file_log:loggers.LogFileHandle | None = None
    # Executable name without extension when is_frozen() is true, otherwise this module's name.
    self._log_basename = os.path.splitext(os.path.basename(sys.executable))[0] if is_frozen() else self.__module__
    # None disables file logging (see configure_file_logging()).
    self.log_file_path:str | None = abspath(f"{self._log_basename}.log")
    # Raw CLI value of --logfile and whether the option was passed at all.
    self._logfile_arg:str | None = None
    self._logfile_explicitly_provided:bool = False

    # CLI state; overwritten by parse_args().
    self.command = "help"
    self.ads_selector = "due"
    # True once --ads or --force was passed; run() only rejects invalid selectors that were explicit.
    self._ads_selector_explicit:bool = False
    self.keep_old_ads = False

    self._login_detection_diagnostics_captured:bool = False
    # Created by load_config() when timing collection is enabled and a workspace exists.
    self._timing_collector:TimingCollector | None = None
|
||
|
||
def __del__(self) -> None:
    """
    Best-effort cleanup: close the file log handle (if any) and the browser session.

    The finalizer also runs for instances whose ``__init__`` raised before ``file_log`` was
    assigned (e.g. inside ``super().__init__()``), so the attribute is read defensively
    instead of raising AttributeError during finalization.
    """
    file_log = getattr(self, "file_log", None)
    if file_log:
        file_log.close()
        self.file_log = None
    self.close_browser_session()
|
||
|
||
def get_version(self) -> str:
    """Return the application version string (the package's ``__version__``)."""
    version:str = __version__
    return version
|
||
|
||
def _workspace_or_raise(self) -> xdg_paths.Workspace:
    """
    Return the resolved workspace.

    :raises AssertionError: if the workspace has not been resolved yet
    """
    workspace = self.workspace
    if workspace is not None:
        return workspace
    raise AssertionError(_("Workspace must be resolved before command execution"))
|
||
|
||
@property
def _update_check_state_path(self) -> Path:
    """Path of ``update_check_state.json`` inside the workspace state directory.

    :raises AssertionError: if the workspace has not been resolved yet
    """
    state_dir = self._workspace_or_raise().state_dir
    return state_dir / "update_check_state.json"
|
||
|
||
def _resolve_workspace(self) -> None:
    """
    Resolve workspace paths after CLI args are parsed.

    Does nothing for the commands "help", "version" and "create-config". Otherwise the workspace
    is resolved via xdg_paths.resolve_workspace(), the config directory is ensured, and
    ``config_file_path`` / ``log_file_path`` are replaced by the workspace values.
    Exits the process with status 2 if workspace resolution raises ValueError.
    """
    if self.command in {"help", "version", "create-config"}:
        return
    effective_config_arg = self._config_arg
    effective_workspace_mode = self._workspace_mode_arg
    if not effective_config_arg:
        # No --config given: honour a config_file_path that was changed away from ./config.yaml.
        default_config = (Path.cwd() / "config.yaml").resolve()
        if self.config_file_path and Path(self.config_file_path).resolve() != default_config:
            effective_config_arg = self.config_file_path
            if effective_workspace_mode is None:
                # Backward compatibility for tests/programmatic assignment of config_file_path:
                # infer a stable default from the configured path location.
                config_path = Path(self.config_file_path).resolve()
                xdg_config_dir = xdg_paths.get_xdg_base_dir("config").resolve()
                effective_workspace_mode = "xdg" if config_path.is_relative_to(xdg_config_dir) else "portable"

    try:
        self.workspace = xdg_paths.resolve_workspace(
            config_arg = effective_config_arg,
            logfile_arg = self._logfile_arg,
            workspace_mode = effective_workspace_mode,
            logfile_explicitly_provided = self._logfile_explicitly_provided,
            log_basename = self._log_basename,
        )
    except ValueError as exc:
        # Resolution problems are reported as a usage error.
        LOG.error(str(exc))
        sys.exit(2)

    xdg_paths.ensure_directory(self.workspace.config_file.parent, "config directory")

    # From here on the workspace is the source of truth for config and log file locations.
    self.config_file_path = str(self.workspace.config_file)
    self.log_file_path = str(self.workspace.log_file) if self.workspace.log_file else None

    LOG.info("Config: %s", self.workspace.config_file)
    LOG.info("Workspace mode: %s", self.workspace.mode)
    LOG.info("Workspace: %s", self.workspace.config_dir)
    if loggers.is_debug(LOG):
        LOG.debug("Log file: %s", self.workspace.log_file)
        LOG.debug("State dir: %s", self.workspace.state_dir)
        LOG.debug("Download dir: %s", self.workspace.download_dir)
        LOG.debug("Browser profile: %s", self.workspace.browser_profile_dir)
        LOG.debug("Diagnostics dir: %s", self.workspace.diagnostics_dir)
|
||
|
||
async def run(self, args:list[str]) -> None:
    """
    Parse the CLI arguments, resolve the workspace and execute the selected command.

    For commands taking an ads selector, an invalid selector terminates the process with
    status 2 when it was given explicitly (--ads/--force); otherwise the command's default
    selector is used. An unknown command also exits with status 2. The browser session is
    closed and the timing collector (if any) is flushed in all cases.

    :param args: The full argument vector; parse_args() ignores the first element
    """
    self.parse_args(args)
    self._resolve_workspace()
    try:
        match self.command:
            case "help":
                self.show_help()
                return
            case "version":
                print(self.get_version())
            case "create-config":
                self.create_default_config()
                return
            case "diagnose":
                self.configure_file_logging()
                self.load_config()
                self.diagnose_browser_issues()
                return
            case "verify":
                self.configure_file_logging()
                self.load_config()
                # Check for updates on startup
                checker = UpdateChecker(self.config, self._update_check_state_path)
                checker.check_for_updates()
                self.ads_selector = "all"
                if ads := self.load_ads(exclude_ads_with_id = False):
                    # Runs the price-reduction logic on the loaded ads; nothing is published in this branch.
                    for ad_file, ad_cfg, ad_cfg_orig in ads:
                        ad_file_relative = _relative_ad_path(ad_file, self.config_file_path)
                        apply_auto_price_reduction(ad_cfg, ad_cfg_orig, ad_file_relative)
                LOG.info("############################################")
                LOG.info("DONE: No configuration errors found.")
                LOG.info("############################################")
            case "update-check":
                self.configure_file_logging()
                self.load_config()
                checker = UpdateChecker(self.config, self._update_check_state_path)
                # Passes skip_interval_check = True, unlike the startup checks of the other commands.
                checker.check_for_updates(skip_interval_check = True)
            case "update-content-hash":
                self.configure_file_logging()
                self.load_config()
                # Check for updates on startup
                checker = UpdateChecker(self.config, self._update_check_state_path)
                checker.check_for_updates()
                self.ads_selector = "all"
                if ads := self.load_ads(exclude_ads_with_id = False):
                    self.update_content_hashes(ads)
                else:
                    LOG.info("############################################")
                    LOG.info("DONE: No active ads found.")
                    LOG.info("############################################")
            case "publish":
                self.configure_file_logging()
                self.load_config()
                # Check for updates on startup
                checker = UpdateChecker(self.config, self._update_check_state_path)
                checker.check_for_updates()

                if not self._is_valid_ads_selector({"all", "new", "due", "changed"}):
                    if self._ads_selector_explicit:
                        LOG.error('Invalid --ads selector: "%s". Valid values: all, new, due, changed, or comma-separated numeric IDs.',
                            self.ads_selector)
                        sys.exit(2)
                    # Selector was not given explicitly: fall back to the command default.
                    self.ads_selector = "due"

                if ads := self.load_ads():
                    await self.create_browser_session()
                    await self.login()
                    await self.publish_ads(ads)
                else:
                    LOG.info("############################################")
                    LOG.info("DONE: No new/outdated ads found.")
                    LOG.info("############################################")
            case "update":
                # NOTE(review): unlike the other config-loading commands, "update" performs no update check - confirm this is intended.
                self.configure_file_logging()
                self.load_config()

                if not self._is_valid_ads_selector({"all", "changed"}):
                    if self._ads_selector_explicit:
                        LOG.error('Invalid --ads selector: "%s". Valid values: all, changed, or comma-separated numeric IDs.',
                            self.ads_selector)
                        sys.exit(2)
                    # Selector was not given explicitly: fall back to the command default.
                    self.ads_selector = "changed"

                if ads := self.load_ads():
                    await self.create_browser_session()
                    await self.login()
                    await self.update_ads(ads)
                else:
                    LOG.info("############################################")
                    LOG.info("DONE: No changed ads found.")
                    LOG.info("############################################")
            case "delete":
                self.configure_file_logging()
                self.load_config()
                # Check for updates on startup
                checker = UpdateChecker(self.config, self._update_check_state_path)
                checker.check_for_updates()
                if ads := self.load_ads():
                    await self.create_browser_session()
                    await self.login()
                    await self.delete_ads(ads)
                else:
                    LOG.info("############################################")
                    LOG.info("DONE: No ads to delete found.")
                    LOG.info("############################################")
            case "extend":
                self.configure_file_logging()
                self.load_config()
                # Check for updates on startup
                checker = UpdateChecker(self.config, self._update_check_state_path)
                checker.check_for_updates()

                # Default to all ads if no selector provided, but reject invalid values
                if not self._is_valid_ads_selector({"all"}):
                    if self._ads_selector_explicit:
                        LOG.error('Invalid --ads selector: "%s". Valid values: all or comma-separated numeric IDs.',
                            self.ads_selector)
                        sys.exit(2)
                    LOG.info("Extending all ads within 8-day window...")
                    self.ads_selector = "all"

                if ads := self.load_ads():
                    await self.create_browser_session()
                    await self.login()
                    await self.extend_ads(ads)
                else:
                    LOG.info("############################################")
                    LOG.info("DONE: No ads found to extend.")
                    LOG.info("############################################")
            case "download":
                self.configure_file_logging()
                # ad IDs depends on selector
                # The selector is validated before load_config() here, unlike in the other commands.
                if not self._is_valid_ads_selector({"all", "new"}):
                    if self._ads_selector_explicit:
                        LOG.error('Invalid --ads selector: "%s". Valid values: all, new, or comma-separated numeric IDs.',
                            self.ads_selector)
                        sys.exit(2)
                    self.ads_selector = "new"
                self.load_config()
                # Check for updates on startup
                checker = UpdateChecker(self.config, self._update_check_state_path)
                checker.check_for_updates()
                await self.create_browser_session()
                await self.login()
                await self.download_ads()

            case _:
                LOG.error("Unknown command: %s", self.command)
                sys.exit(2)
    finally:
        # Cleanup runs on every exit path, including the early returns and sys.exit() calls above.
        self.close_browser_session()
        if self._timing_collector is not None:
            try:
                # flush() is handed to the loop's default executor instead of being called inline.
                loop = asyncio.get_running_loop()
                await loop.run_in_executor(None, self._timing_collector.flush)
            except Exception as exc:  # noqa: BLE001
                # Best effort: a failing flush is only logged as a warning.
                LOG.warning("Timing collector flush failed: %s", exc)
|
||
|
||
def show_help(self) -> None:
    """
    Print the command line help in German or English, depending on the current locale.

    The invocation shown in the usage line is ``sys.argv[0]`` for frozen executables,
    ``pdm run app`` when the PDM_PROJECT_ROOT environment variable is set, and
    ``python -m kleinanzeigen_bot`` otherwise.
    """
    if is_frozen():
        exe = sys.argv[0]
    elif os.getenv("PDM_PROJECT_ROOT", ""):
        exe = "pdm run app"
    else:
        exe = "python -m kleinanzeigen_bot"

    if get_current_locale().language == "de":
        print(
            textwrap.dedent(
                f"""\
                Verwendung: {colorama.Fore.LIGHTMAGENTA_EX}{exe} BEFEHL [OPTIONEN]{colorama.Style.RESET_ALL}

                Befehle:
                publish - (Wieder-)Veröffentlicht Anzeigen
                verify - Überprüft die Konfigurationsdateien
                delete - Löscht Anzeigen
                update - Aktualisiert bestehende Anzeigen
                extend - Verlängert Anzeigen innerhalb des 8-Tage-Zeitfensters
                download - Lädt eine oder mehrere Anzeigen herunter
                update-check - Prüft auf verfügbare Updates
                update-content-hash - Berechnet den content_hash aller Anzeigen anhand der aktuellen ad_defaults neu;
                nach Änderungen an den config.yaml/ad_defaults verhindert es, dass alle Anzeigen als
                "geändert" gelten und neu veröffentlicht werden.
                create-config - Erstellt eine neue Standard-Konfigurationsdatei, falls noch nicht vorhanden
                diagnose - Diagnostiziert Browser-Verbindungsprobleme und zeigt Troubleshooting-Informationen
                --
                help - Zeigt diese Hilfe an (Standardbefehl)
                version - Zeigt die Version der Anwendung an

                Optionen:
                --ads=all|due|new|changed|<id(s)> (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due)
                Mögliche Werte:
                * all: Veröffentlicht alle Anzeigen erneut, ignoriert republication_interval
                * due: Veröffentlicht alle neuen Anzeigen und erneut entsprechend dem republication_interval
                * new: Veröffentlicht nur neue Anzeigen (d.h. Anzeigen ohne ID in der Konfigurationsdatei)
                * changed: Veröffentlicht nur Anzeigen, die seit der letzten Veröffentlichung geändert wurden
                * <id(s)>: Gibt eine oder mehrere Anzeigen-IDs an, die veröffentlicht werden sollen, z. B. "--ads=1,2,3", ignoriert republication_interval
                * Kombinationen: Sie können mehrere Selektoren mit Kommas kombinieren, z. B. "--ads=changed,due" um sowohl geänderte als auch
                fällige Anzeigen zu veröffentlichen
                --ads=all|new|<id(s)> (download) - Gibt an, welche Anzeigen heruntergeladen werden sollen (STANDARD: new)
                Mögliche Werte:
                * all: Lädt alle Anzeigen aus Ihrem Profil herunter
                * new: Lädt Anzeigen aus Ihrem Profil herunter, die lokal noch nicht gespeichert sind
                * <id(s)>: Gibt eine oder mehrere Anzeigen-IDs zum Herunterladen an, z. B. "--ads=1,2,3"
                --ads=all|changed|<id(s)> (update) - Gibt an, welche Anzeigen aktualisiert werden sollen (STANDARD: changed)
                Mögliche Werte:
                * all: Aktualisiert alle Anzeigen
                * changed: Aktualisiert nur Anzeigen, die seit der letzten Veröffentlichung geändert wurden
                * <id(s)>: Gibt eine oder mehrere Anzeigen-IDs zum Aktualisieren an, z. B. "--ads=1,2,3"
                --ads=all|<id(s)> (extend) - Gibt an, welche Anzeigen verlängert werden sollen
                Mögliche Werte:
                * all: Verlängert alle Anzeigen, die innerhalb von 8 Tagen ablaufen
                * <id(s)>: Gibt bestimmte Anzeigen-IDs an, z. B. "--ads=1,2,3"
                --force - Alias für '--ads=all'
                --keep-old - Verhindert das Löschen alter Anzeigen bei erneuter Veröffentlichung
                --config=<PATH> - Pfad zur YAML- oder JSON-Konfigurationsdatei (ändert den Workspace-Modus nicht implizit)
                --workspace-mode=portable|xdg - Überschreibt den Workspace-Modus für diesen Lauf
                --logfile=<PATH> - Pfad zur Protokolldatei (STANDARD: vom aktiven Workspace-Modus abhängig)
                --lang=en|de - Anzeigesprache (STANDARD: Systemsprache, wenn unterstützt, sonst Englisch)
                -v, --verbose - Aktiviert detaillierte Ausgabe – nur nützlich zur Fehlerbehebung
                """.rstrip()
            )
        )
    else:
        # English is the fallback for every locale other than German.
        print(
            textwrap.dedent(
                f"""\
                Usage: {colorama.Fore.LIGHTMAGENTA_EX}{exe} COMMAND [OPTIONS]{colorama.Style.RESET_ALL}

                Commands:
                publish - (re-)publishes ads
                verify - verifies the configuration files
                delete - deletes ads
                update - updates published ads
                extend - extends ads within the 8-day window before expiry
                download - downloads one or multiple ads
                update-check - checks for available updates
                update-content-hash - recalculates each ad's content_hash based on the current ad_defaults;
                use this after changing config.yaml/ad_defaults to avoid every ad being marked "changed" and republished
                create-config - creates a new default configuration file if one does not exist
                diagnose - diagnoses browser connection issues and shows troubleshooting information
                --
                help - displays this help (default command)
                version - displays the application version

                Options:
                --ads=all|due|new|changed|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due)
                Possible values:
                * all: (re-)publish all ads ignoring republication_interval
                * due: publish all new ads and republish ads according to the republication_interval
                * new: only publish new ads (i.e. ads that have no id in the config file)
                * changed: only publish ads that have been modified since last publication
                * <id(s)>: provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval
                * Combinations: You can combine multiple selectors with commas, e.g. "--ads=changed,due" to publish both changed and due ads
                --ads=all|new|<id(s)> (download) - specifies which ads to download (DEFAULT: new)
                Possible values:
                * all: downloads all ads from your profile
                * new: downloads ads from your profile that are not locally saved yet
                * <id(s)>: provide one or several ads by ID to download, like e.g. "--ads=1,2,3"
                --ads=all|changed|<id(s)> (update) - specifies which ads to update (DEFAULT: changed)
                Possible values:
                * all: update all ads
                * changed: only update ads that have been modified since last publication
                * <id(s)>: provide one or several ads by ID to update, like e.g. "--ads=1,2,3"
                --ads=all|<id(s)> (extend) - specifies which ads to extend
                Possible values:
                * all: extend all ads expiring within 8 days
                * <id(s)>: specify ad IDs to extend, e.g. "--ads=1,2,3"
                --force - alias for '--ads=all'
                --keep-old - don't delete old ads on republication
                --config=<PATH> - path to the config YAML or JSON file (does not implicitly change workspace mode)
                --workspace-mode=portable|xdg - overrides workspace mode for this run
                --logfile=<PATH> - path to the logfile (DEFAULT: depends on active workspace mode)
                --lang=en|de - display language (DEFAULT: system language if supported, otherwise English)
                -v, --verbose - enables verbose output - only useful when troubleshooting issues
                """.rstrip()
            )
        )
|
||
|
||
def _is_valid_ads_selector(self, valid_keywords:set[str]) -> bool:
    """Tell whether ``self.ads_selector`` is acceptable for the given keyword selectors.

    A selector is valid if it is one of the keywords, a comma-separated list consisting only
    of keywords (whitespace around items is ignored), or a comma-separated list of numeric IDs.
    Lists mixing keywords and numeric IDs are rejected.

    :param valid_keywords: Keyword selectors supported by the current command
    :return: True if the selector is valid, False otherwise
    """
    selector = self.ads_selector
    if selector in valid_keywords:
        return True
    if all(part.strip() in valid_keywords for part in selector.split(",")):
        return True
    return _NUMERIC_IDS_RE.match(selector) is not None
|
||
|
||
def parse_args(self, args:list[str]) -> None:
|
||
try:
|
||
options, arguments = getopt.gnu_getopt(
|
||
args[1:],
|
||
"hv",
|
||
["ads=", "config=", "force", "help", "keep-old", "logfile=", "lang=", "verbose", "workspace-mode="],
|
||
)
|
||
except getopt.error as ex:
|
||
LOG.error(ex.msg)
|
||
LOG.error("Use --help to display available options.")
|
||
sys.exit(2)
|
||
|
||
for option, value in options:
|
||
match option:
|
||
case "-h" | "--help":
|
||
self.show_help()
|
||
sys.exit(0)
|
||
case "--config":
|
||
self.config_file_path = abspath(value)
|
||
self._config_arg = value
|
||
case "--logfile":
|
||
if value:
|
||
self.log_file_path = abspath(value)
|
||
else:
|
||
self.log_file_path = None
|
||
self._logfile_arg = value
|
||
self._logfile_explicitly_provided = True
|
||
case "--workspace-mode":
|
||
mode = value.strip().lower()
|
||
if mode not in {"portable", "xdg"}:
|
||
LOG.error("Invalid --workspace-mode '%s'. Use 'portable' or 'xdg'.", value)
|
||
sys.exit(2)
|
||
self._workspace_mode_arg = cast(xdg_paths.InstallationMode, mode)
|
||
case "--ads":
|
||
self.ads_selector = value.strip().lower()
|
||
self._ads_selector_explicit = True
|
||
case "--force":
|
||
self.ads_selector = "all"
|
||
self._ads_selector_explicit = True
|
||
case "--keep-old":
|
||
self.keep_old_ads = True
|
||
case "--lang":
|
||
set_current_locale(Locale.of(value))
|
||
case "-v" | "--verbose":
|
||
LOG.setLevel(loggers.DEBUG)
|
||
loggers.get_logger("nodriver").setLevel(loggers.INFO)
|
||
|
||
match len(arguments):
|
||
case 0:
|
||
self.command = "help"
|
||
case 1:
|
||
self.command = arguments[0]
|
||
case _:
|
||
LOG.error("More than one command given: %s", arguments)
|
||
sys.exit(2)
|
||
|
||
def configure_file_logging(self) -> None:
    """
    Start logging to ``self.log_file_path`` and log the application and Python version.

    Does nothing when no log file path is set or file logging is already active.
    """
    if not self.log_file_path or self.file_log:
        return

    workspace = self.workspace
    if workspace and workspace.log_file:
        xdg_paths.ensure_directory(workspace.log_file.parent, "log directory")

    LOG.info("Logging to [%s]...", self.log_file_path)
    self.file_log = loggers.configure_file_logging(self.log_file_path)

    LOG.info("App version: %s", self.get_version())
    LOG.info("Python version: %s", sys.version)
|
||
|
||
def create_default_config(self) -> None:
    """
    Write a default configuration file to ``self.config_file_path`` unless one already exists.

    If the file exists, an error is logged and nothing is written. The login credentials of
    the generated file are set to the placeholder "changeme".
    """
    target_path = self.config_file_path
    if os.path.exists(target_path):
        LOG.error("Config file %s already exists. Aborting creation.", target_path)
        return

    # Prefer the resolved workspace's config location; fall back to the plain path.
    if self.workspace:
        config_parent = self.workspace.config_file.parent
    else:
        config_parent = Path(target_path).parent
    xdg_paths.ensure_directory(config_parent, "config directory")

    default_config = Config.model_construct()
    default_config.login.username = "changeme"  # noqa: S105 placeholder for default config, not a real username
    default_config.login.password = "changeme"  # noqa: S105 placeholder for default config, not a real password

    schema_header = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/schemas/config.schema.json"
    excluded_fields = {
        "ad_defaults": {"description"},
    }
    dicts.save_commented_model(
        target_path,
        default_config,
        header = schema_header,
        exclude = excluded_fields,
    )
|
||
|
||
def load_config(self) -> None:
    """
    Load and validate the config file and derive runtime state from it.

    Steps, in order:
      1. create a default config file if none exists at `self.config_file_path`
      2. load and strictly validate the file into `self.config`
      3. set up the timing collector (only when enabled and a workspace exists)
      4. merge built-in, deprecated and custom categories into `self.categories`
      5. copy the browser settings into `self.browser_config`
    """
    cfg_path = self.config_file_path

    # a missing config file is bootstrapped with defaults before loading
    if not os.path.exists(cfg_path):
        self.create_default_config()

    raw_config = dicts.load_dict_if_exists(cfg_path, _("config"))
    self.config = Config.model_validate(raw_config, strict = True, context = cfg_path)

    if self.config.diagnostics.timing_collection and self.workspace:
        self._timing_collector = TimingCollector(self.workspace.diagnostics_dir.parent / "timing", self.command)
    else:
        self._timing_collector = None

    # categories: later sources override earlier ones (built-in < deprecated < custom)
    self.categories = dicts.load_dict_from_module(resources, "categories.yaml", "")
    LOG.debug("Loaded %s categories from categories.yaml", len(self.categories))

    legacy_categories = dicts.load_dict_from_module(resources, "categories_old.yaml", "")
    LOG.debug("Loaded %s categories from categories_old.yaml", len(legacy_categories))
    self.categories.update(legacy_categories)

    custom_categories = self.config.categories
    if custom_categories:
        self.categories.update(custom_categories)
    LOG.debug("Loaded %s categories from config.yaml (custom)", len(custom_categories) if custom_categories else 0)

    total_count = len(self.categories)
    if not total_count:
        LOG.warning("No categories loaded - category files may be missing or empty")
    LOG.debug("Loaded %s categories in total", total_count)

    # populate browser_config object used by WebScrapingMixin
    browser_settings = self.config.browser
    browser_target = self.browser_config
    browser_target.arguments = browser_settings.arguments
    browser_target.binary_location = browser_settings.binary_location
    browser_target.extensions = [abspath(extension, relative_to = self.config_file_path) for extension in browser_settings.extensions]
    browser_target.use_private_window = browser_settings.use_private_window
    if browser_settings.user_data_dir:
        # an explicitly configured directory is resolved relative to the config file
        browser_target.user_data_dir = abspath(browser_settings.user_data_dir, relative_to = self.config_file_path)
    elif self.workspace:
        browser_target.user_data_dir = str(self.workspace.browser_profile_dir)
    browser_target.profile_name = browser_settings.profile_name
|
||
|
||
def __check_ad_republication(self, ad_cfg:Ad, ad_file_relative:str) -> bool:
    """
    Decide whether an ad is due for republication based on its age.

    The age is measured from `updated_on`, falling back to `created_on`.
    An ad with neither timestamp is always considered due.
    Content changes are not examined here; see __check_ad_changed for that.

    Returns:
        True if the ad is older (in whole days) than its republication interval,
        False otherwise (a SKIPPED message is logged in that case).
    """
    reference_date = ad_cfg.updated_on or ad_cfg.created_on
    if not reference_date:
        return True

    age_in_days = (misc.now() - reference_date).days
    if age_in_days > ad_cfg.republication_interval:
        return True

    LOG.info(
        " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days",
        ad_file_relative,
        age_in_days,
        ad_cfg.republication_interval,
    )
    return False
|
||
|
||
def __check_ad_changed(self, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], ad_file_relative:str) -> bool:
    """
    Detect whether an already published ad was edited since its hash was stored.

    The hash is recomputed from the original (raw) ad data and compared with
    the `content_hash` entry stored in that same data. When they differ, the
    stored entry in `ad_cfg_orig` is overwritten with the new hash.

    Returns:
        True if a stored hash exists and differs from the recomputed one.
        False for ads without an id, without a stored hash, or with a matching hash.
    """
    # ads without an id were never published, so they cannot count as "changed"
    if not ad_cfg.id:
        return False

    partial = AdPartial.model_validate(ad_cfg_orig)
    fresh_hash = partial.update_content_hash().content_hash
    recorded_hash = ad_cfg_orig.get("content_hash")

    LOG.debug("Hash comparison for [%s]:", ad_file_relative)
    LOG.debug("    Stored hash: %s", recorded_hash)
    LOG.debug("    Current hash: %s", fresh_hash)

    if not recorded_hash or fresh_hash == recorded_hash:
        return False

    LOG.info("Changes detected in ad [%s], will republish", ad_file_relative)
    # keep the raw configuration in sync with the freshly computed hash
    ad_cfg_orig["content_hash"] = fresh_hash
    return True
|
||
|
||
def load_ads(self, *, ignore_inactive:bool = True, exclude_ads_with_id:bool = True) -> list[tuple[str, Ad, dict[str, Any]]]:
    """
    Find, load and validate the ad config files selected by `self.ads_selector`.

    The selector is either a comma-separated list of numeric ad ids or a
    comma-separated list of the keywords `changed`, `new`, `due` and `all`.
    For every selected ad the description is validated, the category name is
    resolved to a category id where possible, and image patterns are expanded
    to a de-duplicated list of absolute file paths.

    Args:
        ignore_inactive (bool):
            Skip ads with `active=False`.
        exclude_ads_with_id (bool):
            For the `new` selector, skip ads that already have an `id`.

    Returns:
        list[tuple[str, Ad, dict[str, Any]]]:
            Tuples of (absolute file path, validated Ad model, original raw data).
    """
    LOG.info("Searching for ad config files...")

    glob_flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB
    data_root_dir = os.path.dirname(self.config_file_path)

    # absolute path -> path as matched; keying by absolute path de-duplicates files hit by several patterns
    ad_files:dict[str, str] = {}
    for file_pattern in self.config.ad_files:
        for matched_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob_flags):
            if str(matched_file).endswith("ad_fields.yaml"):
                continue
            ad_files[abspath(matched_file, relative_to = data_root_dir)] = matched_file
    LOG.info(" -> found %s", pluralize("ad config file", ad_files))
    if not ad_files:
        return []

    selectors = self.ads_selector.split(",")
    use_specific_ads = bool(_NUMERIC_IDS_RE.match(self.ads_selector))
    ids = [int(token) for token in selectors] if use_specific_ads else []
    if use_specific_ads:
        LOG.info("Start fetch task for the ad(s) with id(s):")
        LOG.info(" | ".join(map(str, ids)))

    ads = []
    for ad_file, ad_file_relative in sorted(ad_files.items()):
        ad_cfg_orig:dict[str, Any] = dicts.load_dict(ad_file, "ad")
        ad_cfg:Ad = self.load_ad(ad_cfg_orig)

        if ignore_inactive and not ad_cfg.active:
            LOG.info(" -> SKIPPED: inactive ad [%s]", ad_file_relative)
            continue

        if use_specific_ads:
            if ad_cfg.id not in ids:
                LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file_relative)
                continue
        else:
            # keyword selectors: each requested check runs (and may log), any hit includes the ad
            include = False

            if "changed" in selectors and self.__check_ad_changed(ad_cfg, ad_cfg_orig, ad_file_relative):
                include = True

            if "new" in selectors:
                if ad_cfg.id and exclude_ads_with_id:
                    LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative)
                else:
                    include = True

            if "due" in selectors and self.__check_ad_republication(ad_cfg, ad_file_relative):
                include = True

            if "all" in selectors:
                include = True

            if not include:
                continue

        ensure(self.__get_description(ad_cfg, with_affixes = False), f"-> property [description] not specified @ [{ad_file}]")
        self.__get_description(ad_cfg, with_affixes = True)  # validates complete description

        category = ad_cfg.category
        if category:
            resolved_category_id = self.categories.get(category)
            if not resolved_category_id and ">" in category:
                # unknown sub-category: fall back to the parent category (everything before the last ">")
                parent_category = category.rpartition(">")[0].strip()
                resolved_category_id = self.categories.get(parent_category)
                if resolved_category_id:
                    LOG.warning("Category [%s] unknown. Using category [%s] with ID [%s] instead.", category, parent_category, resolved_category_id)

            if resolved_category_id:
                ad_cfg.category = resolved_category_id

        if ad_cfg.images:
            ad_dir = os.path.dirname(ad_file)
            images = []
            for image_pattern in ad_cfg.images:
                # files of one pattern are sorted; the order of the patterns themselves is kept
                pattern_images = set()
                for image_file in glob.glob(image_pattern, root_dir = ad_dir, flags = glob_flags):
                    extension = os.path.splitext(image_file)[1].lower()
                    ensure(extension in {".gif", ".jpg", ".jpeg", ".png"}, f"Unsupported image file type [{image_file}]")
                    pattern_images.add(image_file if os.path.isabs(image_file) else abspath(image_file, relative_to = ad_file))
                images.extend(sorted(pattern_images))
            ensure(images or not ad_cfg.images, f"No images found for given file patterns {ad_cfg.images} at {ad_dir}")
            # dict.fromkeys drops duplicates while keeping first-seen order
            ad_cfg.images = list(dict.fromkeys(images))

        LOG.info(" -> LOADED: ad [%s]", ad_file_relative)
        ads.append((ad_file, ad_cfg, ad_cfg_orig))

    LOG.info("Loaded %s", pluralize("ad", ads))
    return ads
|
||
|
||
def load_ad(self, ad_cfg_orig:dict[str, Any]) -> Ad:
    """Validate raw ad data as an AdPartial and convert it to an Ad using the configured ad defaults."""
    partial = AdPartial.model_validate(ad_cfg_orig)
    return partial.to_ad(self.config.ad_defaults)
|
||
|
||
async def check_and_wait_for_captcha(self, *, is_login_page:bool = True) -> None:
    """
    Look for a reCAPTCHA iframe and, if one is found, let the user solve it.

    When no captcha iframe shows up within the `captcha_detection` timeout the
    method returns silently. Outside the login page, with
    `captcha.auto_restart` enabled, a CaptchaEncountered is raised instead of
    waiting for the user.

    Raises:
        CaptchaEncountered: captcha found on a non-login page while auto-restart is enabled.
    """
    captcha_iframe_selector = "iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']"
    on_content_page = not is_login_page

    try:
        detection_timeout = self._timeout("captcha_detection")
        await self.web_find(By.CSS_SELECTOR, captcha_iframe_selector, timeout = detection_timeout)

        if on_content_page and self.config.captcha.auto_restart:
            LOG.warning("Captcha recognized - auto-restart enabled, abort run...")
            raise CaptchaEncountered(misc.parse_duration(self.config.captcha.restart_delay))

        LOG.warning("############################################")
        LOG.warning("# Captcha present! Please solve the captcha.")
        LOG.warning("############################################")

        if on_content_page:
            await self.web_scroll_page_down()

        await ainput(_("Press a key to continue..."))
    except TimeoutError:
        # No captcha detected within timeout.
        return
|
||
|
||
async def login(self) -> None:
|
||
LOG.info("Checking if already logged in...")
|
||
await self.web_open(f"{self.root_url}")
|
||
if getattr(self, "page", None) is not None:
|
||
LOG.debug("Current page URL after opening homepage: %s", self.page.url)
|
||
|
||
state = await self.get_login_state()
|
||
if state == LoginState.LOGGED_IN:
|
||
LOG.info("Already logged in as [%s]. Skipping login.", self.config.login.username)
|
||
return
|
||
|
||
if state == LoginState.UNKNOWN:
|
||
LOG.warning("Login state is UNKNOWN - cannot determine if already logged in. Skipping login attempt.")
|
||
return
|
||
|
||
LOG.info("Opening login page...")
|
||
await self.web_open(f"{self.root_url}/m-einloggen.html?targetUrl=/")
|
||
|
||
await self.fill_login_data_and_send()
|
||
await self.handle_after_login_logic()
|
||
|
||
# Sometimes a second login is required
|
||
state = await self.get_login_state()
|
||
if state == LoginState.UNKNOWN:
|
||
LOG.warning("Login state is UNKNOWN after first login attempt - cannot determine login status. Aborting login process.")
|
||
return
|
||
|
||
if state == LoginState.LOGGED_OUT:
|
||
LOG.debug("First login attempt did not succeed, trying second login attempt")
|
||
await self.fill_login_data_and_send()
|
||
await self.handle_after_login_logic()
|
||
|
||
state = await self.get_login_state()
|
||
if state == LoginState.LOGGED_IN:
|
||
LOG.debug("Second login attempt succeeded")
|
||
else:
|
||
LOG.warning("Second login attempt also failed - login may not have succeeded")
|
||
|
||
async def fill_login_data_and_send(self) -> None:
    """Fill the login form with the configured credentials, handle a possible captcha and submit the form."""
    credentials = self.config.login
    LOG.info("Logging in as [%s]...", credentials.username)
    await self.web_input(By.ID, "login-email", credentials.username)

    # the empty value goes in first to clear anything the browser may have pre-filled from stored login data
    for password_value in ("", credentials.password):
        await self.web_input(By.ID, "login-password", password_value)

    await self.check_and_wait_for_captcha(is_login_page = True)

    await self.web_click(By.CSS_SELECTOR, "form#login-form button[type='submit']")
|
||
|
||
async def handle_after_login_logic(self) -> None:
    """
    Deal with the prompts that may appear right after submitting the login form.

    Checked in order: the SMS code prompt, the e-mail verification prompt and
    the GDPR banner. Each check waits up to its own timeout; a prompt that
    does not show up in time is skipped silently. For the two verification
    prompts the user is asked to finish the verification in the browser and
    confirm with ENTER.
    """
    # (timeout key, text identifying the prompt on the page)
    verification_prompts = (
        ("sms_verification", "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer"),
        ("email_verification", "Um dein Konto zu schützen haben wir dir eine E-Mail geschickt"),
    )
    for timeout_key, prompt_text in verification_prompts:
        try:
            prompt_timeout = self._timeout(timeout_key)
            await self.web_find(By.TEXT, prompt_text, timeout = prompt_timeout)
            LOG.warning("############################################")
            LOG.warning("# Device verification message detected. Please follow the instruction displayed in the Browser.")
            LOG.warning("############################################")
            await ainput(_("Press ENTER when done..."))
        except TimeoutError:
            # This verification prompt was not detected.
            continue

    try:
        LOG.info("Handling GDPR disclaimer...")
        banner_timeout = self._timeout("gdpr_prompt")
        await self.web_find(By.ID, "gdpr-banner-accept", timeout = banner_timeout)
        await self.web_click(By.ID, "gdpr-banner-cmp-button")
        await self.web_click(
            By.XPATH, "//div[@id='ConsentManagementPage']//*//button//*[contains(., 'Alle ablehnen und fortfahren')]", timeout = banner_timeout
        )
    except TimeoutError:
        # GDPR banner not shown within timeout.
        return
|
||
|
||
async def _auth_probe_login_state(self) -> LoginState:
    """Classify the login state by requesting an endpoint that requires authentication.

    Classification rules:
      - request timeout or AssertionError          -> UNKNOWN
      - status code 401 or 403                     -> LOGGED_OUT
      - content is not a string                    -> UNKNOWN
      - content is JSON: a dict with an "ads" key  -> LOGGED_IN, anything else -> UNKNOWN
      - content is not JSON: contains one of the
        login page markers                         -> LOGGED_OUT, otherwise -> UNKNOWN

    Used by get_login_state() as the fallback when the DOM-based check does not confirm a login.
    """
    probe_url = f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"
    try:
        response = await self.web_request(probe_url, valid_response_codes = [200, 401, 403])
    except (TimeoutError, AssertionError):
        # Neither failure says anything about the session, so no assumption is made about the login state.
        return LoginState.UNKNOWN

    if response.get("statusCode") in {401, 403}:
        return LoginState.LOGGED_OUT

    body = response.get("content", "")
    if not isinstance(body, str):
        return LoginState.UNKNOWN

    try:
        payload = json.loads(body)
    except json.JSONDecodeError:
        # a non-JSON body is checked for markers of the login page
        haystack = body.lower()
        login_page_markers = ("m-einloggen", "login-email", "login-password", "login-form")
        if any(marker in haystack for marker in login_page_markers):
            return LoginState.LOGGED_OUT
        return LoginState.UNKNOWN

    looks_like_ad_listing = isinstance(payload, dict) and "ads" in payload
    return LoginState.LOGGED_IN if looks_like_ad_listing else LoginState.UNKNOWN
|
||
|
||
async def get_login_state(self) -> LoginState:
    """Determine the current login state in up to three stages.

    1) DOM-based check via `is_logged_in(include_probe=False)`; a positive result is final.
    2) Otherwise the auth probe `_auth_probe_login_state` decides, unless it returns UNKNOWN.
    3) In the UNKNOWN case `_capture_login_detection_diagnostics_if_enabled` is invoked
       and UNKNOWN is returned.
    """
    # The DOM check comes first so that the extra JSON request of the probe is only made when needed.
    dom_confirms_login = await self.is_logged_in(include_probe = False)
    if dom_confirms_login:
        return LoginState.LOGGED_IN

    probed_state = await self._auth_probe_login_state()
    if probed_state == LoginState.UNKNOWN:
        await self._capture_login_detection_diagnostics_if_enabled()
        return LoginState.UNKNOWN

    return probed_state
|
||
|
||
def _diagnostics_output_dir(self) -> Path:
    """
    Return the directory diagnostics artifacts are written to.

    A non-blank `diagnostics.output_dir` from the config wins and is resolved
    relative to the config file. Otherwise the workspace diagnostics directory
    is ensured and returned.
    """
    diag_cfg = getattr(self.config, "diagnostics", None)
    configured_dir = diag_cfg.output_dir if diag_cfg is not None else None
    if configured_dir and configured_dir.strip():
        return Path(abspath(configured_dir, relative_to = self.config_file_path)).resolve()

    workspace = self._workspace_or_raise()
    xdg_paths.ensure_directory(workspace.diagnostics_dir, "diagnostics directory")
    return workspace.diagnostics_dir
|
||
|
||
async def _capture_login_detection_diagnostics_if_enabled(self) -> None:
    """
    Capture diagnostics for an inconclusive login detection, at most once.

    Nothing happens when login-detection capture is disabled in the config,
    when a capture was already attempted earlier, or when no page is
    available. The capture itself is best effort: any failure is logged at
    debug level and never propagated.

    After the capture attempt, when `pause_on_login_detection_failure` is set
    and stdin is a TTY, the user is asked to press a key so the browser can be
    inspected manually.
    """
    cfg = getattr(self.config, "diagnostics", None)
    if cfg is None or not cfg.capture_on.login_detection:
        return

    if self._login_detection_diagnostics_captured:
        return

    page = getattr(self, "page", None)
    if page is None:
        return

    # set before the attempt so that a failing capture is not retried either
    self._login_detection_diagnostics_captured = True

    base_prefix = "login_detection_unknown"
    # Resolved inside the try block and remembered for the failure log. Resolving it again
    # in the except handler could raise a second time and escape this best-effort routine.
    output_dir = None
    try:
        output_dir = self._diagnostics_output_dir()
        await diagnostics.capture_diagnostics(
            output_dir = output_dir,
            base_prefix = base_prefix,
            page = page,
        )
    except Exception as exc:  # noqa: BLE001
        LOG.debug(
            "Login diagnostics capture failed (output_dir=%s, base_prefix=%s): %s",
            output_dir,
            base_prefix,
            exc,
        )

    if cfg.pause_on_login_detection_failure and getattr(sys.stdin, "isatty", lambda: False)():
        LOG.warning("############################################")
        LOG.warning("# Login detection returned UNKNOWN. Browser is paused for manual inspection.")
        LOG.warning("############################################")
        await ainput(_("Press a key to continue..."))
|
||
|
||
async def _capture_publish_error_diagnostics_if_enabled(
    self,
    ad_cfg:Ad,
    ad_cfg_orig:dict[str, Any],
    ad_file:str,
    attempt:int,
    exc:Exception,
) -> None:
    """Capture diagnostics for a failed publish attempt, if enabled.

    Does nothing unless `capture_on.publish` is enabled in the diagnostics
    config and a page is available. The JSON payload handed to the capture
    contains the timestamp, attempt number, page URL, exception details and
    both the effective and the original ad configuration. A failing capture
    is logged as a warning and not propagated.
    """
    cfg = getattr(self.config, "diagnostics", None)
    page = getattr(self, "page", None)
    if cfg is None or not cfg.capture_on.publish or page is None:
        return

    # the ad filename without extension identifies the ad in the capture
    subject = Path(ad_file).stem

    details = {
        "timestamp": misc.now().isoformat(timespec = "seconds"),
        "attempt": attempt,
        "page_url": getattr(page, "url", None),
        "exception": {
            "type": type(exc).__name__,
            "message": str(exc),
            "repr": repr(exc),
        },
        "ad_file": ad_file,
        "ad_title": ad_cfg.title,
        "ad_config_effective": ad_cfg.model_dump(mode = "json"),
        "ad_config_original": ad_cfg_orig,
    }

    try:
        await diagnostics.capture_diagnostics(
            output_dir = self._diagnostics_output_dir(),
            base_prefix = "publish_error",
            attempt = attempt,
            subject = subject,
            page = page,
            json_payload = details,
            log_file_path = self.log_file_path,
            copy_log = cfg.capture_log_copy,
        )
    except Exception as error:  # noqa: BLE001
        LOG.warning("Diagnostics capture failed during publish error handling: %s", error)
|
||
|
||
async def is_logged_in(self, *, include_probe:bool = True) -> bool:
    """
    Check whether the configured user is currently logged in.

    The DOM is checked first: the text of the first available login detection
    selector must contain the configured username (case-insensitive). When
    that does not confirm a login and `include_probe` is true, the server-side
    auth probe is consulted as well.

    Args:
        include_probe: whether to fall back to `_auth_probe_login_state()`.

    Returns:
        True if the DOM check or the probe confirms a login, otherwise False.
    """
    # The dedicated login_detection timeout is used for the selector lookup.
    base_timeout = self._timeout("login_detection")
    effective_timeout = self._effective_timeout("login_detection")
    username = self.config.login.username.lower()
    LOG.debug(
        "Starting login detection (timeout: %.1fs base, %.1fs effective with multiplier/backoff)",
        base_timeout,
        effective_timeout,
    )
    tried_login_selectors = _format_login_detection_selectors(_LOGIN_DETECTION_SELECTORS)

    try:
        user_info, matched_index = await self.web_text_first_available(
            _LOGIN_DETECTION_SELECTORS,
            timeout = base_timeout,
            key = "login_detection",
            description = "login_detection(selector_group)",
        )
    except TimeoutError:
        LOG.debug("Timeout waiting for login detection selector group after %.1fs", effective_timeout)
    else:
        if username in user_info.lower():
            # fall back to a generic label if the index has no entry in the label list
            if 0 <= matched_index < len(_LOGIN_DETECTION_SELECTOR_LABELS):
                selector_label = _LOGIN_DETECTION_SELECTOR_LABELS[matched_index]
            else:
                selector_label = f"selector_index_{matched_index}"
            LOG.debug("Login detected via login detection selector '%s'", selector_label)
            return True

    if not include_probe:
        LOG.debug("No login detected via configured login detection selectors (%s)", tried_login_selectors)
        return False

    probed_state = await self._auth_probe_login_state()
    if probed_state == LoginState.LOGGED_IN:
        return True

    LOG.debug(
        "No login detected - DOM login detection selectors (%s) did not confirm login and server probe returned %s",
        tried_login_selectors,
        probed_state.name,
    )
    return False
|
||
|
||
async def _fetch_published_ads(self) -> list[dict[str, Any]]:
    """Fetch all published ads, handling API pagination.

    Pages are requested one at a time, following the `next` value of the
    `paging` dict in each response. Fetching stops at the first request
    timeout, unparsable or unexpectedly shaped response, missing or invalid
    paging info, empty page, or when the reported last page is reached.
    Ads collected before such a stop are still returned.

    At most MAX_PAGE_LIMIT requests are issued, so a response whose `next`
    value never advances cannot keep the loop running forever.

    Returns:
        List of all published ads across all fetched pages.
    """
    ads:list[dict[str, Any]] = []
    page = 1
    requests_made = 0
    MAX_PAGE_LIMIT:Final[int] = 100
    SNIPPET_LIMIT:Final[int] = 500

    while True:
        # Safety check: bound the page number AND the number of requests. The page number alone
        # is not enough, because `page` is taken from the API's `next` field and may not increase.
        if page > MAX_PAGE_LIMIT or requests_made >= MAX_PAGE_LIMIT:
            LOG.warning("Stopping pagination after %s pages to avoid infinite loop", MAX_PAGE_LIMIT)
            break
        requests_made += 1

        try:
            response = await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
        except TimeoutError as ex:
            LOG.warning("Pagination request timed out on page %s: %s", page, ex)
            break

        content = response.get("content", "")
        try:
            json_data = json.loads(content)
        except json.JSONDecodeError as ex:
            if not content:
                LOG.warning("Empty JSON response content on page %s", page)
                break
            # only a bounded excerpt of the response goes into the log
            snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "")
            LOG.warning("Failed to parse JSON response on page %s: %s (content: %s)", page, ex, snippet)
            break

        if not isinstance(json_data, dict):
            snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "")
            LOG.warning("Unexpected JSON payload on page %s (content: %s)", page, snippet)
            break

        page_ads = json_data.get("ads", [])
        if not isinstance(page_ads, list):
            preview = str(page_ads)
            if len(preview) > SNIPPET_LIMIT:
                preview = preview[:SNIPPET_LIMIT] + "..."
            LOG.warning("Unexpected 'ads' type on page %s: %s value: %s", page, type(page_ads).__name__, preview)
            break

        ads.extend(page_ads)

        paging = json_data.get("paging")
        if not isinstance(paging, dict):
            LOG.debug("No paging dict found on page %s, assuming single page", page)
            break

        # Paging fields read from the response: pageNum (current), last (total), next, numFound
        current_page_num = misc.coerce_page_number(paging.get("pageNum"))
        total_pages = misc.coerce_page_number(paging.get("last"))

        if current_page_num is None:
            LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
            break

        if total_pages is None:
            LOG.debug("No pagination info found, assuming single page")
            break

        # Stop if reached last page
        if current_page_num >= total_pages:
            LOG.info("Reached last page %s of %s, stopping pagination", current_page_num, total_pages)
            break

        # Safety: stop if no ads returned
        if len(page_ads) == 0:
            LOG.info("No ads found on page %s, stopping pagination", page)
            break

        LOG.debug("Page %s: fetched %s ads (numFound=%s)", page, len(page_ads), paging.get("numFound"))

        # Navigate using the API's `next` field rather than incrementing our own counter
        next_page = misc.coerce_page_number(paging.get("next"))
        if next_page is None:
            LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
            break
        page = next_page

    return ads
|
||
|
||
async def delete_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
    """Delete every given ad via `delete_ad`, pausing after each one, and log a summary at the end."""
    published_ads = await self._fetch_published_ads()

    processed = 0
    for processed, (ad_file, ad_cfg, _raw_cfg) in enumerate(ad_cfgs, start = 1):
        LOG.info("Processing %s/%s: '%s' from [%s]...", processed, len(ad_cfgs), ad_cfg.title, ad_file)
        await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title)
        await self.web_sleep()

    LOG.info("############################################")
    LOG.info("DONE: Deleted %s", pluralize("ad", processed))
    LOG.info("############################################")
|
||
|
||
async def delete_ad(self, ad_cfg:Ad, published_ads:list[dict[str, Any]], *, delete_old_ads_by_title:bool) -> bool:
    """
    Delete the published counterpart(s) of an ad and reset its id.

    With `delete_old_ads_by_title` every published ad whose id or title
    matches the given ad is deleted. Without it, only the ad with the
    configured id is deleted (if an id is set); a 404 response is accepted in
    that case.

    Returns:
        Always True. `ad_cfg.id` is set to None before returning.
    """
    LOG.info("Deleting ad '%s' if already present...", ad_cfg.title)

    # the CSRF token required by the delete endpoint is read from a meta tag of the ad overview page
    await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
    token_element = await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]")
    csrf_token = token_element.attrs["content"]
    ensure(csrf_token is not None, "Expected CSRF Token not found in HTML content!")

    if delete_old_ads_by_title:
        for published_ad in published_ads:
            published_ad_id = int(published_ad.get("id", -1))
            published_ad_title = published_ad.get("title", "")
            if not (ad_cfg.id == published_ad_id or ad_cfg.title == published_ad_title):
                continue
            LOG.info(" -> deleting %s '%s'...", published_ad_id, published_ad_title)
            await self.web_request(
                url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}", method = "POST", headers = {"x-csrf-token": str(csrf_token)}
            )
    elif ad_cfg.id:
        await self.web_request(
            url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg.id}",
            method = "POST",
            headers = {"x-csrf-token": str(csrf_token)},
            valid_response_codes = [200, 404],
        )

    await self.web_sleep()
    ad_cfg.id = None
    return True
|
||
|
||
async def extend_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
    """
    Extend those of the given ads that are close to expiry.

    An ad qualifies when it has an id, is found among the published ads, is in
    state "active", and its `endDate` is at most 8 days away. Every other ad
    is skipped with a log message. Qualifying ads are extended one by one via
    `extend_ad`, and a summary is logged at the end.
    """
    published_ads = await self._fetch_published_ads()

    # phase 1: collect the ads that qualify for an extension
    candidates = []
    for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs:
        if not ad_cfg.id:
            LOG.info(" -> SKIPPED: ad '%s' is not published yet", ad_cfg.title)
            continue

        published_ad = None
        for entry in published_ads:
            if entry["id"] == ad_cfg.id:
                published_ad = entry
                break
        if not published_ad:
            LOG.warning(" -> SKIPPED: ad '%s' (ID: %s) not found in published ads", ad_cfg.title, ad_cfg.id)
            continue

        if published_ad.get("state") != "active":
            LOG.info(" -> SKIPPED: ad '%s' is not active (state: %s)", ad_cfg.title, published_ad.get("state"))
            continue

        end_date_str = published_ad.get("endDate")
        if not end_date_str:
            LOG.warning(" -> SKIPPED: ad '%s' has no endDate in API response", ad_cfg.title)
            continue

        # The end date is parsed as a naive datetime in day.month.year format; only its date part is compared
        end_date = datetime.strptime(end_date_str, "%d.%m.%Y")  # noqa: DTZ007
        days_until_expiry = (end_date.date() - misc.now().date()).days

        # Magic value 8 is kleinanzeigen.de's platform policy: extensions only possible within 8 days of expiry
        if days_until_expiry > 8:  # noqa: PLR2004
            LOG.info(" -> SKIPPED: ad '%s' expires in %d days (can only extend within 8 days)", ad_cfg.title, days_until_expiry)
            continue

        LOG.info(" -> ad '%s' expires in %d days, will extend", ad_cfg.title, days_until_expiry)
        candidates.append((ad_file, ad_cfg, ad_cfg_orig))

    if not candidates:
        LOG.info("No ads need extension at this time.")
        LOG.info("############################################")
        LOG.info("DONE: No ads extended.")
        LOG.info("############################################")
        return

    # phase 2: perform the extensions
    extended = 0
    for position, (ad_file, ad_cfg, ad_cfg_orig) in enumerate(candidates, start = 1):
        LOG.info("Processing %s/%s: '%s' from [%s]...", position, len(candidates), ad_cfg.title, ad_file)
        if await self.extend_ad(ad_file, ad_cfg, ad_cfg_orig):
            extended += 1
        await self.web_sleep()

    LOG.info("############################################")
    LOG.info("DONE: Extended %s", pluralize("ad", extended))
    LOG.info("############################################")
|
||
|
||
async def extend_ad(self, ad_file:str, ad_cfg:Ad, ad_cfg_orig:dict[str, Any]) -> bool:
    """
    Extend a single ad listing and record the extension in its ad file.

    The ad overview is searched page by page for the ad's "Verlängern" button.
    After clicking it, the confirmation dialog is closed (if one appears),
    `updated_on` in the raw ad data is set to the current time and the data is
    saved to `ad_file`.

    Returns:
        True on success. False if the button could not be found, a timeout
        occurred, or saving the ad file failed with an OSError.
    """
    LOG.info("Extending ad '%s' (ID: %s)...", ad_cfg.title, ad_cfg.id)

    try:
        button_xpath = f'//li[@data-adid="{ad_cfg.id}"]//button[contains(., "Verlängern")]'

        async def click_extend_on_page(page_num:int) -> bool:
            """Click the extend button if the current overview page has it; True stops the pagination."""
            try:
                button = await self.web_find(By.XPATH, button_xpath, timeout = self._timeout("quick_dom"))
                LOG.info("Found extend button on page %s", page_num)
                await button.click()
            except TimeoutError:
                LOG.debug("Extend button not found on page %s", page_num)
                return False  # Continue to next page
            return True

        overview_url = f"{self.root_url}/m-meine-anzeigen.html"
        clicked = await self._navigate_paginated_ad_overview(click_extend_on_page, page_url = overview_url)
        if not clicked:
            LOG.error(" -> FAILED: Could not find extend button for ad ID %s", ad_cfg.id)
            return False

        # Per the original author's notes, a "Vielen Dank!" dialog confirming the extension (and
        # offering a paid bump-up) appears after the click; it is dismissed via its close button.
        try:
            close_timeout = self._timeout("quick_dom")
            await self.web_click(By.CSS_SELECTOR, 'button[aria-label="Schließen"]', timeout = close_timeout)
            LOG.debug(" -> Closed confirmation dialog")
        except TimeoutError:
            LOG.warning(" -> No confirmation dialog found, extension may have completed directly")

        # updated_on records when the ad was extended
        ad_cfg_orig["updated_on"] = misc.now().isoformat(timespec = "seconds")
        dicts.save_dict(ad_file, ad_cfg_orig)

        LOG.info(" -> SUCCESS: ad extended with ID %s", ad_cfg.id)
        return True

    except TimeoutError as ex:
        # must stay ahead of the OSError handler: TimeoutError is a subclass of OSError
        LOG.error(" -> FAILED: Timeout while extending ad '%s': %s", ad_cfg.title, ex)
        return False
    except OSError as ex:
        LOG.error(" -> FAILED: Could not persist extension for ad '%s': %s", ad_cfg.title, ex)
        return False
|
||
|
||
async def __check_publishing_result(self) -> bool:
    """
    Checks whether one of the status elements shown after submitting an ad is displayed.

    Returns:
        True if the element with ID "checking-done" or the element with ID "not-completed"
        is displayed (checked in that order), otherwise False.
    """
    for status_element_id in ("checking-done", "not-completed"):
        if await self.web_check(By.ID, status_element_id, Is.DISPLAYED):
            return True
    return False
|
||
|
||
async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
    """
    Publishes (or re-publishes) the given ads one after another.

    Ads that are listed with state "paused" among the currently published ads are skipped.
    Every other ad is handed to publish_ad in REPLACE mode. A TimeoutError or ProtocolException
    triggers another attempt until PUBLISH_MAX_RETRIES attempts have been made. After publish_ad
    returned, the publishing result is awaited once (no retry), and the old ad is deleted if
    `delete_old_ads` is configured as "AFTER_PUBLISH" and old ads are not to be kept.

    Args:
        ad_cfgs: tuples of (ad file, effective ad config, ad config as present in the YAML file)

    Returns:
        None
    """
    attempts_allowed = PUBLISH_MAX_RETRIES
    processed = 0
    failures = 0

    published_ads = await self._fetch_published_ads()

    for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs:
        LOG.info("Processing %s/%s: '%s' from [%s]...", processed + 1, len(ad_cfgs), ad_cfg.title, ad_file)

        if any(entry["id"] == ad_cfg.id and entry["state"] == "paused" for entry in published_ads):
            LOG.info("Skipping because ad is reserved")
            continue

        processed += 1
        published = False

        # Only publish_ad itself is retried, i.e. everything that happens before the submission completes
        for attempt in range(1, attempts_allowed + 1):
            try:
                await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads, AdUpdateStrategy.REPLACE)
            except asyncio.CancelledError:
                raise  # never swallow task cancellation
            except (TimeoutError, ProtocolException) as ex:
                await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
                if attempt == attempts_allowed:
                    LOG.error("All %s attempts failed for '%s': %s. Skipping ad.", attempts_allowed, ad_cfg.title, ex)
                    failures += 1
                else:
                    LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, attempts_allowed, ad_cfg.title, ex)
                    await self.web_sleep(2)  # pause before the next attempt
            else:
                published = True
                break  # publish succeeded, leave the retry loop

        if not published:
            continue

        # The ad is already submitted at this point, so the result check is not retried
        try:
            await self.web_await(self.__check_publishing_result, timeout = self._timeout("publishing_result"))
        except TimeoutError:
            LOG.warning(" -> Could not confirm publishing for '%s', but ad may be online", ad_cfg.title)

        if self.config.publishing.delete_old_ads == "AFTER_PUBLISH" and not self.keep_old_ads:
            await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = False)

    LOG.info("############################################")
    if failures > 0:
        LOG.info("DONE: (Re-)published %s (%s failed after retries)", pluralize("ad", processed - failures), failures)
    else:
        LOG.info("DONE: (Re-)published %s", pluralize("ad", processed))
    LOG.info("############################################")
|
||
|
||
async def publish_ad(
    self, ad_file:str, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], published_ads:list[dict[str, Any]], mode:AdUpdateStrategy = AdUpdateStrategy.REPLACE
) -> None:
    """
    Fills in and submits the ad form, then writes the resulting metadata back to the ad file.

    In REPLACE mode a new ad is created (optionally deleting the old one first and applying the
    automatic price reduction); in any other mode the existing ad with ID `ad_cfg.id` is edited.

    @param ad_file: path of the ad file the updated metadata is saved to
    @param ad_cfg: the effective ad config (i.e. with default values applied etc.)
    @param ad_cfg_orig: the ad config as present in the YAML file
    @param published_ads: json list of published ads
    @param mode: the mode of ad editing, either publishing a new or updating an existing ad
    """

    if mode == AdUpdateStrategy.REPLACE:
        if self.config.publishing.delete_old_ads == "BEFORE_PUBLISH" and not self.keep_old_ads:
            await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title = self.config.publishing.delete_old_ads_by_title)

        # Apply auto price reduction only for REPLACE operations (actual reposts)
        # This ensures price reductions only happen on republish, not on UPDATE
        apply_auto_price_reduction(ad_cfg, ad_cfg_orig, _relative_ad_path(ad_file, self.config_file_path))

        LOG.info("Publishing ad '%s'...", ad_cfg.title)
        await self.web_open(f"{self.root_url}/p-anzeige-aufgeben-schritt2.html")
    else:
        LOG.info("Updating ad '%s'...", ad_cfg.title)
        await self.web_open(f"{self.root_url}/p-anzeige-bearbeiten.html?adId={ad_cfg.id}")

    if loggers.is_debug(LOG):
        LOG.debug(" -> effective ad meta:")
        YAML().dump(ad_cfg.model_dump(), sys.stdout)

    if ad_cfg.type == "WANTED":
        await self.web_click(By.ID, "adType2")

    #############################
    # set category (before title to avoid form reset clearing title)
    #############################
    await self.__set_category(ad_cfg.category, ad_file)

    #############################
    # set title
    #############################
    await self.web_input(By.ID, "postad-title", ad_cfg.title)

    #############################
    # set special attributes
    #############################
    await self.__set_special_attributes(ad_cfg)

    #############################
    # set shipping type/options/costs
    #############################
    shipping_type = ad_cfg.shipping_type
    if shipping_type != "NOT_APPLICABLE":
        if ad_cfg.type == "WANTED":
            # special handling for ads of type WANTED since shipping is a special attribute for these
            if shipping_type in {"PICKUP", "SHIPPING"}:
                shipping_value = "ja" if shipping_type == "SHIPPING" else "nein"
                try:
                    await self.web_select(By.XPATH, "//select[contains(@id, '.versand_s')]", shipping_value)
                except TimeoutError:
                    LOG.warning("Failed to set shipping attribute for type '%s'!", shipping_type)
        else:
            await self.__set_shipping(ad_cfg, mode)
    else:
        LOG.debug("Shipping step skipped - reason: NOT_APPLICABLE")

    #############################
    # set price
    #############################
    price_type = ad_cfg.price_type
    if price_type != "NOT_APPLICABLE":
        try:
            await self.web_select(By.CSS_SELECTOR, "select#price-type-react, select#micro-frontend-price-type, select#priceType", price_type)
        except TimeoutError:
            # Price type selector not present on this page variant.
            pass
        if ad_cfg.price:
            if mode == AdUpdateStrategy.MODIFY:
                # Clear the price field first to prevent concatenation of old and new values
                # This is needed because some input fields don't clear properly with just clear_input()
                price_field = await self.web_find(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price")
                await price_field.clear_input()
                await price_field.send_keys("")  # Ensure field is completely empty
                await self.web_sleep(500)  # Brief pause to ensure clearing is complete
            await self.web_input(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price", str(ad_cfg.price))

    #############################
    # set sell_directly
    #############################
    sell_directly = ad_cfg.sell_directly
    try:
        if ad_cfg.shipping_type == "SHIPPING":
            if sell_directly and ad_cfg.shipping_options and price_type in {"FIXED", "NEGOTIABLE"}:
                if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED):
                    await self.web_click(By.ID, "radio-buy-now-yes")
            elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED):
                await self.web_click(By.ID, "radio-buy-now-no")
    except TimeoutError as ex:
        # best effort: the buy-now radio buttons are not always available
        LOG.debug(ex, exc_info = True)

    #############################
    # set description
    #############################
    # The description is emitted as a JSON string literal, which is also a valid JavaScript string
    # literal. Splicing the raw text into a template literal would let the JavaScript engine interpret
    # backslashes and "${...}" sequences contained in the ad text and required rewriting backticks.
    description = self.__get_description(ad_cfg, with_affixes = True)
    await self.web_execute(f"document.querySelector('#pstad-descrptn').value = {json.dumps(description)}")

    await self.__set_contact_fields(ad_cfg.contact)

    if mode == AdUpdateStrategy.MODIFY:
        #############################
        # delete previous images because we don't know which have changed
        #############################
        img_items = await self.web_find_all(By.CSS_SELECTOR, "ul#j-pictureupload-thumbnails > li:not(.is-placeholder)")
        for element in img_items:
            btn = await self.web_find(By.CSS_SELECTOR, "button.pictureupload-thumbnails-remove", parent = element)
            await btn.click()

    #############################
    # upload images
    #############################
    await self.__upload_images(ad_cfg)

    #############################
    # wait for captcha
    #############################
    await self.check_and_wait_for_captcha(is_login_page = False)

    #############################
    # submit
    #############################
    try:
        await self.web_click(By.ID, "pstad-submit")
    except TimeoutError:
        # https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/40
        await self.web_click(By.XPATH, "//fieldset[@id='postad-publish']//*[contains(., 'Anzeige aufgeben')]")
        await self.web_click(By.ID, "imprint-guidance-submit")

    # check for no image question
    try:
        image_hint_xpath = '//button[contains(., "Ohne Bild veröffentlichen")]'
        if not ad_cfg.images and await self.web_check(By.XPATH, image_hint_xpath, Is.DISPLAYED):
            await self.web_click(By.XPATH, image_hint_xpath)
    except TimeoutError:
        # Image hint not shown; continue publish flow.
        pass  # nosec

    #############################
    # wait for payment form if commercial account is used
    #############################
    try:
        short_timeout = self._timeout("quick_dom")
        await self.web_find(By.ID, "myftr-shppngcrt-frm", timeout = short_timeout)

        LOG.warning("############################################")
        LOG.warning("# Payment form detected! Please proceed with payment.")
        LOG.warning("############################################")
        await self.web_scroll_page_down()
        await ainput(_("Press a key to continue..."))
    except TimeoutError:
        # Payment form not present.
        pass

    confirmation_timeout = self._timeout("publishing_confirmation")
    await self.web_await(lambda: "p-anzeige-aufgeben-bestaetigung.html?adId=" in self.page.url, timeout = confirmation_timeout)

    # extract the ad id from the URL's query parameter
    current_url_query_params = urllib_parse.parse_qs(urllib_parse.urlparse(self.page.url).query)
    ad_id = int(current_url_query_params.get("adId", [])[0])
    ad_cfg_orig["id"] = ad_id

    # Update content hash after successful publication
    # Calculate hash on original config to ensure consistent comparison on restart
    ad_cfg_orig["content_hash"] = AdPartial.model_validate(ad_cfg_orig).update_content_hash().content_hash
    ad_cfg_orig["updated_on"] = misc.now().isoformat(timespec = "seconds")
    if not ad_cfg.created_on and not ad_cfg.id:
        ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]

    # Increment repost_count and persist price_reduction_count only for REPLACE operations (actual reposts)
    # This ensures counters only advance on republish, not on UPDATE
    if mode == AdUpdateStrategy.REPLACE:
        # Increment repost_count after successful publish
        # Note: This happens AFTER publish, so price reduction logic (which runs before publish)
        # sees the count from the PREVIOUS run. This is intentional: the first publish uses
        # repost_count=0 (no reduction), the second publish uses repost_count=1 (first reduction), etc.
        current_reposts = int(ad_cfg_orig.get("repost_count", ad_cfg.repost_count or 0))
        ad_cfg_orig["repost_count"] = current_reposts + 1
        ad_cfg.repost_count = ad_cfg_orig["repost_count"]

        # Persist price_reduction_count after successful publish
        # This ensures failed publishes don't incorrectly increment the reduction counter
        if ad_cfg.price_reduction_count is not None and ad_cfg.price_reduction_count > 0:
            ad_cfg_orig["price_reduction_count"] = ad_cfg.price_reduction_count

    if mode == AdUpdateStrategy.REPLACE:
        LOG.info(" -> SUCCESS: ad published with ID %s", ad_id)
    else:
        LOG.info(" -> SUCCESS: ad updated with ID %s", ad_id)

    dicts.save_dict(ad_file, ad_cfg_orig)
|
||
|
||
async def __set_contact_fields(self, contact:Contact) -> None:
    """
    Enters the contact data (zipcode, city, street, name, phone) into the ad form.

    Every field is optional and handled on a best-effort basis: when a field cannot be set within
    the timeout a warning is logged and processing continues with the next field.

    Args:
        contact: the contact data of the ad
    """
    #############################
    # set contact zipcode
    #############################
    if contact.zipcode:
        try:
            zip_input = await self.web_find(By.ID, "pstad-zip")
            if zip_input is None:
                raise TimeoutError("ZIP input not found")
            await zip_input.clear_input()
        except TimeoutError:
            # clearing failed, the regular input below is still attempted
            pass

        try:
            await self.web_input(By.ID, "pstad-zip", contact.zipcode)
            zipcode_entered = True
        except TimeoutError:
            LOG.warning("Could not set contact zipcode: %s", contact.zipcode)
            zipcode_entered = False

        # The city can only be chosen once the zipcode was entered
        if zipcode_entered and contact.location:
            try:
                city_options = await self.web_find_all(By.CSS_SELECTOR, "#pstad-citychsr option")

                for city_option in city_options:
                    label = city_option.text.strip()
                    wanted = contact.location.strip()
                    separator, suffix = label.partition(" - ")[1:]
                    # match either the whole option text or the part after the first " - "
                    if label == wanted or (separator and suffix == wanted):
                        await self.web_select(By.ID, "pstad-citychsr", city_option.attrs.value)
                        break
                else:
                    LOG.warning("No city dropdown option matched location: %s", contact.location)
            except TimeoutError:
                LOG.warning("Could not set contact location: %s", contact.location)

    #############################
    # set contact street
    #############################
    if contact.street:
        try:
            street_disabled = await self.web_check(By.ID, "pstad-street", Is.DISABLED)
            if street_disabled:
                # toggle the address visibility first, while the street input is disabled
                await self.web_click(By.ID, "addressVisibility")
                await self.web_sleep()
            await self.web_input(By.ID, "pstad-street", contact.street)
        except TimeoutError:
            LOG.warning("Could not set contact street.")

    #############################
    # set contact name
    #############################
    if contact.name:
        try:
            name_readonly = await self.web_check(By.ID, "postad-contactname", Is.READONLY)
            if not name_readonly:
                await self.web_input(By.ID, "postad-contactname", contact.name)
        except TimeoutError:
            LOG.warning("Could not set contact name.")

    #############################
    # set contact phone
    #############################
    if contact.phone:
        try:
            if await self.web_check(By.ID, "postad-phonenumber", Is.DISPLAYED):
                try:
                    phone_disabled = await self.web_check(By.ID, "postad-phonenumber", Is.DISABLED)
                    if phone_disabled:
                        await self.web_click(By.ID, "phoneNumberVisibility")
                        await self.web_sleep()
                except TimeoutError:
                    # toggling the visibility is optional
                    pass
                await self.web_input(By.ID, "postad-phonenumber", contact.phone)
        except TimeoutError:
            LOG.warning(
                _(
                    "Phone number field not present on page. This is expected for many private accounts; "
                    "commercial accounts may still support phone numbers."
                )
            )
|
||
|
||
async def update_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
    """
    Updates the given ads in place.

    Only ads that are found among the currently published ads are processed; ads that are not found
    or whose state is "paused" are skipped. Every remaining ad is passed to publish_ad in MODIFY mode
    and the publishing result is awaited afterwards.

    Args:
        ad_cfgs: List of ad configurations

    Returns:
        None
    """
    published_ads = await self._fetch_published_ads()
    updated = 0

    for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs:
        published_entry = next((entry for entry in published_ads if entry["id"] == ad_cfg.id), None)
        if not published_entry:
            LOG.warning(" -> SKIPPED: ad '%s' (ID: %s) not found in published ads", ad_cfg.title, ad_cfg.id)
            continue

        LOG.info("Processing %s/%s: '%s' from [%s]...", updated + 1, len(ad_cfgs), ad_cfg.title, ad_file)
        if published_entry["state"] == "paused":
            LOG.info("Skipping because ad is reserved")
            continue

        updated += 1

        await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads, AdUpdateStrategy.MODIFY)
        await self.web_await(self.__check_publishing_result, timeout = self._timeout("publishing_result"))

    LOG.info("############################################")
    LOG.info("DONE: updated %s", pluralize("ad", updated))
    LOG.info("############################################")
|
||
|
||
async def __set_condition(self, condition_value:str) -> None:
    """
    Selects the item condition via the condition dialog of the ad form.

    If the dialog cannot be opened nothing else is attempted. A failure to select the radio button is
    only logged, the dialog is confirmed regardless.

    Args:
        condition_value: suffix of the radio button ID ("radio-button-<condition_value>") to select

    Raises:
        TimeoutError: if the dialog cannot be confirmed
    """
    open_dialog_xpath = '//*[@id="j-post-listing-frontend-conditions"]//button[@aria-haspopup="true"]'
    confirm_button_xpath = '//dialog//button[.//span[text()="Bestätigen"]]'

    # Step 1: open the dialog, give up silently if that is not possible
    try:
        await self.web_click(By.XPATH, open_dialog_xpath)
    except TimeoutError:
        LOG.debug("Unable to open condition dialog and select condition [%s]", condition_value, exc_info = True)
        return

    # Step 2: pick the requested condition
    try:
        await self.web_click(By.ID, f"radio-button-{condition_value}")
    except TimeoutError:
        LOG.debug("Unable to select condition [%s]", condition_value, exc_info = True)

    # Step 3: confirm, the dialog must not stay open
    try:
        await self.web_click(By.XPATH, confirm_button_xpath)
    except TimeoutError as ex:
        raise TimeoutError(_("Unable to close condition dialog!")) from ex
|
||
|
||
async def __set_category(self, category:str | None, ad_file:str) -> None:
    """
    Sets the category of the ad, or verifies that one was selected automatically.

    Args:
        category: the category path to select; if empty the automatically detected category is relied on
        ad_file: the ad file, only used in the failure message

    A failure is reported through `ensure` when no category is given and no automatically selected
    category could be read from the page.
    """
    # click on something to trigger automatic category detection
    await self.web_click(By.ID, "pstad-descrptn")

    try:
        auto_detected = bool(await self.web_text(By.ID, "postad-category-path"))
    except TimeoutError:
        # Category auto-selection indicator not available within timeout.
        auto_detected = False

    if not category:
        ensure(auto_detected, f"No category specified in [{ad_file}] and automatic category detection failed")
        return

    await self.web_sleep()  # workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/39
    await self.web_click(By.ID, "pstad-lnk-chngeCtgry")
    await self.web_find(By.ID, "postad-step1-sbmt")

    await self.web_open(f"{self.root_url}/p-kategorie-aendern.html#?path={category}")
    await self.web_click(By.XPATH, "//*[@id='postad-step1-sbmt']/button")
|
||
|
||
async def __set_special_attributes(self, ad_cfg:Ad) -> None:
    """
    Fills in the form fields listed in `ad_cfg.special_attributes`.

    The key "condition_s" is delegated to the condition dialog handling. For every other key the form
    element is looked up by a name attribute containing the key and, failing that, by ID. How the value
    is entered depends on the element found: select, checkbox, combobox or plain text input.

    Args:
        ad_cfg: the effective ad config

    Raises:
        TimeoutError: if a field cannot be found or its value cannot be set
    """
    attributes = ad_cfg.special_attributes
    if not attributes:
        return

    LOG.debug("Found %i special attributes", len(attributes))
    for attr_key, attr_value in attributes.items():
        # values may be given as non-strings in the config
        value_text = str(attr_value)

        if attr_key == "condition_s":
            await self.__set_condition(value_text)
            continue

        LOG.debug("Setting special attribute [%s] to [%s]...", attr_key, value_text)

        # if the <select> element exists but is inside an invisible container, make the container visible
        container_xpath = f"//div[@class='l-row' and descendant::select[@id='{attr_key}']]"
        try:
            if not await self.web_check(By.XPATH, container_xpath, Is.DISPLAYED):
                container = await self.web_find(By.XPATH, container_xpath)
                await container.apply("elem => elem.singleNodeValue.style.display = 'block'")
        except TimeoutError:
            # Skip visibility adjustment when container cannot be located in time.
            pass  # nosec

        try:
            # look up by name first because IDs are sometimes composed, e.g. autos.marke_s+autos.model_s
            field = await self.web_find(By.XPATH, f"//*[contains(@name, '{attr_key}')]")
        except TimeoutError:
            # fall back to the ID because sometimes there is no name attribute
            try:
                field = await self.web_find(By.ID, attr_key)
            except TimeoutError as ex:
                LOG.debug("Attribute field '%s' could not be found.", attr_key)
                raise TimeoutError(_("Failed to set attribute '%s'") % attr_key) from ex

        try:
            field_id:str = str(field.attrs.id)
            if field.local_name == "select":
                LOG.debug("Attribute field '%s' seems to be a select...", attr_key)
                await self.web_select(By.ID, field_id, value_text)
            elif field.attrs.type == "checkbox":
                LOG.debug("Attribute field '%s' seems to be a checkbox...", attr_key)
                await self.web_click(By.ID, field_id)
            elif field.attrs.type == "text" and field.attrs.get("role") == "combobox":
                LOG.debug("Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)...", attr_key)
                await self.web_select_combobox(By.ID, field_id, value_text)
            else:
                LOG.debug("Attribute field '%s' seems to be a text input...", attr_key)
                await self.web_input(By.ID, field_id, value_text)
        except TimeoutError as ex:
            LOG.debug("Failed to set attribute field '%s' via known input types.", attr_key)
            raise TimeoutError(_("Failed to set attribute '%s'") % attr_key) from ex
        LOG.debug("Successfully set attribute field [%s] to [%s]...", attr_key, value_text)
|
||
|
||
async def __set_shipping(self, ad_cfg:Ad, mode:AdUpdateStrategy = AdUpdateStrategy.REPLACE) -> None:
    """
    Sets the shipping related fields of the ad form.

    Three cases are handled:
    - shipping type "PICKUP": the pickup radio button is clicked (best effort)
    - shipping options configured: the shipping dialog is opened and the options are (de-)selected
    - otherwise: either the shipping select element is set, if it is displayed, or the individual
      shipping costs are entered in the shipping dialog

    Args:
        ad_cfg: the effective ad config
        mode: the mode of ad editing

    Raises:
        TimeoutError: if an element of the shipping dialog is not found in time
    """
    quick_timeout = self._timeout("quick_dom")
    open_dialog_xpath = '//button//span[contains(., "Versandmethoden auswählen")]'
    other_methods_xpath = '//dialog//button[contains(., "Andere Versandmethoden")]'
    back_button_xpath = '//dialog//button[contains(., "Zurück")]'
    costs_input_xpath = '//input[contains(@placeholder, "Versandkosten (optional)")]'

    if ad_cfg.shipping_type == "PICKUP":
        try:
            await self.web_click(By.ID, "radio-pickup")
        except TimeoutError as ex:
            LOG.debug(ex, exc_info = True)
        return

    if ad_cfg.shipping_options:
        await self.web_click(By.XPATH, open_dialog_xpath)

        if mode == AdUpdateStrategy.MODIFY:
            try:
                # when "Andere Versandmethoden" is not available, go back and start over new
                await self.web_find(By.XPATH, other_methods_xpath, timeout = quick_timeout)
            except TimeoutError:
                await self.web_click(By.XPATH, back_button_xpath)

                # in some categories we need to go another dialog back
                try:
                    await self.web_find(By.XPATH, other_methods_xpath, timeout = quick_timeout)
                except TimeoutError:
                    await self.web_click(By.XPATH, back_button_xpath)

        await self.web_click(By.XPATH, other_methods_xpath)
        await self.__set_shipping_options(ad_cfg, mode)
        return

    special_shipping_selector = '//select[contains(@id, ".versand_s")]'
    if await self.web_check(By.XPATH, special_shipping_selector, Is.DISPLAYED):
        # try to set special attribute selector (then we have a commercial account)
        await self.web_select(By.XPATH, special_shipping_selector, "ja" if ad_cfg.shipping_type == "SHIPPING" else "nein")
        return

    try:
        # no options, only costs: set custom shipping cost
        await self.web_click(By.XPATH, open_dialog_xpath)
        try:
            # when "Andere Versandmethoden" is not available, then we are already on the individual page
            await self.web_click(By.XPATH, other_methods_xpath)
        except TimeoutError:
            # Dialog option not present; already on the individual shipping page.
            pass

        try:
            # only click on "Individueller Versand" when the shipping costs input is not available,
            # otherwise it is already checked (important for mode = UPDATE)
            await self.web_find(By.XPATH, costs_input_xpath, timeout = quick_timeout)
        except TimeoutError:
            # Input not visible yet; click the individual shipping option.
            await self.web_click(By.XPATH, '//*[contains(@id, "INDIVIDUAL") and contains(@data-testid, "Individueller Versand")]')

        if ad_cfg.shipping_costs is not None:
            # the costs are entered with a decimal comma
            await self.web_input(By.XPATH, costs_input_xpath, str(ad_cfg.shipping_costs).replace(".", ","))
        await self.web_click(By.XPATH, '//dialog//button[contains(., "Fertig")]')
    except TimeoutError as ex:
        LOG.debug(ex, exc_info = True)
        raise TimeoutError(_("Unable to close shipping dialog!")) from ex
|
||
|
||
async def __set_shipping_options(self, ad_cfg:Ad, mode:AdUpdateStrategy = AdUpdateStrategy.REPLACE) -> None:
    """
    (De-)selects the package options in the shipping dialog according to `ad_cfg.shipping_options`.

    All configured options must belong to the same package size. A TimeoutError raised while selecting
    the size or the packages is only logged; the dialog is closed via the apply button regardless.

    Args:
        ad_cfg: the effective ad config
        mode: the mode of ad editing; in MODIFY mode the state of every checkbox is inspected

    Raises:
        KeyError: if an unknown shipping option is configured
        ValueError: if the configured options span more than one package size
        TimeoutError: if the dialog cannot be closed
    """
    if not ad_cfg.shipping_options:
        return

    # config key -> (size label, radio button ID suffix, package label used in data-testid)
    shipping_options_mapping = {
        "DHL_2": ("Klein", "SMALL", "Paket 2 kg"),
        "Hermes_Päckchen": ("Klein", "SMALL", "Päckchen"),
        "Hermes_S": ("Klein", "SMALL", "S-Paket"),
        "DHL_5": ("Mittel", "MEDIUM", "Paket 5 kg"),
        "Hermes_M": ("Mittel", "MEDIUM", "M-Paket"),
        "DHL_10": ("Groß", "LARGE", "Paket 10 kg"),
        "DHL_20": ("Groß", "LARGE", "Paket 20 kg"),
        "DHL_31,5": ("Groß", "LARGE", "Paket 31,5 kg"),
        "Hermes_L": ("Groß", "LARGE", "L-Paket"),
    }
    try:
        selected_entries = [shipping_options_mapping[option] for option in set(ad_cfg.shipping_options)]
    except KeyError as ex:
        raise KeyError(f"Unknown shipping option(s), please refer to the documentation/README: {ad_cfg.shipping_options}") from ex

    size_labels, size_selectors, wanted_packages = zip(*selected_entries, strict = False)

    try:
        # unpacking fails unless exactly one distinct size is configured
        (wanted_size,) = set(size_labels)
    except ValueError as ex:
        raise ValueError("You can only specify shipping options for one package size!") from ex

    try:
        size_radio_id = f"radio-button-{size_selectors[0]}"
        size_radio = await self.web_find(By.ID, size_radio_id)

        if hasattr(size_radio.attrs, "checked"):
            # in the same size category all options are preselected, so deselect the unwanted ones
            packages_to_click = [
                package for size, _selector, package in shipping_options_mapping.values() if size == wanted_size and package not in wanted_packages
            ]
        else:
            # in a different size category nothing is preselected, so select all we want
            await self.web_click(By.ID, size_radio_id)
            packages_to_click = list(wanted_packages)

        await self.web_click(By.XPATH, '//dialog//button[contains(., "Weiter")]')

        if mode == AdUpdateStrategy.MODIFY:
            # in update mode we cannot rely on any information and have to (de-)select every package
            LOG.debug("Using MODIFY mode logic for shipping options")

            # only the packages of the chosen size are offered
            packages_of_size = [package for size, _selector, package in shipping_options_mapping.values() if size == wanted_size]
            LOG.debug("Processing %d packages for size '%s'", len(packages_of_size), wanted_size)

            for package in packages_of_size:
                checkbox_xpath = f'//dialog//input[contains(@data-testid, "{package}")]'
                checkbox = await self.web_find(By.XPATH, checkbox_xpath)
                is_checked = hasattr(checkbox.attrs, "checked")
                is_wanted = package in wanted_packages

                LOG.debug(
                    "Package '%s': checked=%s, wanted=%s", package, is_checked, is_wanted
                )

                if is_wanted and not is_checked:
                    LOG.debug("Selecting package '%s'", package)
                    await self.web_click(By.XPATH, checkbox_xpath)
                elif is_checked and not is_wanted:
                    LOG.debug("Deselecting package '%s'", package)
                    await self.web_click(By.XPATH, checkbox_xpath)
        else:
            for package in packages_to_click:
                await self.web_click(By.XPATH, f'//dialog//input[contains(@data-testid, "{package}")]')
    except TimeoutError as ex:
        LOG.debug(ex, exc_info = True)

    try:
        # Click apply button
        await self.web_click(By.XPATH, '//dialog//button[contains(., "Fertig")]')
    except TimeoutError as ex:
        raise TimeoutError(_("Unable to close shipping dialog!")) from ex
|
||
|
||
async def __upload_images(self, ad_cfg:Ad) -> None:
    """
    Uploads all images of the ad and waits until a thumbnail is present for each of them.

    Does nothing if the ad has no images.

    Args:
        ad_cfg: the effective ad config

    Raises:
        TimeoutError: if the number of thumbnails does not match the number of images within the
            "image_upload" timeout
    """
    images = ad_cfg.images
    if not images:
        return

    thumbnail_selector = "ul#j-pictureupload-thumbnails > li:not(.is-placeholder)"

    LOG.info(" -> found %s", pluralize("image", images))
    file_input:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]")

    for image_path in images:
        LOG.info(" -> uploading image [%s]", image_path)
        await file_input.send_file(image_path)
        await self.web_sleep()

    # Wait for all images to be processed and thumbnails to appear
    expected_count = len(images)
    LOG.info(" -> waiting for %s to be processed...", pluralize("image", images))

    async def check_thumbnails_uploaded() -> bool:
        """Polling condition: True once exactly `expected_count` thumbnails are present."""
        try:
            # quick timeout because this condition is evaluated repeatedly
            found = await self.web_find_all(By.CSS_SELECTOR, thumbnail_selector, timeout = self._timeout("quick_dom"))
        except TimeoutError:
            # No thumbnails found yet, continue polling
            return False
        processed = len(found)
        if processed < expected_count:
            LOG.debug(" -> %d of %d images processed", processed, expected_count)
        return processed == expected_count

    try:
        await self.web_await(check_thumbnails_uploaded, timeout = self._timeout("image_upload"), timeout_error_message = _("Image upload timeout exceeded"))
    except TimeoutError as ex:
        # Determine the current count for a more helpful error message
        try:
            current_count = len(await self.web_find_all(By.CSS_SELECTOR, thumbnail_selector, timeout = self._timeout("quick_dom")))
        except TimeoutError:
            # Still no thumbnails after full timeout
            current_count = 0
        raise TimeoutError(
            _("Not all images were uploaded within timeout. Expected %(expected)d, found %(found)d thumbnails.")
            % {"expected": expected_count, "found": current_count}
        ) from ex

    LOG.info(" -> all images uploaded successfully")
|
||
|
||
async def download_ads(self) -> None:
    """
    Determines which download mode was chosen with the arguments, and calls the specified download routine.
    This downloads either all, only unsaved (new), or specific ads given by ID.

    The mode is derived from `self.ads_selector`:
      * contains the token "all"      -> every ad found in the ad overview is downloaded
      * is "new"                      -> only ads whose id is not present in the locally loaded ad files
      * comma-separated numeric ids   -> exactly those ads
    Any other selector value results in no download branch being taken.
    """
    # Fetch published ads once from manage-ads JSON to avoid repetitive API calls during extraction
    # Build lookup dict inline and pass directly to extractor (no cache abstraction needed)
    LOG.info("Fetching published ads...")
    published_ads = await self._fetch_published_ads()
    published_ads_by_id:dict[int, dict[str, Any]] = {}
    for published_ad in published_ads:
        try:
            ad_id = published_ad.get("id")
            if ad_id is not None:
                published_ads_by_id[int(ad_id)] = published_ad
        except (ValueError, TypeError):
            LOG.warning("Skipping ad with non-numeric id: %s", published_ad.get("id"))
    LOG.info("Loaded %s published ads.", len(published_ads_by_id))

    workspace = self._workspace_or_raise()
    xdg_paths.ensure_directory(workspace.download_dir, "downloaded ads directory")
    ad_extractor = extract.AdExtractor(self.browser, self.config, workspace.download_dir, published_ads_by_id = published_ads_by_id)

    # Normalize comma-separated keyword selectors; set deduplication collapses "new,new" → {"new"}
    selector_tokens = {s.strip() for s in self.ads_selector.split(",")}
    if "all" in selector_tokens:
        effective_selector = "all"
    elif len(selector_tokens) == 1:
        effective_selector = next(iter(selector_tokens))  # e.g. "new,new" → "new"
    else:
        effective_selector = self.ads_selector  # numeric IDs: "123,456" — unchanged

    if effective_selector in {"all", "new"}:  # explore ads overview for these two modes
        LOG.info("Scanning your ad overview...")
        own_ad_urls = await ad_extractor.extract_own_ads_urls()
        LOG.info("%s found.", pluralize("ad", len(own_ad_urls)))

        if effective_selector == "all":  # download all of your ads
            LOG.info("Starting download of all ads...")

            success_count = 0
            # call download function for each ad page
            for ad_url in own_ad_urls:
                ad_id = ad_extractor.extract_ad_id_from_ad_url(ad_url)
                if await ad_extractor.navigate_to_ad_page(ad_url):
                    await ad_extractor.download_ad(ad_id)
                    success_count += 1
            LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls))

        elif effective_selector == "new":  # download only unsaved ads
            # check which ads already saved
            # (a set, because it is probed once per overview URL below)
            saved_ad_ids:set[int] = set()
            ads = self.load_ads(ignore_inactive = False, exclude_ads_with_id = False)  # do not skip because of existing IDs
            for ad in ads:
                saved_ad_id = ad[1].id
                if saved_ad_id is None:
                    LOG.debug("Skipping saved ad without id (likely unpublished or manually created): %s", ad[0])
                    continue
                saved_ad_ids.add(int(saved_ad_id))

            # determine ad IDs from links
            ad_id_by_url = {url: ad_extractor.extract_ad_id_from_ad_url(url) for url in own_ad_urls}

            LOG.info("Starting download of not yet downloaded ads...")
            new_count = 0
            for ad_url, ad_id in ad_id_by_url.items():
                # check if ad with ID already saved
                if ad_id in saved_ad_ids:
                    LOG.info("The ad with id %d has already been saved.", ad_id)
                    continue

                if await ad_extractor.navigate_to_ad_page(ad_url):
                    await ad_extractor.download_ad(ad_id)
                    new_count += 1
            LOG.info("%s were downloaded from your profile.", pluralize("new ad", new_count))

    elif _NUMERIC_IDS_RE.match(effective_selector):  # download ad(s) with specific id(s)
        ids = [int(n) for n in effective_selector.split(",")]
        LOG.info("Starting download of ad(s) with the id(s):")
        LOG.info(" | ".join([str(ad_id) for ad_id in ids]))

        for ad_id in ids:  # call download routine for every id
            exists = await ad_extractor.navigate_to_ad_page(ad_id)
            if exists:
                await ad_extractor.download_ad(ad_id)
                LOG.info("Downloaded ad with id %d", ad_id)
            else:
                LOG.error("The page with the id %d does not exist!", ad_id)
|
||
|
||
def __get_description(self, ad_cfg:Ad, *, with_affixes:bool) -> str:
    """Return the ad description, optionally wrapped in the configured prefix and suffix.

    Affix precedence as implemented here (highest to lowest):
    1. Ad-level affix (ad_cfg.description_prefix / description_suffix) when it is not None
    2. Global affix (config.ad_defaults.description_prefix / description_suffix)
    3. Empty string

    NOTE(review): the legacy nested form (ad_defaults.description.prefix/suffix) is not read
    in this method; presumably it is migrated while the config is loaded - verify.

    Args:
        ad_cfg: The ad whose description is returned
        with_affixes: If True, prefix and suffix are applied and every "@" in the
            combined text is replaced by "(at)"; if False, the description is returned as is

    Returns:
        The raw or complete description with prefix and suffix applied

    Raises:
        Whatever `ensure` raises when the resulting text is longer than MAX_DESCRIPTION_LENGTH
    """
    body = ad_cfg.description if ad_cfg.description else ""

    if not with_affixes:
        result = body
    else:
        # Ad-level values win as soon as they are set (even to an empty string)
        if ad_cfg.description_prefix is not None:
            leading = ad_cfg.description_prefix
        else:
            leading = self.config.ad_defaults.description_prefix or ""

        if ad_cfg.description_suffix is not None:
            trailing = ad_cfg.description_suffix
        else:
            trailing = self.config.ad_defaults.description_suffix or ""

        # Join the three parts, then mask "@" as "(at)"
        combined = "".join((str(leading), str(body), str(trailing)))
        result = combined.replace("@", "(at)")

    # The length limit applies to the final text, i.e. including any affixes
    ensure(
        len(result) <= MAX_DESCRIPTION_LENGTH,
        f"Length of ad description including prefix and suffix exceeds {MAX_DESCRIPTION_LENGTH} chars. Description length: {len(result)} chars.",
    )

    return result
|
||
|
||
def update_content_hashes(self, ads:list[tuple[str, Ad, dict[str, Any]]]) -> None:
    """
    Recomputes the content hash of every given ad and saves ad files whose stored hash differs.

    Args:
        ads: tuples of (ad file path, ad model, original ad dict as stored in the file).
            The original dict is updated in place and written back via `dicts.save_dict`
            only when its "content_hash" differs from the freshly computed one.
    """
    updated_count = 0
    total = len(ads)

    # The progress output uses the position in the list, not the number of ads updated so far
    for position, (ad_file, ad_cfg, ad_cfg_orig) in enumerate(ads, start = 1):
        LOG.info("Processing %s/%s: '%s' from [%s]...", position, total, ad_cfg.title, ad_file)
        ad_cfg.update_content_hash()
        # .get(): an ad file that has no stored hash yet is treated as "differs" instead of raising KeyError
        if ad_cfg.content_hash != ad_cfg_orig.get("content_hash"):
            updated_count += 1
            ad_cfg_orig["content_hash"] = ad_cfg.content_hash
            dicts.save_dict(ad_file, ad_cfg_orig)

    LOG.info("############################################")
    LOG.info("DONE: Updated [content_hash] in %s", pluralize("ad", updated_count))
    LOG.info("############################################")
|
||
|
||
|
||
#############################
|
||
# main entry point
|
||
#############################
|
||
|
||
|
||
def main(args:list[str]) -> None:
    """
    Entry point: prints the banner, sets up logging and signal handling, then runs the bot.

    Args:
        args: command line arguments, passed on unchanged to `KleinanzeigenBot.run`.
            The banner is skipped when "version" is among them.

    Raises:
        CaptchaEncountered: re-raised unchanged; every other `Exception` is handed to
            `error_handlers.on_exception` instead of propagating.
    """
    # NOTE(review): the banner text below is reproduced exactly as found in this copy of the file;
    # its spacing looks whitespace-collapsed - verify against the upstream repository.
    if "version" not in args:
        print(
            textwrap.dedent(rf"""
            _ _ _ _ _ _
            | | _| | ___(_)_ __ __ _ _ __ _______(_) __ _ ___ _ __ | |__ ___ | |_
            | |/ / |/ _ \ | '_ \ / _` | '_ \|_ / _ \ |/ _` |/ _ \ '_ \ ____| '_ \ / _ \| __|
            | <| | __/ | | | | (_| | | | |/ / __/ | (_| | __/ | | |____| |_) | (_) | |_
            |_|\_\_|\___|_|_| |_|\__,_|_| |_/___\___|_|\__, |\___|_| |_| |_.__/ \___/ \__|
            |___/
            https://github.com/Second-Hand-Friends/kleinanzeigen-bot
            Version: {__version__}
            """)[1:],
            flush = True,
        )  # [1:] removes the first empty blank line

    loggers.configure_console_logging()

    signal.signal(signal.SIGINT, error_handlers.on_sigint)  # capture CTRL+C

    # sys.excepthook = error_handlers.on_exception
    # -> commented out because it causes PyInstaller to log "[PYI-28040:ERROR] Failed to execute script '__main__' due to unhandled exception!",
    #    despite the exceptions being properly processed by our custom error_handlers.on_exception callback.
    #    We now handle exceptions explicitly using a top-level try/except block.

    atexit.register(loggers.flush_all_handlers)

    try:
        bot = KleinanzeigenBot()
        atexit.register(bot.close_browser_session)
        nodriver.loop().run_until_complete(bot.run(args))  # type: ignore[attr-defined]
    except CaptchaEncountered:
        # Must not be swallowed by the generic handler below; bare raise keeps the original traceback
        raise
    except Exception:
        error_handlers.on_exception(*sys.exc_info())
|
||
|
||
|
||
# Running this file directly as a script is rejected: an error is logged to the console
# and the process exits with status 1.
if __name__ == "__main__":
    loggers.configure_console_logging()
    LOG.error("Direct execution not supported. Use 'pdm run app'")
    sys.exit(1)
|