feat: integrate XDG paths into bot core (#776)

## ℹ️ Description
Wire XDG path resolution into main bot components.

- Link to the related issue(s): N/A (new feature)
- Integrates installation mode detection into bot core

## 📋 Changes Summary

- Added `finalize_installation_mode()` method for mode detection
- UpdateChecker, AdExtractor now respect installation mode
- Dynamic browser profile defaults (resolved at runtime)
- German translations for installation mode messages
- Comprehensive tests for installation mode integration

**Part 2 of 3 for XDG support**
- Depends on: PR #775 (must be merged first)
- Will rebase on main after merge of previous PR

### ⚙️ Type of Change
- [x] New feature (adds new functionality without breaking existing
usage)

## Checklist
- [x] I have reviewed my changes to ensure they meet the project's
standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run
test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that my contribution may be
used, modified, copied, and redistributed under the terms of your
choice.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Support for portable and XDG (system-wide) installation modes with
automatic detection and interactive first-run setup.
* Config and paths standardized so app stores config, downloads, logs,
and browser profiles in appropriate locations per mode.
  * Update checker improved for more reliable version/commit detection.

* **Chores**
* Moved dependency to runtime: platformdirs added to main dependencies.

* **Tests**
* Added comprehensive tests for installation modes, path utilities, and
related behaviors.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Jens
2026-01-23 07:36:10 +01:00
committed by GitHub
parent 7468ef03dc
commit e8cf10101d
7 changed files with 1268 additions and 1422 deletions

View File

@@ -19,7 +19,7 @@ from ._version import __version__
from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial, Contact, calculate_auto_price
from .model.config_model import Config
from .update_checker import UpdateChecker
from .utils import dicts, error_handlers, loggers, misc
from .utils import dicts, error_handlers, loggers, misc, xdg_paths
from .utils.exceptions import CaptchaEncountered
from .utils.files import abspath
from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale
@@ -59,16 +59,13 @@ def _repost_cycle_ready(ad_cfg:Ad, ad_file_relative:str) -> bool:
ad_file_relative,
max(remaining, 1), # Clamp to 1 to avoid showing "0 more reposts" when at threshold
total_reposts,
applied_cycles
applied_cycles,
)
return False
if eligible_cycles <= applied_cycles:
LOG.debug(
_("Auto price reduction already applied for [%s]: %s reductions match %s eligible reposts"),
ad_file_relative,
applied_cycles,
eligible_cycles
_("Auto price reduction already applied for [%s]: %s reductions match %s eligible reposts"), ad_file_relative, applied_cycles, eligible_cycles
)
return False
@@ -89,11 +86,7 @@ def _day_delay_elapsed(ad_cfg:Ad, ad_file_relative:str) -> bool:
reference = ad_cfg.updated_on or ad_cfg.created_on
if not reference:
LOG.info(
_("Auto price reduction delayed for [%s]: waiting %s days but publish timestamp missing"),
ad_file_relative,
delay_days
)
LOG.info(_("Auto price reduction delayed for [%s]: waiting %s days but publish timestamp missing"), ad_file_relative, delay_days)
return False
# Note: .days truncates to whole days (e.g., 1.9 days -> 1 day)
@@ -101,12 +94,7 @@ def _day_delay_elapsed(ad_cfg:Ad, ad_file_relative:str) -> bool:
# Both misc.now() and stored timestamps use UTC (via misc.now()), ensuring consistent calculations
elapsed_days = (misc.now() - reference).days
if elapsed_days < delay_days:
LOG.info(
_("Auto price reduction delayed for [%s]: waiting %s days (elapsed %s)"),
ad_file_relative,
delay_days,
elapsed_days
)
LOG.info(_("Auto price reduction delayed for [%s]: waiting %s days (elapsed %s)"), ad_file_relative, delay_days, elapsed_days)
return False
return True
@@ -132,11 +120,7 @@ def apply_auto_price_reduction(ad_cfg:Ad, _ad_cfg_orig:dict[str, Any], ad_file_r
return
if ad_cfg.auto_price_reduction.min_price is not None and ad_cfg.auto_price_reduction.min_price == base_price:
LOG.warning(
_("Auto price reduction is enabled for [%s] but min_price equals price (%s) - no reductions will occur."),
ad_file_relative,
base_price
)
LOG.warning(_("Auto price reduction is enabled for [%s] but min_price equals price (%s) - no reductions will occur."), ad_file_relative, base_price)
return
if not _repost_cycle_ready(ad_cfg, ad_file_relative):
@@ -148,11 +132,7 @@ def apply_auto_price_reduction(ad_cfg:Ad, _ad_cfg_orig:dict[str, Any], ad_file_r
applied_cycles = ad_cfg.price_reduction_count or 0
next_cycle = applied_cycles + 1
effective_price = calculate_auto_price(
base_price = base_price,
auto_price_reduction = ad_cfg.auto_price_reduction,
target_reduction_cycle = next_cycle
)
effective_price = calculate_auto_price(base_price=base_price, auto_price_reduction=ad_cfg.auto_price_reduction, target_reduction_cycle=next_cycle)
if effective_price is None:
return
@@ -160,28 +140,17 @@ def apply_auto_price_reduction(ad_cfg:Ad, _ad_cfg_orig:dict[str, Any], ad_file_r
if effective_price == base_price:
# Still increment counter so small fractional reductions can accumulate over multiple cycles
ad_cfg.price_reduction_count = next_cycle
LOG.info(
_("Auto price reduction kept price %s after attempting %s reduction cycles"),
effective_price,
next_cycle
)
LOG.info(_("Auto price reduction kept price %s after attempting %s reduction cycles"), effective_price, next_cycle)
return
LOG.info(
_("Auto price reduction applied: %s -> %s after %s reduction cycles"),
base_price,
effective_price,
next_cycle
)
LOG.info(_("Auto price reduction applied: %s -> %s after %s reduction cycles"), base_price, effective_price, next_cycle)
ad_cfg.price = effective_price
ad_cfg.price_reduction_count = next_cycle
# Note: price_reduction_count is persisted to ad_cfg_orig only after successful publish
class KleinanzeigenBot(WebScrapingMixin):
def __init__(self) -> None:
# workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/295
# see https://github.com/pyinstaller/pyinstaller/issues/7229#issuecomment-1309383026
os.environ["SSL_CERT_FILE"] = certifi.where()
@@ -192,12 +161,17 @@ class KleinanzeigenBot(WebScrapingMixin):
self.config: Config
self.config_file_path = abspath("config.yaml")
self.config_explicitly_provided = False
self.installation_mode: xdg_paths.InstallationMode | None = None
self.categories: dict[str, str] = {}
self.file_log: loggers.LogFileHandle | None = None
log_file_basename = is_frozen() and os.path.splitext(os.path.basename(sys.executable))[0] or self.__module__
self.log_file_path: str | None = abspath(f"{log_file_basename}.log")
self.log_file_basename = log_file_basename
self.log_file_explicitly_provided = False
self.command = "help"
self.ads_selector = "due"
@@ -209,11 +183,71 @@ class KleinanzeigenBot(WebScrapingMixin):
self.file_log = None
self.close_browser_session()
@property
def installation_mode_or_portable(self) -> xdg_paths.InstallationMode:
    """The resolved installation mode, falling back to "portable" while detection is pending."""
    detected = self.installation_mode
    return detected if detected else "portable"
def get_version(self) -> str:
    """Return the application's version string (re-exported from ``._version``)."""
    return __version__
def finalize_installation_mode(self) -> None:
    """
    Finalize installation mode detection after CLI args are parsed.

    Must be called after parse_args() to respect --config overrides.
    Resolves ``self.installation_mode`` ("portable" or "xdg"), and may
    rewrite ``self.config_file_path`` and ``self.log_file_path`` so all
    paths match the detected mode. No-op for the help/version commands.
    """
    if self.command in {"help", "version"}:
        return

    # Check if config_file_path was already customized (by --config or tests)
    default_portable_config = xdg_paths.get_config_file_path("portable").resolve()
    config_path = Path(self.config_file_path).resolve() if self.config_file_path else None
    config_was_customized = self.config_explicitly_provided or (config_path is not None and config_path != default_portable_config)

    if config_was_customized and self.config_file_path:
        # Config path was explicitly set - detect mode based on it
        LOG.debug("Detecting installation mode from explicit config path: %s", self.config_file_path)
        if config_path is not None and config_path == (Path.cwd() / "config.yaml").resolve():
            # Explicit path points to CWD config
            self.installation_mode = "portable"
            LOG.debug("Explicit config is in CWD, using portable mode")
        elif config_path is not None and config_path.is_relative_to(xdg_paths.get_xdg_base_dir("config").resolve()):
            # Explicit path is within XDG config directory
            self.installation_mode = "xdg"
            LOG.debug("Explicit config is in XDG directory, using xdg mode")
        else:
            # Custom location - default to portable mode (all paths relative to config)
            self.installation_mode = "portable"
            LOG.debug("Explicit config is in custom location, defaulting to portable mode")
    else:
        # No explicit config - use auto-detection
        LOG.debug("Detecting installation mode...")
        self.installation_mode = xdg_paths.detect_installation_mode()
        if self.installation_mode is None:
            # First run - prompt user
            LOG.info(_("First run detected, prompting user for installation mode"))
            self.installation_mode = xdg_paths.prompt_installation_mode()

        # Set config path based on detected mode (only in the auto-detected case;
        # an explicitly provided config path is left untouched above)
        self.config_file_path = str(xdg_paths.get_config_file_path(self.installation_mode))

    # Set log file path based on mode (unless explicitly overridden via --logfile)
    using_default_portable_log = (
        self.log_file_path is not None and Path(self.log_file_path).resolve() == xdg_paths.get_log_file_path(self.log_file_basename, "portable").resolve()
    )
    if not self.log_file_explicitly_provided and using_default_portable_log:
        # Still using default portable path - update to match detected mode
        self.log_file_path = str(xdg_paths.get_log_file_path(self.log_file_basename, self.installation_mode))
        LOG.debug("Log file path: %s", self.log_file_path)

    # Log installation mode and config location (INFO level for user visibility)
    mode_display = "portable (current directory)" if self.installation_mode == "portable" else "system-wide (XDG directories)"
    LOG.info(_("Installation mode: %s"), mode_display)
    LOG.info(_("Config file: %s"), self.config_file_path)
async def run(self, args: list[str]) -> None:
self.parse_args(args)
self.finalize_installation_mode()
try:
match self.command:
case "help":
@@ -233,7 +267,7 @@ class KleinanzeigenBot(WebScrapingMixin):
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker = UpdateChecker(self.config, self.installation_mode_or_portable)
checker.check_for_updates()
self.load_ads()
LOG.info("############################################")
@@ -242,13 +276,13 @@ class KleinanzeigenBot(WebScrapingMixin):
case "update-check":
self.configure_file_logging()
self.load_config()
checker = UpdateChecker(self.config)
checker = UpdateChecker(self.config, self.installation_mode_or_portable)
checker.check_for_updates(skip_interval_check=True)
case "update-content-hash":
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker = UpdateChecker(self.config, self.installation_mode_or_portable)
checker.check_for_updates()
self.ads_selector = "all"
if ads := self.load_ads(exclude_ads_with_id=False):
@@ -261,12 +295,14 @@ class KleinanzeigenBot(WebScrapingMixin):
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker = UpdateChecker(self.config, self.installation_mode_or_portable)
checker.check_for_updates()
if not (self.ads_selector in {"all", "new", "due", "changed"} or
any(selector in self.ads_selector.split(",") for selector in ("all", "new", "due", "changed")) or
re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
if not (
self.ads_selector in {"all", "new", "due", "changed"}
or any(selector in self.ads_selector.split(",") for selector in ("all", "new", "due", "changed"))
or re.compile(r"\d+[,\d+]*").search(self.ads_selector)
):
LOG.warning('You provided no ads selector. Defaulting to "due".')
self.ads_selector = "due"
@@ -282,10 +318,11 @@ class KleinanzeigenBot(WebScrapingMixin):
self.configure_file_logging()
self.load_config()
if not (self.ads_selector in {"all", "changed"} or
any(selector in self.ads_selector.split(",") for selector in
("all", "changed")) or
re.compile(r"\d+[,\d+]*").search(self.ads_selector)):
if not (
self.ads_selector in {"all", "changed"}
or any(selector in self.ads_selector.split(",") for selector in ("all", "changed"))
or re.compile(r"\d+[,\d+]*").search(self.ads_selector)
):
LOG.warning('You provided no ads selector. Defaulting to "changed".')
self.ads_selector = "changed"
@@ -301,7 +338,7 @@ class KleinanzeigenBot(WebScrapingMixin):
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker = UpdateChecker(self.config, self.installation_mode_or_portable)
checker.check_for_updates()
if ads := self.load_ads():
await self.create_browser_session()
@@ -315,7 +352,7 @@ class KleinanzeigenBot(WebScrapingMixin):
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker = UpdateChecker(self.config, self.installation_mode_or_portable)
checker.check_for_updates()
# Default to all ads if no selector provided
@@ -339,7 +376,7 @@ class KleinanzeigenBot(WebScrapingMixin):
self.ads_selector = "new"
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker = UpdateChecker(self.config, self.installation_mode_or_portable)
checker.check_for_updates()
await self.create_browser_session()
await self.login()
@@ -360,7 +397,9 @@ class KleinanzeigenBot(WebScrapingMixin):
exe = "python -m kleinanzeigen_bot"
if get_current_locale().language == "de":
print(textwrap.dedent(f"""\
print(
textwrap.dedent(
f"""\
Verwendung: {colorama.Fore.LIGHTMAGENTA_EX}{exe} BEFEHL [OPTIONEN]{colorama.Style.RESET_ALL}
Befehle:
@@ -408,9 +447,13 @@ class KleinanzeigenBot(WebScrapingMixin):
--logfile=<PATH> - Pfad zur Protokolldatei (STANDARD: ./kleinanzeigen-bot.log)
--lang=en|de - Anzeigesprache (STANDARD: Systemsprache, wenn unterstützt, sonst Englisch)
-v, --verbose - Aktiviert detaillierte Ausgabe nur nützlich zur Fehlerbehebung
""".rstrip()))
""".rstrip()
)
)
else:
print(textwrap.dedent(f"""\
print(
textwrap.dedent(
f"""\
Usage: {colorama.Fore.LIGHTMAGENTA_EX}{exe} COMMAND [OPTIONS]{colorama.Style.RESET_ALL}
Commands:
@@ -456,20 +499,13 @@ class KleinanzeigenBot(WebScrapingMixin):
--logfile=<PATH> - path to the logfile (DEFAULT: ./kleinanzeigen-bot.log)
--lang=en|de - display language (STANDARD: system language if supported, otherwise English)
-v, --verbose - enables verbose output - only useful when troubleshooting issues
""".rstrip()))
""".rstrip()
)
)
def parse_args(self, args: list[str]) -> None:
try:
options, arguments = getopt.gnu_getopt(args[1:], "hv", [
"ads=",
"config=",
"force",
"help",
"keep-old",
"logfile=",
"lang=",
"verbose"
])
options, arguments = getopt.gnu_getopt(args[1:], "hv", ["ads=", "config=", "force", "help", "keep-old", "logfile=", "lang=", "verbose"])
except getopt.error as ex:
LOG.error(ex.msg)
LOG.error("Use --help to display available options.")
@@ -482,11 +518,13 @@ class KleinanzeigenBot(WebScrapingMixin):
sys.exit(0)
case "--config":
self.config_file_path = abspath(value)
self.config_explicitly_provided = True
case "--logfile":
if value:
self.log_file_path = abspath(value)
else:
self.log_file_path = None
self.log_file_explicitly_provided = True
case "--ads":
self.ads_selector = value.strip().lower()
case "--force":
@@ -538,7 +576,7 @@ class KleinanzeigenBot(WebScrapingMixin):
"# yaml-language-server: $schema="
"https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot"
"/refs/heads/main/schemas/config.schema.json"
)
),
)
def load_config(self) -> None:
@@ -591,7 +629,7 @@ class KleinanzeigenBot(WebScrapingMixin):
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days",
ad_file_relative,
ad_age.days,
ad_cfg.republication_interval
ad_cfg.republication_interval,
)
return False
@@ -710,9 +748,7 @@ class KleinanzeigenBot(WebScrapingMixin):
parent_category = ad_cfg.category.rpartition(">")[0].strip()
resolved_category_id = self.categories.get(parent_category)
if resolved_category_id:
LOG.warning(
"Category [%s] unknown. Using category [%s] with ID [%s] instead.",
ad_cfg.category, parent_category, resolved_category_id)
LOG.warning("Category [%s] unknown. Using category [%s] with ID [%s] instead.", ad_cfg.category, parent_category, resolved_category_id)
if resolved_category_id:
ad_cfg.category = resolved_category_id
@@ -733,11 +769,7 @@ class KleinanzeigenBot(WebScrapingMixin):
ensure(images or not ad_cfg.images, f"No images found for given file patterns {ad_cfg.images} at {ad_dir}")
ad_cfg.images = list(dict.fromkeys(images))
ads.append((
ad_file,
ad_cfg,
ad_cfg_orig
))
ads.append((ad_file, ad_cfg, ad_cfg_orig))
LOG.info("Loaded %s", pluralize("ad", ads))
return ads
@@ -748,8 +780,7 @@ class KleinanzeigenBot(WebScrapingMixin):
async def check_and_wait_for_captcha(self, *, is_login_page: bool = True) -> None:
try:
captcha_timeout = self._timeout("captcha_detection")
await self.web_find(By.CSS_SELECTOR,
"iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']", timeout = captcha_timeout)
await self.web_find(By.CSS_SELECTOR, "iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']", timeout=captcha_timeout)
if not is_login_page and self.config.captcha.auto_restart:
LOG.warning("Captcha recognized - auto-restart enabled, abort run...")
@@ -823,9 +854,9 @@ class KleinanzeigenBot(WebScrapingMixin):
gdpr_timeout = self._timeout("gdpr_prompt")
await self.web_find(By.ID, "gdpr-banner-accept", timeout=gdpr_timeout)
await self.web_click(By.ID, "gdpr-banner-cmp-button")
await self.web_click(By.XPATH,
"//div[@id='ConsentManagementPage']//*//button//*[contains(., 'Alle ablehnen und fortfahren')]",
timeout = gdpr_timeout)
await self.web_click(
By.XPATH, "//div[@id='ConsentManagementPage']//*//button//*[contains(., 'Alle ablehnen und fortfahren')]", timeout=gdpr_timeout
)
except TimeoutError:
# GDPR banner not shown within timeout.
pass
@@ -864,10 +895,9 @@ class KleinanzeigenBot(WebScrapingMixin):
async def delete_ads(self, ad_cfgs: list[tuple[str, Ad, dict[str, Any]]]) -> None:
count = 0
published_ads = json.loads(
(await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
for (ad_file, ad_cfg, _ad_cfg_orig) in ad_cfgs:
for ad_file, ad_cfg, _ad_cfg_orig in ad_cfgs:
count += 1
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg.title, ad_file)
await self.delete_ad(ad_cfg, published_ads, delete_old_ads_by_title=self.config.publishing.delete_old_ads_by_title)
@@ -886,23 +916,20 @@ class KleinanzeigenBot(WebScrapingMixin):
ensure(csrf_token is not None, "Expected CSRF Token not found in HTML content!")
if delete_old_ads_by_title:
for published_ad in published_ads:
published_ad_id = int(published_ad.get("id", -1))
published_ad_title = published_ad.get("title", "")
if ad_cfg.id == published_ad_id or ad_cfg.title == published_ad_title:
LOG.info(" -> deleting %s '%s'...", published_ad_id, published_ad_title)
await self.web_request(
url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}",
method = "POST",
headers = {"x-csrf-token": str(csrf_token)}
url=f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}", method="POST", headers={"x-csrf-token": str(csrf_token)}
)
elif ad_cfg.id:
await self.web_request(
url=f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg.id}",
method="POST",
headers={"x-csrf-token": str(csrf_token)},
valid_response_codes = [200, 404]
valid_response_codes=[200, 404],
)
await self.web_sleep()
@@ -912,12 +939,11 @@ class KleinanzeigenBot(WebScrapingMixin):
async def extend_ads(self, ad_cfgs: list[tuple[str, Ad, dict[str, Any]]]) -> None:
"""Extends ads that are close to expiry."""
# Fetch currently published ads from API
published_ads = json.loads(
(await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
# Filter ads that need extension
ads_to_extend = []
for (ad_file, ad_cfg, ad_cfg_orig) in ad_cfgs:
for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs:
# Skip unpublished ads (no ID)
if not ad_cfg.id:
LOG.info(_(" -> SKIPPED: ad '%s' is not published yet"), ad_cfg.title)
@@ -949,8 +975,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info(_(" -> ad '%s' expires in %d days, will extend"), ad_cfg.title, days_until_expiry)
ads_to_extend.append((ad_file, ad_cfg, ad_cfg_orig, published_ad))
else:
LOG.info(_(" -> SKIPPED: ad '%s' expires in %d days (can only extend within 8 days)"),
ad_cfg.title, days_until_expiry)
LOG.info(_(" -> SKIPPED: ad '%s' expires in %d days (can only extend within 8 days)"), ad_cfg.title, days_until_expiry)
if not ads_to_extend:
LOG.info(_("No ads need extension at this time."))
@@ -1025,10 +1050,9 @@ class KleinanzeigenBot(WebScrapingMixin):
failed_count = 0
max_retries = 3
published_ads = json.loads(
(await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
for (ad_file, ad_cfg, ad_cfg_orig) in ad_cfgs:
for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs:
LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ad_cfgs), ad_cfg.title, ad_file)
if [x for x in published_ads if x["id"] == ad_cfg.id and x["state"] == "paused"]:
@@ -1072,8 +1096,9 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info(_("DONE: (Re-)published %s"), pluralize("ad", count))
LOG.info("############################################")
async def publish_ad(self, ad_file:str, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], published_ads:list[dict[str, Any]],
mode:AdUpdateStrategy = AdUpdateStrategy.REPLACE) -> None:
async def publish_ad(
self, ad_file: str, ad_cfg: Ad, ad_cfg_orig: dict[str, Any], published_ads: list[dict[str, Any]], mode: AdUpdateStrategy = AdUpdateStrategy.REPLACE
) -> None:
"""
@param ad_cfg: the effective ad config (i.e. with default values applied etc.)
@param ad_cfg_orig: the ad config as present in the YAML file
@@ -1186,8 +1211,7 @@ class KleinanzeigenBot(WebScrapingMixin):
#############################
# delete previous images because we don't know which have changed
#############################
img_items = await self.web_find_all(By.CSS_SELECTOR,
"ul#j-pictureupload-thumbnails > li:not(.is-placeholder)")
img_items = await self.web_find_all(By.CSS_SELECTOR, "ul#j-pictureupload-thumbnails > li:not(.is-placeholder)")
for element in img_items:
btn = await self.web_find(By.CSS_SELECTOR, "button.pictureupload-thumbnails-remove", parent=element)
await btn.click()
@@ -1354,8 +1378,10 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.web_input(By.ID, "postad-phonenumber", contact.phone)
except TimeoutError:
LOG.warning(
_("Phone number field not present on page. This is expected for many private accounts; "
"commercial accounts may still support phone numbers.")
_(
"Phone number field not present on page. This is expected for many private accounts; "
"commercial accounts may still support phone numbers."
)
)
async def update_ads(self, ad_cfgs: list[tuple[str, Ad, dict[str, Any]]]) -> None:
@@ -1372,10 +1398,9 @@ class KleinanzeigenBot(WebScrapingMixin):
"""
count = 0
published_ads = json.loads(
(await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
for (ad_file, ad_cfg, ad_cfg_orig) in ad_cfgs:
for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs:
ad = next((ad for ad in published_ads if ad["id"] == ad_cfg.id), None)
if not ad:
@@ -1511,8 +1536,7 @@ class KleinanzeigenBot(WebScrapingMixin):
# in some categories we need to go another dialog back
try:
await self.web_find(By.XPATH, '//dialog//button[contains(., "Andere Versandmethoden")]',
timeout = short_timeout)
await self.web_find(By.XPATH, '//dialog//button[contains(., "Andere Versandmethoden")]', timeout=short_timeout)
except TimeoutError:
await self.web_click(By.XPATH, '//dialog//button[contains(., "Zurück")]')
@@ -1527,12 +1551,10 @@ class KleinanzeigenBot(WebScrapingMixin):
else:
try:
# no options. only costs. Set custom shipping cost
await self.web_click(By.XPATH,
'//button//span[contains(., "Versandmethoden auswählen")]')
await self.web_click(By.XPATH, '//button//span[contains(., "Versandmethoden auswählen")]')
try:
# when "Andere Versandmethoden" is not available, then we are already on the individual page
await self.web_click(By.XPATH,
'//dialog//button[contains(., "Andere Versandmethoden")]')
await self.web_click(By.XPATH, '//dialog//button[contains(., "Andere Versandmethoden")]')
except TimeoutError:
# Dialog option not present; already on the individual shipping page.
pass
@@ -1540,16 +1562,15 @@ class KleinanzeigenBot(WebScrapingMixin):
try:
# only click on "Individueller Versand" when "IndividualShippingInput" is not available, otherwise its already checked
# (important for mode = UPDATE)
await self.web_find(By.XPATH,
'//input[contains(@placeholder, "Versandkosten (optional)")]',
timeout = short_timeout)
await self.web_find(By.XPATH, '//input[contains(@placeholder, "Versandkosten (optional)")]', timeout=short_timeout)
except TimeoutError:
# Input not visible yet; click the individual shipping option.
await self.web_click(By.XPATH, '//*[contains(@id, "INDIVIDUAL") and contains(@data-testid, "Individueller Versand")]')
if ad_cfg.shipping_costs is not None:
await self.web_input(By.XPATH, '//input[contains(@placeholder, "Versandkosten (optional)")]',
str.replace(str(ad_cfg.shipping_costs), ".", ","))
await self.web_input(
By.XPATH, '//input[contains(@placeholder, "Versandkosten (optional)")]', str.replace(str(ad_cfg.shipping_costs), ".", ",")
)
await self.web_click(By.XPATH, '//dialog//button[contains(., "Fertig")]')
except TimeoutError as ex:
LOG.debug(ex, exc_info=True)
@@ -1578,7 +1599,7 @@ class KleinanzeigenBot(WebScrapingMixin):
shipping_sizes, shipping_selector, shipping_packages = zip(*mapped_shipping_options, strict=False)
try:
shipping_size, = set(shipping_sizes)
(shipping_size,) = set(shipping_sizes)
except ValueError as ex:
raise ValueError("You can only specify shipping options for one package size!") from ex
@@ -1590,8 +1611,7 @@ class KleinanzeigenBot(WebScrapingMixin):
if shipping_size_radio_is_checked:
# in the same size category all options are preselected, so deselect the unwanted ones
unwanted_shipping_packages = [
package for size, selector, package in shipping_options_mapping.values()
if size == shipping_size and package not in shipping_packages
package for size, selector, package in shipping_options_mapping.values() if size == shipping_size and package not in shipping_packages
]
to_be_clicked_shipping_packages = unwanted_shipping_packages
else:
@@ -1606,10 +1626,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.debug("Using MODIFY mode logic for shipping options")
# get only correct size
selected_size_shipping_packages = [
package for size, selector, package in shipping_options_mapping.values()
if size == shipping_size
]
selected_size_shipping_packages = [package for size, selector, package in shipping_options_mapping.values() if size == shipping_size]
LOG.debug("Processing %d packages for size '%s'", len(selected_size_shipping_packages), shipping_size)
for shipping_package in selected_size_shipping_packages:
@@ -1618,10 +1635,7 @@ class KleinanzeigenBot(WebScrapingMixin):
shipping_package_checkbox_is_checked = hasattr(shipping_package_checkbox.attrs, "checked")
LOG.debug(
"Package '%s': checked=%s, wanted=%s",
shipping_package,
shipping_package_checkbox_is_checked,
shipping_package in shipping_packages
"Package '%s': checked=%s, wanted=%s", shipping_package, shipping_package_checkbox_is_checked, shipping_package in shipping_packages
)
# select wanted packages if not checked already
@@ -1636,9 +1650,7 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.web_click(By.XPATH, shipping_package_xpath)
else:
for shipping_package in to_be_clicked_shipping_packages:
await self.web_click(
By.XPATH,
f'//dialog//input[contains(@data-testid, "{shipping_package}")]')
await self.web_click(By.XPATH, f'//dialog//input[contains(@data-testid, "{shipping_package}")]')
except TimeoutError as ex:
LOG.debug(ex, exc_info=True)
try:
@@ -1668,7 +1680,7 @@ class KleinanzeigenBot(WebScrapingMixin):
thumbnails = await self.web_find_all(
By.CSS_SELECTOR,
"ul#j-pictureupload-thumbnails > li:not(.is-placeholder)",
timeout = self._timeout("quick_dom") # Fast timeout for polling
timeout=self._timeout("quick_dom"), # Fast timeout for polling
)
current_count = len(thumbnails)
if current_count < expected_count:
@@ -1679,28 +1691,20 @@ class KleinanzeigenBot(WebScrapingMixin):
return False
try:
await self.web_await(
check_thumbnails_uploaded,
timeout = self._timeout("image_upload"),
timeout_error_message = _("Image upload timeout exceeded")
)
await self.web_await(check_thumbnails_uploaded, timeout=self._timeout("image_upload"), timeout_error_message=_("Image upload timeout exceeded"))
except TimeoutError as ex:
# Get current count for better error message
try:
thumbnails = await self.web_find_all(
By.CSS_SELECTOR,
"ul#j-pictureupload-thumbnails > li:not(.is-placeholder)",
timeout = self._timeout("quick_dom")
By.CSS_SELECTOR, "ul#j-pictureupload-thumbnails > li:not(.is-placeholder)", timeout=self._timeout("quick_dom")
)
current_count = len(thumbnails)
except TimeoutError:
# Still no thumbnails after full timeout
current_count = 0
raise TimeoutError(
_("Not all images were uploaded within timeout. Expected %(expected)d, found %(found)d thumbnails.") % {
"expected": expected_count,
"found": current_count
}
_("Not all images were uploaded within timeout. Expected %(expected)d, found %(found)d thumbnails.")
% {"expected": expected_count, "found": current_count}
) from ex
LOG.info(_(" -> all images uploaded successfully"))
@@ -1711,7 +1715,7 @@ class KleinanzeigenBot(WebScrapingMixin):
This downloads either all, only unsaved (new), or specific ads given by ID.
"""
ad_extractor = extract.AdExtractor(self.browser, self.config)
ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable)
# use relevant download routine
if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes
@@ -1738,10 +1742,7 @@ class KleinanzeigenBot(WebScrapingMixin):
for ad in ads:
saved_ad_id = ad[1].id
if saved_ad_id is None:
LOG.debug(
"Skipping saved ad without id (likely unpublished or manually created): %s",
ad[0]
)
LOG.debug("Skipping saved ad without id (likely unpublished or manually created): %s", ad[0])
continue
saved_ad_ids.append(int(saved_ad_id))
@@ -1797,19 +1798,19 @@ class KleinanzeigenBot(WebScrapingMixin):
# Get prefix with precedence
prefix = (
# 1. Direct ad-level prefix
ad_cfg.description_prefix if ad_cfg.description_prefix is not None
ad_cfg.description_prefix
if ad_cfg.description_prefix is not None
# 2. Global prefix from config
else self.config.ad_defaults.description_prefix
or "" # Default to empty string if all sources are None
else self.config.ad_defaults.description_prefix or "" # Default to empty string if all sources are None
)
# Get suffix with precedence
suffix = (
# 1. Direct ad-level suffix
ad_cfg.description_suffix if ad_cfg.description_suffix is not None
ad_cfg.description_suffix
if ad_cfg.description_suffix is not None
# 2. Global suffix from config
else self.config.ad_defaults.description_suffix
or "" # Default to empty string if all sources are None
else self.config.ad_defaults.description_suffix or "" # Default to empty string if all sources are None
)
# Combine the parts and replace @ with (at)
@@ -1819,16 +1820,17 @@ class KleinanzeigenBot(WebScrapingMixin):
final_description = description_text
# Validate length
ensure(len(final_description) <= MAX_DESCRIPTION_LENGTH,
f"Length of ad description including prefix and suffix exceeds {MAX_DESCRIPTION_LENGTH} chars. "
f"Description length: {len(final_description)} chars.")
ensure(
len(final_description) <= MAX_DESCRIPTION_LENGTH,
f"Length of ad description including prefix and suffix exceeds {MAX_DESCRIPTION_LENGTH} chars. Description length: {len(final_description)} chars.",
)
return final_description
def update_content_hashes(self, ads: list[tuple[str, Ad, dict[str, Any]]]) -> None:
count = 0
for (ad_file, ad_cfg, ad_cfg_orig) in ads:
for ad_file, ad_cfg, ad_cfg_orig in ads:
LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ads), ad_cfg.title, ad_file)
ad_cfg.update_content_hash()
if ad_cfg.content_hash != ad_cfg_orig["content_hash"]:
@@ -1840,6 +1842,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("DONE: Updated [content_hash] in %s", pluralize("ad", count))
LOG.info("############################################")
#############################
# main entry point
#############################
@@ -1847,7 +1850,8 @@ class KleinanzeigenBot(WebScrapingMixin):
def main(args: list[str]) -> None:
if "version" not in args:
print(textwrap.dedent(rf"""
print(
textwrap.dedent(rf"""
_ _ _ _ _ _
| | _| | ___(_)_ __ __ _ _ __ _______(_) __ _ ___ _ __ | |__ ___ | |_
| |/ / |/ _ \ | '_ \ / _` | '_ \|_ / _ \ |/ _` |/ _ \ '_ \ ____| '_ \ / _ \| __|
@@ -1856,7 +1860,9 @@ def main(args:list[str]) -> None:
|___/
https://github.com/Second-Hand-Friends/kleinanzeigen-bot
Version: {__version__}
""")[1:], flush = True) # [1:] removes the first empty blank line
""")[1:],
flush=True,
) # [1:] removes the first empty blank line
loggers.configure_console_logging()

View File

@@ -15,7 +15,7 @@ from kleinanzeigen_bot.model.ad_model import ContactPartial
from .model.ad_model import AdPartial
from .model.config_model import Config
from .utils import dicts, files, i18n, loggers, misc, reflect
from .utils import dicts, files, i18n, loggers, misc, reflect, xdg_paths
from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin
__all__ = [
@@ -33,10 +33,13 @@ class AdExtractor(WebScrapingMixin):
Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page.
"""
def __init__(self, browser:Browser, config:Config) -> None:
def __init__(self, browser:Browser, config:Config, installation_mode:xdg_paths.InstallationMode = "portable") -> None:
super().__init__()
self.browser = browser
self.config:Config = config
if installation_mode not in {"portable", "xdg"}:
raise ValueError(f"Unsupported installation mode: {installation_mode}")
self.installation_mode:xdg_paths.InstallationMode = installation_mode
async def download_ad(self, ad_id:int) -> None:
"""
@@ -47,26 +50,19 @@ class AdExtractor(WebScrapingMixin):
"""
# create sub-directory for ad(s) to download (if necessary):
relative_directory = Path("downloaded-ads")
# make sure configured base directory exists (using exist_ok=True to avoid TOCTOU race)
await asyncio.get_running_loop().run_in_executor(None, lambda: relative_directory.mkdir(exist_ok = True)) # noqa: ASYNC240
LOG.info("Ensured ads directory exists at ./%s.", relative_directory)
download_dir = xdg_paths.get_downloaded_ads_path(self.installation_mode)
LOG.info(_("Using download directory: %s"), download_dir)
# Note: xdg_paths.get_downloaded_ads_path() already creates the directory
# Extract ad info and determine final directory path
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(
relative_directory, ad_id
)
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(download_dir, ad_id)
# Save the ad configuration file (offload to executor to avoid blocking the event loop)
ad_file_path = str(Path(final_dir) / f"ad_{ad_id}.yaml")
header_string = (
"# yaml-language-server: $schema="
"https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
)
await asyncio.get_running_loop().run_in_executor(
None,
lambda: dicts.save_dict(ad_file_path, ad_cfg.model_dump(), header = header_string)
"# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
)
await asyncio.get_running_loop().run_in_executor(None, lambda: dicts.save_dict(ad_file_path, ad_cfg.model_dump(), header = header_string))
@staticmethod
def _download_and_save_image_sync(url:str, directory:str, filename_prefix:str, img_nr:int) -> str | None:
@@ -114,14 +110,7 @@ class AdExtractor(WebScrapingMixin):
if current_img_url is None:
continue
img_path = await loop.run_in_executor(
None,
self._download_and_save_image_sync,
str(current_img_url),
directory,
img_fn_prefix,
img_nr
)
img_path = await loop.run_in_executor(None, self._download_and_save_image_sync, str(current_img_url), directory, img_fn_prefix, img_nr)
if img_path:
dl_counter += 1
@@ -217,10 +206,7 @@ class AdExtractor(WebScrapingMixin):
# Extract references using the CORRECTED selector
try:
page_refs:list[str] = [
str((await self.web_find(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = li)).attrs["href"])
for li in list_items
]
page_refs:list[str] = [str((await self.web_find(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = li)).attrs["href"]) for li in list_items]
refs.extend(page_refs)
LOG.info("Successfully extracted %s refs from page %s.", len(page_refs), current_page)
except Exception as e:
@@ -361,8 +347,7 @@ class AdExtractor(WebScrapingMixin):
info["id"] = ad_id
try: # try different locations known for creation date element
creation_date = await self.web_text(By.XPATH,
"/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
creation_date = await self.web_text(By.XPATH, "/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
except TimeoutError:
creation_date = await self.web_text(By.CSS_SELECTOR, "#viewad-extra-info > div:nth-child(1) > span:nth-child(2)")
@@ -380,9 +365,7 @@ class AdExtractor(WebScrapingMixin):
return ad_cfg
async def _extract_ad_page_info_with_directory_handling(
self, relative_directory:Path, ad_id:int
) -> tuple[AdPartial, Path]:
async def _extract_ad_page_info_with_directory_handling(self, relative_directory:Path, ad_id:int) -> tuple[AdPartial, Path]:
"""
Extracts ad information and handles directory creation/renaming.
@@ -415,8 +398,7 @@ class AdExtractor(WebScrapingMixin):
if await files.exists(temp_dir):
if self.config.download.rename_existing_folders:
# Rename the old folder to the new name with title
LOG.info("Renaming folder from %s to %s for ad %s...",
temp_dir.name, final_dir.name, ad_id)
LOG.info("Renaming folder from %s to %s for ad %s...", temp_dir.name, final_dir.name, ad_id)
LOG.debug("Renaming: %s -> %s", temp_dir, final_dir)
await loop.run_in_executor(None, temp_dir.rename, final_dir)
else:
@@ -471,14 +453,8 @@ class AdExtractor(WebScrapingMixin):
category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
except TimeoutError as exc:
LOG.error(
"Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)",
fallback_timeout,
category_ids
)
raise TimeoutError(
_("Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.") % {"seconds": fallback_timeout}
) from exc
LOG.error("Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)", fallback_timeout, category_ids)
raise TimeoutError(_("Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.") % {"seconds": fallback_timeout}) from exc
href_first:str = str(category_first_part.attrs["href"])
href_second:str = str(category_second_part.attrs["href"])
cat_num_first_raw = href_first.rsplit("/", maxsplit = 1)[-1]
@@ -553,8 +529,8 @@ class AdExtractor(WebScrapingMixin):
# reading shipping option from kleinanzeigen
# and find the right one by price
shipping_costs = json.loads(
(await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE"))
["content"])["data"]["shippingOptionsResponse"]["options"]
(await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE"))["content"]
)["data"]["shippingOptionsResponse"]["options"]
# map to internal shipping identifiers used by kleinanzeigen-bot
shipping_option_mapping = {
@@ -566,7 +542,7 @@ class AdExtractor(WebScrapingMixin):
"HERMES_001": "Hermes_Päckchen",
"HERMES_002": "Hermes_S",
"HERMES_003": "Hermes_M",
"HERMES_004": "Hermes_L"
"HERMES_004": "Hermes_L",
}
# Convert Euro to cents and round to nearest integer

View File

@@ -19,34 +19,16 @@ _MAX_PERCENTAGE:Final[int] = 100
class AutoPriceReductionConfig(ContextualModel):
enabled:bool = Field(
default = False,
description = "automatically lower the price of reposted ads"
)
enabled: bool = Field(default=False, description="automatically lower the price of reposted ads")
strategy: Literal["FIXED", "PERCENTAGE"] | None = Field(
default = None,
description = "PERCENTAGE reduces by a percentage of the previous price, FIXED reduces by a fixed amount"
default=None, description="PERCENTAGE reduces by a percentage of the previous price, FIXED reduces by a fixed amount"
)
amount: float | None = Field(
default = None,
gt = 0,
description = "magnitude of the reduction; interpreted as percent for PERCENTAGE or currency units for FIXED"
)
min_price:float | None = Field(
default = None,
ge = 0,
description = "required when enabled is true; minimum price floor (use 0 for no lower bound)"
)
delay_reposts:int = Field(
default = 0,
ge = 0,
description = "number of reposts to wait before applying the first automatic price reduction"
)
delay_days:int = Field(
default = 0,
ge = 0,
description = "number of days to wait after publication before applying automatic price reductions"
default=None, gt=0, description="magnitude of the reduction; interpreted as percent for PERCENTAGE or currency units for FIXED"
)
min_price: float | None = Field(default=None, ge=0, description="required when enabled is true; minimum price floor (use 0 for no lower bound)")
delay_reposts: int = Field(default=0, ge=0, description="number of reposts to wait before applying the first automatic price reduction")
delay_days: int = Field(default=0, ge=0, description="number of days to wait after publication before applying automatic price reductions")
@model_validator(mode="after")
def _validate_config(self) -> "AutoPriceReductionConfig":
@@ -67,9 +49,7 @@ class ContactDefaults(ContextualModel):
street: str | None = None
zipcode: int | str | None = None
location: str | None = Field(
default = None,
description = "city or locality of the listing (can include multiple districts)",
examples = ["Sample Town - District One"]
default=None, description="city or locality of the listing (can include multiple districts)", examples=["Sample Town - District One"]
)
phone: str | None = None
@@ -87,10 +67,7 @@ class AdDefaults(ContextualModel):
description_prefix: str | None = Field(default=None, description="prefix for the ad description")
description_suffix: str | None = Field(default=None, description=" suffix for the ad description")
price_type: Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE"
auto_price_reduction:AutoPriceReductionConfig = Field(
default_factory = AutoPriceReductionConfig,
description = "automatic price reduction configuration"
)
auto_price_reduction: AutoPriceReductionConfig = Field(default_factory=AutoPriceReductionConfig, description="automatic price reduction configuration")
shipping_type: Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING"
sell_directly: bool = Field(default=False, description="requires shipping_type SHIPPING to take effect")
images: list[str] | None = Field(default=None)
@@ -114,43 +91,29 @@ class AdDefaults(ContextualModel):
class DownloadConfig(ContextualModel):
include_all_matching_shipping_options:bool = Field(
default = False,
description = "if true, all shipping options matching the package size will be included"
)
excluded_shipping_options:list[str] = Field(
default_factory = list,
description = "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']"
)
folder_name_max_length:int = Field(
default = 100,
ge = 10,
le = 255,
description = "maximum length for folder names when downloading ads (default: 100)"
)
rename_existing_folders:bool = Field(
default = False,
description = "if true, rename existing folders without titles to include titles (default: false)"
)
include_all_matching_shipping_options: bool = Field(default=False, description="if true, all shipping options matching the package size will be included")
excluded_shipping_options: list[str] = Field(default_factory=list, description="list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']")
folder_name_max_length: int = Field(default=100, ge=10, le=255, description="maximum length for folder names when downloading ads (default: 100)")
rename_existing_folders: bool = Field(default=False, description="if true, rename existing folders without titles to include titles (default: false)")
class BrowserConfig(ContextualModel):
arguments: list[str] = Field(
default_factory = lambda: ["--user-data-dir=.temp/browser-profile"],
description = "See https://peter.sh/experiments/chromium-command-line-switches/"
)
binary_location:str | None = Field(
default = None,
description = "path to custom browser executable, if not specified will be looked up on PATH"
)
extensions:list[str] = Field(
default_factory=list,
description = "a list of .crx extension files to be loaded"
description=(
"See https://peter.sh/experiments/chromium-command-line-switches/. "
"Browser profile path is auto-configured based on installation mode (portable/XDG)."
),
)
binary_location: str | None = Field(default=None, description="path to custom browser executable, if not specified will be looked up on PATH")
extensions: list[str] = Field(default_factory=list, description="a list of .crx extension files to be loaded")
use_private_window: bool = True
user_data_dir: str | None = Field(
default = ".temp/browser-profile",
description = "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md"
default=None,
description=(
"See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md. "
"If not specified, defaults to XDG cache directory in XDG mode or .temp/browser-profile in portable mode."
),
)
profile_name: str | None = None
@@ -171,11 +134,7 @@ class CaptchaConfig(ContextualModel):
class TimeoutConfig(ContextualModel):
multiplier:float = Field(
default = 1.0,
ge = 0.1,
description = "Global multiplier applied to all timeout values."
)
multiplier: float = Field(default=1.0, ge=0.1, description="Global multiplier applied to all timeout values.")
default: float = Field(default=5.0, ge=0.0, description="Baseline timeout for DOM interactions.")
page_load: float = Field(default=15.0, ge=1.0, description="Page load timeout for web_open.")
captcha_detection: float = Field(default=2.0, ge=0.1, description="Timeout for captcha iframe detection.")
@@ -237,15 +196,14 @@ class Config(ContextualModel):
description="""
glob (wildcard) patterns to select ad configuration files
if relative paths are specified, then they are relative to this configuration file
"""
""",
) # type: ignore[call-overload]
ad_defaults:AdDefaults = Field(
default_factory = AdDefaults,
description = "Default values for ads, can be overwritten in each ad configuration file"
)
ad_defaults: AdDefaults = Field(default_factory=AdDefaults, description="Default values for ads, can be overwritten in each ad configuration file")
categories:dict[str, str] = Field(default_factory = dict, description = """
categories: dict[str, str] = Field(
default_factory=dict,
description="""
additional name to category ID mappings, see default list at
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
@@ -253,7 +211,8 @@ Example:
categories:
Elektronik > Notebooks: 161/278
Jobs > Praktika: 102/125
""")
""",
)
download: DownloadConfig = Field(default_factory=DownloadConfig)
publishing: PublishingConfig = Field(default_factory=PublishingConfig)
@@ -264,6 +223,4 @@ Example:
timeouts: TimeoutConfig = Field(default_factory=TimeoutConfig, description="Centralized timeout configuration.")
def with_values(self, values: dict[str, Any]) -> Config:
return Config.model_validate(
dicts.apply_defaults(copy.deepcopy(values), defaults = self.model_dump())
)
return Config.model_validate(dicts.apply_defaults(copy.deepcopy(values), defaults=self.model_dump()))

View File

@@ -112,6 +112,11 @@ kleinanzeigen_bot/__init__.py:
" -> FAILED: Timeout while extending ad '%s': %s": " -> FEHLER: Zeitüberschreitung beim Verlängern der Anzeige '%s': %s"
" -> FAILED: Could not persist extension for ad '%s': %s": " -> FEHLER: Verlängerung der Anzeige '%s' konnte nicht gespeichert werden: %s"
finalize_installation_mode:
"Config file: %s": "Konfigurationsdatei: %s"
"First run detected, prompting user for installation mode": "Erster Start erkannt, frage Benutzer nach Installationsmodus"
"Installation mode: %s": "Installationsmodus: %s"
publish_ads:
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
"Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist"
@@ -240,7 +245,7 @@ kleinanzeigen_bot/__init__.py:
kleinanzeigen_bot/extract.py:
#################################################
download_ad:
"Ensured ads directory exists at ./%s.": "Verzeichnis [%s] für Anzeige vorhanden."
"Using download directory: %s": "Verwende Download-Verzeichnis: %s"
_download_and_save_image_sync:
"Failed to download image %s: %s": "Fehler beim Herunterladen des Bildes %s: %s"

View File

@@ -7,7 +7,6 @@ from __future__ import annotations
import logging
from datetime import datetime
from gettext import gettext as _
from pathlib import Path
from typing import TYPE_CHECKING
import colorama
@@ -22,6 +21,7 @@ except ImportError:
__version__ = "unknown"
from kleinanzeigen_bot.model.update_check_state import UpdateCheckState
from kleinanzeigen_bot.utils import xdg_paths
logger = logging.getLogger(__name__)
@@ -31,15 +31,16 @@ colorama.init()
class UpdateChecker:
"""Checks for updates to the bot."""
def __init__(self, config:"Config") -> None:
def __init__(self, config: "Config", installation_mode: str | xdg_paths.InstallationMode = "portable") -> None:
"""Initialize the update checker.
Args:
config: The bot configuration.
installation_mode: Installation mode (portable/xdg).
"""
self.config = config
self.state_file = Path(".temp") / "update_check_state.json"
self.state_file.parent.mkdir(exist_ok = True) # Ensure .temp directory exists
self.state_file = xdg_paths.get_update_check_state_path(installation_mode)
# Note: xdg_paths handles directory creation
self.state = UpdateCheckState.load(self.state_file)
def get_local_version(self) -> str | None:
@@ -79,7 +80,7 @@ class UpdateChecker:
try:
response = requests.get(
f"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/commits/{commitish}",
timeout = self._request_timeout()
timeout=self._request_timeout(),
)
response.raise_for_status()
data = response.json()
@@ -146,24 +147,16 @@ class UpdateChecker:
try:
if self.config.update_check.channel == "latest":
# Use /releases/latest endpoint for stable releases
response = requests.get(
"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases/latest",
timeout = self._request_timeout()
)
response = requests.get("https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases/latest", timeout=self._request_timeout())
response.raise_for_status()
release = response.json()
# Defensive: ensure it's not a prerelease
if release.get("prerelease", False):
logger.warning(
_("Latest release from GitHub is a prerelease, but 'latest' channel expects a stable release.")
)
logger.warning(_("Latest release from GitHub is a prerelease, but 'latest' channel expects a stable release."))
return
elif self.config.update_check.channel == "preview":
# Use /releases endpoint and select the most recent prerelease
response = requests.get(
"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases",
timeout = self._request_timeout()
)
response = requests.get("https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases", timeout=self._request_timeout())
response.raise_for_status()
releases = response.json()
# Find the most recent prerelease
@@ -199,7 +192,7 @@ class UpdateChecker:
_("You are on the latest version: %s (compared to %s in channel %s)"),
local_version,
self._get_short_commit_hash(release_commit),
self.config.update_check.channel
self.config.update_check.channel,
)
self.state.update_last_check()
self.state.save(self.state_file)
@@ -212,7 +205,7 @@ class UpdateChecker:
release_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
local_version,
local_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
self.config.update_check.channel
self.config.update_check.channel,
)
if release.get("body"):
logger.info(_("Release notes:\n%s"), release["body"])
@@ -227,7 +220,7 @@ class UpdateChecker:
self._get_short_commit_hash(local_commit),
local_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
self._get_short_commit_hash(release_commit),
release_commit_date.strftime("%Y-%m-%d %H:%M:%S")
release_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
)
# Update the last check time

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff