feat: integrate XDG paths into bot core (#776)

## ℹ️ Description
Wire XDG path resolution into main bot components.

- Link to the related issue(s): N/A (new feature)
- Integrates installation mode detection into bot core

## 📋 Changes Summary

- Added `finalize_installation_mode()` method for mode detection
- UpdateChecker, AdExtractor now respect installation mode
- Dynamic browser profile defaults (resolved at runtime)
- German translations for installation mode messages
- Comprehensive tests for installation mode integration

**Part 2 of 3 for XDG support**
- Depends on: PR #775 (must be merged first)
- Will rebase on main after merge of previous PR

### ⚙️ Type of Change
- [x] New feature (adds new functionality without breaking existing
usage)

## Checklist
- [x] I have reviewed my changes to ensure they meet the project's
standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run
test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
  * Support for portable and XDG (system-wide) installation modes with
automatic detection and interactive first-run setup.
  * Config and paths standardized so the app stores config, downloads, logs,
and browser profiles in appropriate locations per mode.
  * Update checker improved for more reliable version/commit detection.

* **Chores**
  * Moved dependency to runtime: platformdirs added to main dependencies.

* **Tests**
  * Added comprehensive tests for installation modes, path utilities, and
related behaviors.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Jens
2026-01-23 07:36:10 +01:00
committed by GitHub
parent 7468ef03dc
commit e8cf10101d
7 changed files with 1268 additions and 1422 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -15,7 +15,7 @@ from kleinanzeigen_bot.model.ad_model import ContactPartial
from .model.ad_model import AdPartial from .model.ad_model import AdPartial
from .model.config_model import Config from .model.config_model import Config
from .utils import dicts, files, i18n, loggers, misc, reflect from .utils import dicts, files, i18n, loggers, misc, reflect, xdg_paths
from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin
__all__ = [ __all__ = [
@@ -33,10 +33,13 @@ class AdExtractor(WebScrapingMixin):
Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page. Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page.
""" """
def __init__(self, browser:Browser, config:Config) -> None: def __init__(self, browser:Browser, config:Config, installation_mode:xdg_paths.InstallationMode = "portable") -> None:
super().__init__() super().__init__()
self.browser = browser self.browser = browser
self.config:Config = config self.config:Config = config
if installation_mode not in {"portable", "xdg"}:
raise ValueError(f"Unsupported installation mode: {installation_mode}")
self.installation_mode:xdg_paths.InstallationMode = installation_mode
async def download_ad(self, ad_id:int) -> None: async def download_ad(self, ad_id:int) -> None:
""" """
@@ -47,26 +50,19 @@ class AdExtractor(WebScrapingMixin):
""" """
# create sub-directory for ad(s) to download (if necessary): # create sub-directory for ad(s) to download (if necessary):
relative_directory = Path("downloaded-ads") download_dir = xdg_paths.get_downloaded_ads_path(self.installation_mode)
# make sure configured base directory exists (using exist_ok=True to avoid TOCTOU race) LOG.info(_("Using download directory: %s"), download_dir)
await asyncio.get_running_loop().run_in_executor(None, lambda: relative_directory.mkdir(exist_ok = True)) # noqa: ASYNC240 # Note: xdg_paths.get_downloaded_ads_path() already creates the directory
LOG.info("Ensured ads directory exists at ./%s.", relative_directory)
# Extract ad info and determine final directory path # Extract ad info and determine final directory path
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling( ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(download_dir, ad_id)
relative_directory, ad_id
)
# Save the ad configuration file (offload to executor to avoid blocking the event loop) # Save the ad configuration file (offload to executor to avoid blocking the event loop)
ad_file_path = str(Path(final_dir) / f"ad_{ad_id}.yaml") ad_file_path = str(Path(final_dir) / f"ad_{ad_id}.yaml")
header_string = ( header_string = (
"# yaml-language-server: $schema=" "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
"https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
)
await asyncio.get_running_loop().run_in_executor(
None,
lambda: dicts.save_dict(ad_file_path, ad_cfg.model_dump(), header = header_string)
) )
await asyncio.get_running_loop().run_in_executor(None, lambda: dicts.save_dict(ad_file_path, ad_cfg.model_dump(), header = header_string))
@staticmethod @staticmethod
def _download_and_save_image_sync(url:str, directory:str, filename_prefix:str, img_nr:int) -> str | None: def _download_and_save_image_sync(url:str, directory:str, filename_prefix:str, img_nr:int) -> str | None:
@@ -114,14 +110,7 @@ class AdExtractor(WebScrapingMixin):
if current_img_url is None: if current_img_url is None:
continue continue
img_path = await loop.run_in_executor( img_path = await loop.run_in_executor(None, self._download_and_save_image_sync, str(current_img_url), directory, img_fn_prefix, img_nr)
None,
self._download_and_save_image_sync,
str(current_img_url),
directory,
img_fn_prefix,
img_nr
)
if img_path: if img_path:
dl_counter += 1 dl_counter += 1
@@ -217,10 +206,7 @@ class AdExtractor(WebScrapingMixin):
# Extract references using the CORRECTED selector # Extract references using the CORRECTED selector
try: try:
page_refs:list[str] = [ page_refs:list[str] = [str((await self.web_find(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = li)).attrs["href"]) for li in list_items]
str((await self.web_find(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = li)).attrs["href"])
for li in list_items
]
refs.extend(page_refs) refs.extend(page_refs)
LOG.info("Successfully extracted %s refs from page %s.", len(page_refs), current_page) LOG.info("Successfully extracted %s refs from page %s.", len(page_refs), current_page)
except Exception as e: except Exception as e:
@@ -344,7 +330,7 @@ class AdExtractor(WebScrapingMixin):
if prefix and description_text.startswith(prefix.strip()): if prefix and description_text.startswith(prefix.strip()):
description_text = description_text[len(prefix.strip()):] description_text = description_text[len(prefix.strip()):]
if suffix and description_text.endswith(suffix.strip()): if suffix and description_text.endswith(suffix.strip()):
description_text = description_text[:-len(suffix.strip())] description_text = description_text[: -len(suffix.strip())]
info["description"] = description_text.strip() info["description"] = description_text.strip()
@@ -361,8 +347,7 @@ class AdExtractor(WebScrapingMixin):
info["id"] = ad_id info["id"] = ad_id
try: # try different locations known for creation date element try: # try different locations known for creation date element
creation_date = await self.web_text(By.XPATH, creation_date = await self.web_text(By.XPATH, "/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
"/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
except TimeoutError: except TimeoutError:
creation_date = await self.web_text(By.CSS_SELECTOR, "#viewad-extra-info > div:nth-child(1) > span:nth-child(2)") creation_date = await self.web_text(By.CSS_SELECTOR, "#viewad-extra-info > div:nth-child(1) > span:nth-child(2)")
@@ -380,9 +365,7 @@ class AdExtractor(WebScrapingMixin):
return ad_cfg return ad_cfg
async def _extract_ad_page_info_with_directory_handling( async def _extract_ad_page_info_with_directory_handling(self, relative_directory:Path, ad_id:int) -> tuple[AdPartial, Path]:
self, relative_directory:Path, ad_id:int
) -> tuple[AdPartial, Path]:
""" """
Extracts ad information and handles directory creation/renaming. Extracts ad information and handles directory creation/renaming.
@@ -415,8 +398,7 @@ class AdExtractor(WebScrapingMixin):
if await files.exists(temp_dir): if await files.exists(temp_dir):
if self.config.download.rename_existing_folders: if self.config.download.rename_existing_folders:
# Rename the old folder to the new name with title # Rename the old folder to the new name with title
LOG.info("Renaming folder from %s to %s for ad %s...", LOG.info("Renaming folder from %s to %s for ad %s...", temp_dir.name, final_dir.name, ad_id)
temp_dir.name, final_dir.name, ad_id)
LOG.debug("Renaming: %s -> %s", temp_dir, final_dir) LOG.debug("Renaming: %s -> %s", temp_dir, final_dir)
await loop.run_in_executor(None, temp_dir.rename, final_dir) await loop.run_in_executor(None, temp_dir.rename, final_dir)
else: else:
@@ -471,14 +453,8 @@ class AdExtractor(WebScrapingMixin):
category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line) category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line) category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
except TimeoutError as exc: except TimeoutError as exc:
LOG.error( LOG.error("Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)", fallback_timeout, category_ids)
"Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)", raise TimeoutError(_("Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.") % {"seconds": fallback_timeout}) from exc
fallback_timeout,
category_ids
)
raise TimeoutError(
_("Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.") % {"seconds": fallback_timeout}
) from exc
href_first:str = str(category_first_part.attrs["href"]) href_first:str = str(category_first_part.attrs["href"])
href_second:str = str(category_second_part.attrs["href"]) href_second:str = str(category_second_part.attrs["href"])
cat_num_first_raw = href_first.rsplit("/", maxsplit = 1)[-1] cat_num_first_raw = href_first.rsplit("/", maxsplit = 1)[-1]
@@ -553,8 +529,8 @@ class AdExtractor(WebScrapingMixin):
# reading shipping option from kleinanzeigen # reading shipping option from kleinanzeigen
# and find the right one by price # and find the right one by price
shipping_costs = json.loads( shipping_costs = json.loads(
(await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE")) (await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE"))["content"]
["content"])["data"]["shippingOptionsResponse"]["options"] )["data"]["shippingOptionsResponse"]["options"]
# map to internal shipping identifiers used by kleinanzeigen-bot # map to internal shipping identifiers used by kleinanzeigen-bot
shipping_option_mapping = { shipping_option_mapping = {
@@ -566,7 +542,7 @@ class AdExtractor(WebScrapingMixin):
"HERMES_001": "Hermes_Päckchen", "HERMES_001": "Hermes_Päckchen",
"HERMES_002": "Hermes_S", "HERMES_002": "Hermes_S",
"HERMES_003": "Hermes_M", "HERMES_003": "Hermes_M",
"HERMES_004": "Hermes_L" "HERMES_004": "Hermes_L",
} }
# Convert Euro to cents and round to nearest integer # Convert Euro to cents and round to nearest integer

View File

@@ -15,40 +15,22 @@ from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils.misc import get_attr from kleinanzeigen_bot.utils.misc import get_attr
from kleinanzeigen_bot.utils.pydantics import ContextualModel from kleinanzeigen_bot.utils.pydantics import ContextualModel
_MAX_PERCENTAGE:Final[int] = 100 _MAX_PERCENTAGE: Final[int] = 100
class AutoPriceReductionConfig(ContextualModel): class AutoPriceReductionConfig(ContextualModel):
enabled:bool = Field( enabled: bool = Field(default=False, description="automatically lower the price of reposted ads")
default = False, strategy: Literal["FIXED", "PERCENTAGE"] | None = Field(
description = "automatically lower the price of reposted ads" default=None, description="PERCENTAGE reduces by a percentage of the previous price, FIXED reduces by a fixed amount"
) )
strategy:Literal["FIXED", "PERCENTAGE"] | None = Field( amount: float | None = Field(
default = None, default=None, gt=0, description="magnitude of the reduction; interpreted as percent for PERCENTAGE or currency units for FIXED"
description = "PERCENTAGE reduces by a percentage of the previous price, FIXED reduces by a fixed amount"
)
amount:float | None = Field(
default = None,
gt = 0,
description = "magnitude of the reduction; interpreted as percent for PERCENTAGE or currency units for FIXED"
)
min_price:float | None = Field(
default = None,
ge = 0,
description = "required when enabled is true; minimum price floor (use 0 for no lower bound)"
)
delay_reposts:int = Field(
default = 0,
ge = 0,
description = "number of reposts to wait before applying the first automatic price reduction"
)
delay_days:int = Field(
default = 0,
ge = 0,
description = "number of days to wait after publication before applying automatic price reductions"
) )
min_price: float | None = Field(default=None, ge=0, description="required when enabled is true; minimum price floor (use 0 for no lower bound)")
delay_reposts: int = Field(default=0, ge=0, description="number of reposts to wait before applying the first automatic price reduction")
delay_days: int = Field(default=0, ge=0, description="number of days to wait after publication before applying automatic price reductions")
@model_validator(mode = "after") @model_validator(mode="after")
def _validate_config(self) -> "AutoPriceReductionConfig": def _validate_config(self) -> "AutoPriceReductionConfig":
if self.enabled: if self.enabled:
if self.strategy is None: if self.strategy is None:
@@ -63,43 +45,38 @@ class AutoPriceReductionConfig(ContextualModel):
class ContactDefaults(ContextualModel): class ContactDefaults(ContextualModel):
name:str | None = None name: str | None = None
street:str | None = None street: str | None = None
zipcode:int | str | None = None zipcode: int | str | None = None
location:str | None = Field( location: str | None = Field(
default = None, default=None, description="city or locality of the listing (can include multiple districts)", examples=["Sample Town - District One"]
description = "city or locality of the listing (can include multiple districts)",
examples = ["Sample Town - District One"]
) )
phone:str | None = None phone: str | None = None
@deprecated("Use description_prefix/description_suffix instead") @deprecated("Use description_prefix/description_suffix instead")
class DescriptionAffixes(ContextualModel): class DescriptionAffixes(ContextualModel):
prefix:str | None = None prefix: str | None = None
suffix:str | None = None suffix: str | None = None
class AdDefaults(ContextualModel): class AdDefaults(ContextualModel):
active:bool = True active: bool = True
type:Literal["OFFER", "WANTED"] = "OFFER" type: Literal["OFFER", "WANTED"] = "OFFER"
description:DescriptionAffixes | None = None description: DescriptionAffixes | None = None
description_prefix:str | None = Field(default = None, description = "prefix for the ad description") description_prefix: str | None = Field(default=None, description="prefix for the ad description")
description_suffix:str | None = Field(default = None, description = " suffix for the ad description") description_suffix: str | None = Field(default=None, description=" suffix for the ad description")
price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE" price_type: Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE"
auto_price_reduction:AutoPriceReductionConfig = Field( auto_price_reduction: AutoPriceReductionConfig = Field(default_factory=AutoPriceReductionConfig, description="automatic price reduction configuration")
default_factory = AutoPriceReductionConfig, shipping_type: Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING"
description = "automatic price reduction configuration" sell_directly: bool = Field(default=False, description="requires shipping_type SHIPPING to take effect")
) images: list[str] | None = Field(default=None)
shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING" contact: ContactDefaults = Field(default_factory=ContactDefaults)
sell_directly:bool = Field(default = False, description = "requires shipping_type SHIPPING to take effect") republication_interval: int = 7
images:list[str] | None = Field(default = None)
contact:ContactDefaults = Field(default_factory = ContactDefaults)
republication_interval:int = 7
@model_validator(mode = "before") @model_validator(mode="before")
@classmethod @classmethod
def migrate_legacy_description(cls, values:dict[str, Any]) -> dict[str, Any]: def migrate_legacy_description(cls, values: dict[str, Any]) -> dict[str, Any]:
# Ensure flat prefix/suffix take precedence over deprecated nested "description" # Ensure flat prefix/suffix take precedence over deprecated nested "description"
description_prefix = values.get("description_prefix") description_prefix = values.get("description_prefix")
description_suffix = values.get("description_suffix") description_suffix = values.get("description_suffix")
@@ -114,89 +91,71 @@ class AdDefaults(ContextualModel):
class DownloadConfig(ContextualModel): class DownloadConfig(ContextualModel):
include_all_matching_shipping_options:bool = Field( include_all_matching_shipping_options: bool = Field(default=False, description="if true, all shipping options matching the package size will be included")
default = False, excluded_shipping_options: list[str] = Field(default_factory=list, description="list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']")
description = "if true, all shipping options matching the package size will be included" folder_name_max_length: int = Field(default=100, ge=10, le=255, description="maximum length for folder names when downloading ads (default: 100)")
) rename_existing_folders: bool = Field(default=False, description="if true, rename existing folders without titles to include titles (default: false)")
excluded_shipping_options:list[str] = Field(
default_factory = list,
description = "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']"
)
folder_name_max_length:int = Field(
default = 100,
ge = 10,
le = 255,
description = "maximum length for folder names when downloading ads (default: 100)"
)
rename_existing_folders:bool = Field(
default = False,
description = "if true, rename existing folders without titles to include titles (default: false)"
)
class BrowserConfig(ContextualModel): class BrowserConfig(ContextualModel):
arguments:list[str] = Field( arguments: list[str] = Field(
default_factory = lambda: ["--user-data-dir=.temp/browser-profile"], default_factory=list,
description = "See https://peter.sh/experiments/chromium-command-line-switches/" description=(
"See https://peter.sh/experiments/chromium-command-line-switches/. "
"Browser profile path is auto-configured based on installation mode (portable/XDG)."
),
) )
binary_location:str | None = Field( binary_location: str | None = Field(default=None, description="path to custom browser executable, if not specified will be looked up on PATH")
default = None, extensions: list[str] = Field(default_factory=list, description="a list of .crx extension files to be loaded")
description = "path to custom browser executable, if not specified will be looked up on PATH" use_private_window: bool = True
user_data_dir: str | None = Field(
default=None,
description=(
"See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md. "
"If not specified, defaults to XDG cache directory in XDG mode or .temp/browser-profile in portable mode."
),
) )
extensions:list[str] = Field( profile_name: str | None = None
default_factory = list,
description = "a list of .crx extension files to be loaded"
)
use_private_window:bool = True
user_data_dir:str | None = Field(
default = ".temp/browser-profile",
description = "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md"
)
profile_name:str | None = None
class LoginConfig(ContextualModel): class LoginConfig(ContextualModel):
username:str = Field(..., min_length = 1) username: str = Field(..., min_length=1)
password:str = Field(..., min_length = 1) password: str = Field(..., min_length=1)
class PublishingConfig(ContextualModel): class PublishingConfig(ContextualModel):
delete_old_ads:Literal["BEFORE_PUBLISH", "AFTER_PUBLISH", "NEVER"] | None = "AFTER_PUBLISH" delete_old_ads: Literal["BEFORE_PUBLISH", "AFTER_PUBLISH", "NEVER"] | None = "AFTER_PUBLISH"
delete_old_ads_by_title:bool = Field(default = True, description = "only works if delete_old_ads is set to BEFORE_PUBLISH") delete_old_ads_by_title: bool = Field(default=True, description="only works if delete_old_ads is set to BEFORE_PUBLISH")
class CaptchaConfig(ContextualModel): class CaptchaConfig(ContextualModel):
auto_restart:bool = False auto_restart: bool = False
restart_delay:str = "6h" restart_delay: str = "6h"
class TimeoutConfig(ContextualModel): class TimeoutConfig(ContextualModel):
multiplier:float = Field( multiplier: float = Field(default=1.0, ge=0.1, description="Global multiplier applied to all timeout values.")
default = 1.0, default: float = Field(default=5.0, ge=0.0, description="Baseline timeout for DOM interactions.")
ge = 0.1, page_load: float = Field(default=15.0, ge=1.0, description="Page load timeout for web_open.")
description = "Global multiplier applied to all timeout values." captcha_detection: float = Field(default=2.0, ge=0.1, description="Timeout for captcha iframe detection.")
) sms_verification: float = Field(default=4.0, ge=0.1, description="Timeout for SMS verification prompts.")
default:float = Field(default = 5.0, ge = 0.0, description = "Baseline timeout for DOM interactions.") gdpr_prompt: float = Field(default=10.0, ge=1.0, description="Timeout for GDPR/consent dialogs.")
page_load:float = Field(default = 15.0, ge = 1.0, description = "Page load timeout for web_open.") login_detection: float = Field(default=10.0, ge=1.0, description="Timeout for detecting existing login session via DOM elements.")
captcha_detection:float = Field(default = 2.0, ge = 0.1, description = "Timeout for captcha iframe detection.") publishing_result: float = Field(default=300.0, ge=10.0, description="Timeout for publishing result checks.")
sms_verification:float = Field(default = 4.0, ge = 0.1, description = "Timeout for SMS verification prompts.") publishing_confirmation: float = Field(default=20.0, ge=1.0, description="Timeout for publish confirmation redirect.")
gdpr_prompt:float = Field(default = 10.0, ge = 1.0, description = "Timeout for GDPR/consent dialogs.") image_upload: float = Field(default=30.0, ge=5.0, description="Timeout for image upload and server-side processing.")
login_detection:float = Field(default = 10.0, ge = 1.0, description = "Timeout for detecting existing login session via DOM elements.") pagination_initial: float = Field(default=10.0, ge=1.0, description="Timeout for initial pagination lookup.")
publishing_result:float = Field(default = 300.0, ge = 10.0, description = "Timeout for publishing result checks.") pagination_follow_up: float = Field(default=5.0, ge=1.0, description="Timeout for subsequent pagination navigation.")
publishing_confirmation:float = Field(default = 20.0, ge = 1.0, description = "Timeout for publish confirmation redirect.") quick_dom: float = Field(default=2.0, ge=0.1, description="Generic short timeout for transient UI.")
image_upload:float = Field(default = 30.0, ge = 5.0, description = "Timeout for image upload and server-side processing.") update_check: float = Field(default=10.0, ge=1.0, description="Timeout for GitHub update checks.")
pagination_initial:float = Field(default = 10.0, ge = 1.0, description = "Timeout for initial pagination lookup.") chrome_remote_probe: float = Field(default=2.0, ge=0.1, description="Timeout for local remote-debugging probes.")
pagination_follow_up:float = Field(default = 5.0, ge = 1.0, description = "Timeout for subsequent pagination navigation.") chrome_remote_debugging: float = Field(default=5.0, ge=1.0, description="Timeout for remote debugging API calls.")
quick_dom:float = Field(default = 2.0, ge = 0.1, description = "Generic short timeout for transient UI.") chrome_binary_detection: float = Field(default=10.0, ge=1.0, description="Timeout for chrome --version subprocesses.")
update_check:float = Field(default = 10.0, ge = 1.0, description = "Timeout for GitHub update checks.") retry_enabled: bool = Field(default=True, description="Enable built-in retry/backoff for DOM operations.")
chrome_remote_probe:float = Field(default = 2.0, ge = 0.1, description = "Timeout for local remote-debugging probes.") retry_max_attempts: int = Field(default=2, ge=1, description="Max retry attempts when retry is enabled.")
chrome_remote_debugging:float = Field(default = 5.0, ge = 1.0, description = "Timeout for remote debugging API calls.") retry_backoff_factor: float = Field(default=1.5, ge=1.0, description="Exponential factor applied per retry attempt.")
chrome_binary_detection:float = Field(default = 10.0, ge = 1.0, description = "Timeout for chrome --version subprocesses.")
retry_enabled:bool = Field(default = True, description = "Enable built-in retry/backoff for DOM operations.")
retry_max_attempts:int = Field(default = 2, ge = 1, description = "Max retry attempts when retry is enabled.")
retry_backoff_factor:float = Field(default = 1.5, ge = 1.0, description = "Exponential factor applied per retry attempt.")
def resolve(self, key:str = "default", override:float | None = None) -> float: def resolve(self, key: str = "default", override: float | None = None) -> float:
""" """
Return the base timeout (seconds) for the given key without applying modifiers. Return the base timeout (seconds) for the given key without applying modifiers.
""" """
@@ -212,16 +171,16 @@ class TimeoutConfig(ContextualModel):
return float(self.default) return float(self.default)
def effective(self, key:str = "default", override:float | None = None, *, attempt:int = 0) -> float: def effective(self, key: str = "default", override: float | None = None, *, attempt: int = 0) -> float:
""" """
Return the effective timeout (seconds) with multiplier/backoff applied. Return the effective timeout (seconds) with multiplier/backoff applied.
""" """
base = self.resolve(key, override) base = self.resolve(key, override)
backoff = self.retry_backoff_factor ** attempt if attempt > 0 else 1.0 backoff = self.retry_backoff_factor**attempt if attempt > 0 else 1.0
return base * self.multiplier * backoff return base * self.multiplier * backoff
def _validate_glob_pattern(v:str) -> str: def _validate_glob_pattern(v: str) -> str:
if not v.strip(): if not v.strip():
raise ValueError("must be a non-empty, non-blank glob pattern") raise ValueError("must be a non-empty, non-blank glob pattern")
return v return v
@@ -231,21 +190,20 @@ GlobPattern = Annotated[str, AfterValidator(_validate_glob_pattern)]
class Config(ContextualModel): class Config(ContextualModel):
ad_files:list[GlobPattern] = Field( ad_files: list[GlobPattern] = Field(
default_factory = lambda: ["./**/ad_*.{json,yml,yaml}"], default_factory=lambda: ["./**/ad_*.{json,yml,yaml}"],
min_items = 1, min_items=1,
description = """ description="""
glob (wildcard) patterns to select ad configuration files glob (wildcard) patterns to select ad configuration files
if relative paths are specified, then they are relative to this configuration file if relative paths are specified, then they are relative to this configuration file
""" """,
) # type: ignore[call-overload] ) # type: ignore[call-overload]
ad_defaults:AdDefaults = Field( ad_defaults: AdDefaults = Field(default_factory=AdDefaults, description="Default values for ads, can be overwritten in each ad configuration file")
default_factory = AdDefaults,
description = "Default values for ads, can be overwritten in each ad configuration file"
)
categories:dict[str, str] = Field(default_factory = dict, description = """ categories: dict[str, str] = Field(
default_factory=dict,
description="""
additional name to category ID mappings, see default list at additional name to category ID mappings, see default list at
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
@@ -253,17 +211,16 @@ Example:
categories: categories:
Elektronik > Notebooks: 161/278 Elektronik > Notebooks: 161/278
Jobs > Praktika: 102/125 Jobs > Praktika: 102/125
""") """,
)
download:DownloadConfig = Field(default_factory = DownloadConfig) download: DownloadConfig = Field(default_factory=DownloadConfig)
publishing:PublishingConfig = Field(default_factory = PublishingConfig) publishing: PublishingConfig = Field(default_factory=PublishingConfig)
browser:BrowserConfig = Field(default_factory = BrowserConfig, description = "Browser configuration") browser: BrowserConfig = Field(default_factory=BrowserConfig, description="Browser configuration")
login:LoginConfig = Field(default_factory = LoginConfig.model_construct, description = "Login credentials") login: LoginConfig = Field(default_factory=LoginConfig.model_construct, description="Login credentials")
captcha:CaptchaConfig = Field(default_factory = CaptchaConfig) captcha: CaptchaConfig = Field(default_factory=CaptchaConfig)
update_check:UpdateCheckConfig = Field(default_factory = UpdateCheckConfig, description = "Update check configuration") update_check: UpdateCheckConfig = Field(default_factory=UpdateCheckConfig, description="Update check configuration")
timeouts:TimeoutConfig = Field(default_factory = TimeoutConfig, description = "Centralized timeout configuration.") timeouts: TimeoutConfig = Field(default_factory=TimeoutConfig, description="Centralized timeout configuration.")
def with_values(self, values:dict[str, Any]) -> Config: def with_values(self, values: dict[str, Any]) -> Config:
return Config.model_validate( return Config.model_validate(dicts.apply_defaults(copy.deepcopy(values), defaults=self.model_dump()))
dicts.apply_defaults(copy.deepcopy(values), defaults = self.model_dump())
)

View File

@@ -112,6 +112,11 @@ kleinanzeigen_bot/__init__.py:
" -> FAILED: Timeout while extending ad '%s': %s": " -> FEHLER: Zeitüberschreitung beim Verlängern der Anzeige '%s': %s" " -> FAILED: Timeout while extending ad '%s': %s": " -> FEHLER: Zeitüberschreitung beim Verlängern der Anzeige '%s': %s"
" -> FAILED: Could not persist extension for ad '%s': %s": " -> FEHLER: Verlängerung der Anzeige '%s' konnte nicht gespeichert werden: %s" " -> FAILED: Could not persist extension for ad '%s': %s": " -> FEHLER: Verlängerung der Anzeige '%s' konnte nicht gespeichert werden: %s"
finalize_installation_mode:
"Config file: %s": "Konfigurationsdatei: %s"
"First run detected, prompting user for installation mode": "Erster Start erkannt, frage Benutzer nach Installationsmodus"
"Installation mode: %s": "Installationsmodus: %s"
publish_ads: publish_ads:
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..." "Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
"Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist" "Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist"
@@ -240,7 +245,7 @@ kleinanzeigen_bot/__init__.py:
kleinanzeigen_bot/extract.py: kleinanzeigen_bot/extract.py:
################################################# #################################################
download_ad: download_ad:
"Ensured ads directory exists at ./%s.": "Verzeichnis [%s] für Anzeige vorhanden." "Using download directory: %s": "Verwende Download-Verzeichnis: %s"
_download_and_save_image_sync: _download_and_save_image_sync:
"Failed to download image %s: %s": "Fehler beim Herunterladen des Bildes %s: %s" "Failed to download image %s: %s": "Fehler beim Herunterladen des Bildes %s: %s"

View File

@@ -7,7 +7,6 @@ from __future__ import annotations
import logging import logging
from datetime import datetime from datetime import datetime
from gettext import gettext as _ from gettext import gettext as _
from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import colorama import colorama
@@ -22,6 +21,7 @@ except ImportError:
__version__ = "unknown" __version__ = "unknown"
from kleinanzeigen_bot.model.update_check_state import UpdateCheckState from kleinanzeigen_bot.model.update_check_state import UpdateCheckState
from kleinanzeigen_bot.utils import xdg_paths
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -31,15 +31,16 @@ colorama.init()
class UpdateChecker: class UpdateChecker:
"""Checks for updates to the bot.""" """Checks for updates to the bot."""
def __init__(self, config:"Config") -> None: def __init__(self, config: "Config", installation_mode: str | xdg_paths.InstallationMode = "portable") -> None:
"""Initialize the update checker. """Initialize the update checker.
Args: Args:
config: The bot configuration. config: The bot configuration.
installation_mode: Installation mode (portable/xdg).
""" """
self.config = config self.config = config
self.state_file = Path(".temp") / "update_check_state.json" self.state_file = xdg_paths.get_update_check_state_path(installation_mode)
self.state_file.parent.mkdir(exist_ok = True) # Ensure .temp directory exists # Note: xdg_paths handles directory creation
self.state = UpdateCheckState.load(self.state_file) self.state = UpdateCheckState.load(self.state_file)
def get_local_version(self) -> str | None: def get_local_version(self) -> str | None:
@@ -54,7 +55,7 @@ class UpdateChecker:
"""Return the effective timeout for HTTP calls.""" """Return the effective timeout for HTTP calls."""
return self.config.timeouts.effective("update_check") return self.config.timeouts.effective("update_check")
def _get_commit_hash(self, version:str) -> str | None: def _get_commit_hash(self, version: str) -> str | None:
"""Extract the commit hash from a version string. """Extract the commit hash from a version string.
Args: Args:
@@ -67,7 +68,7 @@ class UpdateChecker:
return version.split("+")[1] return version.split("+")[1]
return None return None
def _resolve_commitish(self, commitish:str) -> tuple[str | None, datetime | None]: def _resolve_commitish(self, commitish: str) -> tuple[str | None, datetime | None]:
"""Resolve a commit-ish to a full commit hash and date. """Resolve a commit-ish to a full commit hash and date.
Args: Args:
@@ -79,7 +80,7 @@ class UpdateChecker:
try: try:
response = requests.get( response = requests.get(
f"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/commits/{commitish}", f"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/commits/{commitish}",
timeout = self._request_timeout() timeout=self._request_timeout(),
) )
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
@@ -95,7 +96,7 @@ class UpdateChecker:
logger.warning(_("Could not resolve commit '%s': %s"), commitish, e) logger.warning(_("Could not resolve commit '%s': %s"), commitish, e)
return None, None return None, None
def _get_short_commit_hash(self, commit:str) -> str: def _get_short_commit_hash(self, commit: str) -> str:
"""Get the short version of a commit hash. """Get the short version of a commit hash.
Args: Args:
@@ -106,7 +107,7 @@ class UpdateChecker:
""" """
return commit[:7] return commit[:7]
def _commits_match(self, local_commit:str, release_commit:str) -> bool: def _commits_match(self, local_commit: str, release_commit: str) -> bool:
"""Determine whether two commits refer to the same hash. """Determine whether two commits refer to the same hash.
This accounts for short vs. full hashes (e.g. 7 chars vs. 40 chars). This accounts for short vs. full hashes (e.g. 7 chars vs. 40 chars).
@@ -119,7 +120,7 @@ class UpdateChecker:
return True return True
return len(release_commit) < len(local_commit) and local_commit.startswith(release_commit) return len(release_commit) < len(local_commit) and local_commit.startswith(release_commit)
def check_for_updates(self, *, skip_interval_check:bool = False) -> None: def check_for_updates(self, *, skip_interval_check: bool = False) -> None:
"""Check for updates to the bot. """Check for updates to the bot.
Args: Args:
@@ -146,24 +147,16 @@ class UpdateChecker:
try: try:
if self.config.update_check.channel == "latest": if self.config.update_check.channel == "latest":
# Use /releases/latest endpoint for stable releases # Use /releases/latest endpoint for stable releases
response = requests.get( response = requests.get("https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases/latest", timeout=self._request_timeout())
"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases/latest",
timeout = self._request_timeout()
)
response.raise_for_status() response.raise_for_status()
release = response.json() release = response.json()
# Defensive: ensure it's not a prerelease # Defensive: ensure it's not a prerelease
if release.get("prerelease", False): if release.get("prerelease", False):
logger.warning( logger.warning(_("Latest release from GitHub is a prerelease, but 'latest' channel expects a stable release."))
_("Latest release from GitHub is a prerelease, but 'latest' channel expects a stable release.")
)
return return
elif self.config.update_check.channel == "preview": elif self.config.update_check.channel == "preview":
# Use /releases endpoint and select the most recent prerelease # Use /releases endpoint and select the most recent prerelease
response = requests.get( response = requests.get("https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases", timeout=self._request_timeout())
"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases",
timeout = self._request_timeout()
)
response.raise_for_status() response.raise_for_status()
releases = response.json() releases = response.json()
# Find the most recent prerelease # Find the most recent prerelease
@@ -199,7 +192,7 @@ class UpdateChecker:
_("You are on the latest version: %s (compared to %s in channel %s)"), _("You are on the latest version: %s (compared to %s in channel %s)"),
local_version, local_version,
self._get_short_commit_hash(release_commit), self._get_short_commit_hash(release_commit),
self.config.update_check.channel self.config.update_check.channel,
) )
self.state.update_last_check() self.state.update_last_check()
self.state.save(self.state_file) self.state.save(self.state_file)
@@ -212,7 +205,7 @@ class UpdateChecker:
release_commit_date.strftime("%Y-%m-%d %H:%M:%S"), release_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
local_version, local_version,
local_commit_date.strftime("%Y-%m-%d %H:%M:%S"), local_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
self.config.update_check.channel self.config.update_check.channel,
) )
if release.get("body"): if release.get("body"):
logger.info(_("Release notes:\n%s"), release["body"]) logger.info(_("Release notes:\n%s"), release["body"])
@@ -227,7 +220,7 @@ class UpdateChecker:
self._get_short_commit_hash(local_commit), self._get_short_commit_hash(local_commit),
local_commit_date.strftime("%Y-%m-%d %H:%M:%S"), local_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
self._get_short_commit_hash(release_commit), self._get_short_commit_hash(release_commit),
release_commit_date.strftime("%Y-%m-%d %H:%M:%S") release_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
) )
# Update the last check time # Update the last check time

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff