mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
feat: integrate XDG paths into bot core (#776)
## ℹ️ Description

Wire XDG path resolution into main bot components.

- Link to the related issue(s): N/A (new feature)
- Integrates installation mode detection into bot core

## 📋 Changes Summary

- Added `finalize_installation_mode()` method for mode detection
- UpdateChecker, AdExtractor now respect installation mode
- Dynamic browser profile defaults (resolved at runtime)
- German translations for installation mode messages
- Comprehensive tests for installation mode integration

**Part 2 of 3 for XDG support**

- Depends on: PR #775 (must be merged first)
- Will rebase on main after merge of previous PR

### ⚙️ Type of Change

- [x] ✨ New feature (adds new functionality without breaking existing usage)

## ✅ Checklist

- [x] I have reviewed my changes to ensure they meet the project's standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run test`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Support for portable and XDG (system-wide) installation modes with automatic detection and interactive first-run setup.
  * Config and paths standardized so app stores config, downloads, logs, and browser profiles in appropriate locations per mode.
  * Update checker improved for more reliable version/commit detection.

* **Chores**
  * Moved dependency to runtime: platformdirs added to main dependencies.

* **Tests**
  * Added comprehensive tests for installation modes, path utilities, and related behaviors.

<sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@ from kleinanzeigen_bot.model.ad_model import ContactPartial
|
||||
|
||||
from .model.ad_model import AdPartial
|
||||
from .model.config_model import Config
|
||||
from .utils import dicts, files, i18n, loggers, misc, reflect
|
||||
from .utils import dicts, files, i18n, loggers, misc, reflect, xdg_paths
|
||||
from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin
|
||||
|
||||
__all__ = [
|
||||
@@ -33,10 +33,13 @@ class AdExtractor(WebScrapingMixin):
|
||||
Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page.
|
||||
"""
|
||||
|
||||
def __init__(self, browser:Browser, config:Config) -> None:
|
||||
def __init__(self, browser:Browser, config:Config, installation_mode:xdg_paths.InstallationMode = "portable") -> None:
|
||||
super().__init__()
|
||||
self.browser = browser
|
||||
self.config:Config = config
|
||||
if installation_mode not in {"portable", "xdg"}:
|
||||
raise ValueError(f"Unsupported installation mode: {installation_mode}")
|
||||
self.installation_mode:xdg_paths.InstallationMode = installation_mode
|
||||
|
||||
async def download_ad(self, ad_id:int) -> None:
|
||||
"""
|
||||
@@ -47,26 +50,19 @@ class AdExtractor(WebScrapingMixin):
|
||||
"""
|
||||
|
||||
# create sub-directory for ad(s) to download (if necessary):
|
||||
relative_directory = Path("downloaded-ads")
|
||||
# make sure configured base directory exists (using exist_ok=True to avoid TOCTOU race)
|
||||
await asyncio.get_running_loop().run_in_executor(None, lambda: relative_directory.mkdir(exist_ok = True)) # noqa: ASYNC240
|
||||
LOG.info("Ensured ads directory exists at ./%s.", relative_directory)
|
||||
download_dir = xdg_paths.get_downloaded_ads_path(self.installation_mode)
|
||||
LOG.info(_("Using download directory: %s"), download_dir)
|
||||
# Note: xdg_paths.get_downloaded_ads_path() already creates the directory
|
||||
|
||||
# Extract ad info and determine final directory path
|
||||
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(
|
||||
relative_directory, ad_id
|
||||
)
|
||||
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(download_dir, ad_id)
|
||||
|
||||
# Save the ad configuration file (offload to executor to avoid blocking the event loop)
|
||||
ad_file_path = str(Path(final_dir) / f"ad_{ad_id}.yaml")
|
||||
header_string = (
|
||||
"# yaml-language-server: $schema="
|
||||
"https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
|
||||
)
|
||||
await asyncio.get_running_loop().run_in_executor(
|
||||
None,
|
||||
lambda: dicts.save_dict(ad_file_path, ad_cfg.model_dump(), header = header_string)
|
||||
"# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
|
||||
)
|
||||
await asyncio.get_running_loop().run_in_executor(None, lambda: dicts.save_dict(ad_file_path, ad_cfg.model_dump(), header = header_string))
|
||||
|
||||
@staticmethod
|
||||
def _download_and_save_image_sync(url:str, directory:str, filename_prefix:str, img_nr:int) -> str | None:
|
||||
@@ -114,14 +110,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
if current_img_url is None:
|
||||
continue
|
||||
|
||||
img_path = await loop.run_in_executor(
|
||||
None,
|
||||
self._download_and_save_image_sync,
|
||||
str(current_img_url),
|
||||
directory,
|
||||
img_fn_prefix,
|
||||
img_nr
|
||||
)
|
||||
img_path = await loop.run_in_executor(None, self._download_and_save_image_sync, str(current_img_url), directory, img_fn_prefix, img_nr)
|
||||
|
||||
if img_path:
|
||||
dl_counter += 1
|
||||
@@ -217,10 +206,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
|
||||
# Extract references using the CORRECTED selector
|
||||
try:
|
||||
page_refs:list[str] = [
|
||||
str((await self.web_find(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = li)).attrs["href"])
|
||||
for li in list_items
|
||||
]
|
||||
page_refs:list[str] = [str((await self.web_find(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = li)).attrs["href"]) for li in list_items]
|
||||
refs.extend(page_refs)
|
||||
LOG.info("Successfully extracted %s refs from page %s.", len(page_refs), current_page)
|
||||
except Exception as e:
|
||||
@@ -344,7 +330,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
if prefix and description_text.startswith(prefix.strip()):
|
||||
description_text = description_text[len(prefix.strip()):]
|
||||
if suffix and description_text.endswith(suffix.strip()):
|
||||
description_text = description_text[:-len(suffix.strip())]
|
||||
description_text = description_text[: -len(suffix.strip())]
|
||||
|
||||
info["description"] = description_text.strip()
|
||||
|
||||
@@ -361,8 +347,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
info["id"] = ad_id
|
||||
|
||||
try: # try different locations known for creation date element
|
||||
creation_date = await self.web_text(By.XPATH,
|
||||
"/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
|
||||
creation_date = await self.web_text(By.XPATH, "/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span")
|
||||
except TimeoutError:
|
||||
creation_date = await self.web_text(By.CSS_SELECTOR, "#viewad-extra-info > div:nth-child(1) > span:nth-child(2)")
|
||||
|
||||
@@ -380,9 +365,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
|
||||
return ad_cfg
|
||||
|
||||
async def _extract_ad_page_info_with_directory_handling(
|
||||
self, relative_directory:Path, ad_id:int
|
||||
) -> tuple[AdPartial, Path]:
|
||||
async def _extract_ad_page_info_with_directory_handling(self, relative_directory:Path, ad_id:int) -> tuple[AdPartial, Path]:
|
||||
"""
|
||||
Extracts ad information and handles directory creation/renaming.
|
||||
|
||||
@@ -415,8 +398,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
if await files.exists(temp_dir):
|
||||
if self.config.download.rename_existing_folders:
|
||||
# Rename the old folder to the new name with title
|
||||
LOG.info("Renaming folder from %s to %s for ad %s...",
|
||||
temp_dir.name, final_dir.name, ad_id)
|
||||
LOG.info("Renaming folder from %s to %s for ad %s...", temp_dir.name, final_dir.name, ad_id)
|
||||
LOG.debug("Renaming: %s -> %s", temp_dir, final_dir)
|
||||
await loop.run_in_executor(None, temp_dir.rename, final_dir)
|
||||
else:
|
||||
@@ -471,14 +453,8 @@ class AdExtractor(WebScrapingMixin):
|
||||
category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
|
||||
category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
|
||||
except TimeoutError as exc:
|
||||
LOG.error(
|
||||
"Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)",
|
||||
fallback_timeout,
|
||||
category_ids
|
||||
)
|
||||
raise TimeoutError(
|
||||
_("Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.") % {"seconds": fallback_timeout}
|
||||
) from exc
|
||||
LOG.error("Legacy breadcrumb selectors not found within %.1f seconds (collected ids: %s)", fallback_timeout, category_ids)
|
||||
raise TimeoutError(_("Unable to locate breadcrumb fallback selectors within %(seconds).1f seconds.") % {"seconds": fallback_timeout}) from exc
|
||||
href_first:str = str(category_first_part.attrs["href"])
|
||||
href_second:str = str(category_second_part.attrs["href"])
|
||||
cat_num_first_raw = href_first.rsplit("/", maxsplit = 1)[-1]
|
||||
@@ -553,8 +529,8 @@ class AdExtractor(WebScrapingMixin):
|
||||
# reading shipping option from kleinanzeigen
|
||||
# and find the right one by price
|
||||
shipping_costs = json.loads(
|
||||
(await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE"))
|
||||
["content"])["data"]["shippingOptionsResponse"]["options"]
|
||||
(await self.web_request("https://gateway.kleinanzeigen.de/postad/api/v1/shipping-options?posterType=PRIVATE"))["content"]
|
||||
)["data"]["shippingOptionsResponse"]["options"]
|
||||
|
||||
# map to internal shipping identifiers used by kleinanzeigen-bot
|
||||
shipping_option_mapping = {
|
||||
@@ -566,7 +542,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
"HERMES_001": "Hermes_Päckchen",
|
||||
"HERMES_002": "Hermes_S",
|
||||
"HERMES_003": "Hermes_M",
|
||||
"HERMES_004": "Hermes_L"
|
||||
"HERMES_004": "Hermes_L",
|
||||
}
|
||||
|
||||
# Convert Euro to cents and round to nearest integer
|
||||
|
||||
@@ -15,40 +15,22 @@ from kleinanzeigen_bot.utils import dicts
|
||||
from kleinanzeigen_bot.utils.misc import get_attr
|
||||
from kleinanzeigen_bot.utils.pydantics import ContextualModel
|
||||
|
||||
_MAX_PERCENTAGE:Final[int] = 100
|
||||
_MAX_PERCENTAGE: Final[int] = 100
|
||||
|
||||
|
||||
class AutoPriceReductionConfig(ContextualModel):
|
||||
enabled:bool = Field(
|
||||
default = False,
|
||||
description = "automatically lower the price of reposted ads"
|
||||
enabled: bool = Field(default=False, description="automatically lower the price of reposted ads")
|
||||
strategy: Literal["FIXED", "PERCENTAGE"] | None = Field(
|
||||
default=None, description="PERCENTAGE reduces by a percentage of the previous price, FIXED reduces by a fixed amount"
|
||||
)
|
||||
strategy:Literal["FIXED", "PERCENTAGE"] | None = Field(
|
||||
default = None,
|
||||
description = "PERCENTAGE reduces by a percentage of the previous price, FIXED reduces by a fixed amount"
|
||||
)
|
||||
amount:float | None = Field(
|
||||
default = None,
|
||||
gt = 0,
|
||||
description = "magnitude of the reduction; interpreted as percent for PERCENTAGE or currency units for FIXED"
|
||||
)
|
||||
min_price:float | None = Field(
|
||||
default = None,
|
||||
ge = 0,
|
||||
description = "required when enabled is true; minimum price floor (use 0 for no lower bound)"
|
||||
)
|
||||
delay_reposts:int = Field(
|
||||
default = 0,
|
||||
ge = 0,
|
||||
description = "number of reposts to wait before applying the first automatic price reduction"
|
||||
)
|
||||
delay_days:int = Field(
|
||||
default = 0,
|
||||
ge = 0,
|
||||
description = "number of days to wait after publication before applying automatic price reductions"
|
||||
amount: float | None = Field(
|
||||
default=None, gt=0, description="magnitude of the reduction; interpreted as percent for PERCENTAGE or currency units for FIXED"
|
||||
)
|
||||
min_price: float | None = Field(default=None, ge=0, description="required when enabled is true; minimum price floor (use 0 for no lower bound)")
|
||||
delay_reposts: int = Field(default=0, ge=0, description="number of reposts to wait before applying the first automatic price reduction")
|
||||
delay_days: int = Field(default=0, ge=0, description="number of days to wait after publication before applying automatic price reductions")
|
||||
|
||||
@model_validator(mode = "after")
|
||||
@model_validator(mode="after")
|
||||
def _validate_config(self) -> "AutoPriceReductionConfig":
|
||||
if self.enabled:
|
||||
if self.strategy is None:
|
||||
@@ -63,43 +45,38 @@ class AutoPriceReductionConfig(ContextualModel):
|
||||
|
||||
|
||||
class ContactDefaults(ContextualModel):
|
||||
name:str | None = None
|
||||
street:str | None = None
|
||||
zipcode:int | str | None = None
|
||||
location:str | None = Field(
|
||||
default = None,
|
||||
description = "city or locality of the listing (can include multiple districts)",
|
||||
examples = ["Sample Town - District One"]
|
||||
name: str | None = None
|
||||
street: str | None = None
|
||||
zipcode: int | str | None = None
|
||||
location: str | None = Field(
|
||||
default=None, description="city or locality of the listing (can include multiple districts)", examples=["Sample Town - District One"]
|
||||
)
|
||||
phone:str | None = None
|
||||
phone: str | None = None
|
||||
|
||||
|
||||
@deprecated("Use description_prefix/description_suffix instead")
|
||||
class DescriptionAffixes(ContextualModel):
|
||||
prefix:str | None = None
|
||||
suffix:str | None = None
|
||||
prefix: str | None = None
|
||||
suffix: str | None = None
|
||||
|
||||
|
||||
class AdDefaults(ContextualModel):
|
||||
active:bool = True
|
||||
type:Literal["OFFER", "WANTED"] = "OFFER"
|
||||
description:DescriptionAffixes | None = None
|
||||
description_prefix:str | None = Field(default = None, description = "prefix for the ad description")
|
||||
description_suffix:str | None = Field(default = None, description = " suffix for the ad description")
|
||||
price_type:Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE"
|
||||
auto_price_reduction:AutoPriceReductionConfig = Field(
|
||||
default_factory = AutoPriceReductionConfig,
|
||||
description = "automatic price reduction configuration"
|
||||
)
|
||||
shipping_type:Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING"
|
||||
sell_directly:bool = Field(default = False, description = "requires shipping_type SHIPPING to take effect")
|
||||
images:list[str] | None = Field(default = None)
|
||||
contact:ContactDefaults = Field(default_factory = ContactDefaults)
|
||||
republication_interval:int = 7
|
||||
active: bool = True
|
||||
type: Literal["OFFER", "WANTED"] = "OFFER"
|
||||
description: DescriptionAffixes | None = None
|
||||
description_prefix: str | None = Field(default=None, description="prefix for the ad description")
|
||||
description_suffix: str | None = Field(default=None, description=" suffix for the ad description")
|
||||
price_type: Literal["FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"] = "NEGOTIABLE"
|
||||
auto_price_reduction: AutoPriceReductionConfig = Field(default_factory=AutoPriceReductionConfig, description="automatic price reduction configuration")
|
||||
shipping_type: Literal["PICKUP", "SHIPPING", "NOT_APPLICABLE"] = "SHIPPING"
|
||||
sell_directly: bool = Field(default=False, description="requires shipping_type SHIPPING to take effect")
|
||||
images: list[str] | None = Field(default=None)
|
||||
contact: ContactDefaults = Field(default_factory=ContactDefaults)
|
||||
republication_interval: int = 7
|
||||
|
||||
@model_validator(mode = "before")
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def migrate_legacy_description(cls, values:dict[str, Any]) -> dict[str, Any]:
|
||||
def migrate_legacy_description(cls, values: dict[str, Any]) -> dict[str, Any]:
|
||||
# Ensure flat prefix/suffix take precedence over deprecated nested "description"
|
||||
description_prefix = values.get("description_prefix")
|
||||
description_suffix = values.get("description_suffix")
|
||||
@@ -114,89 +91,71 @@ class AdDefaults(ContextualModel):
|
||||
|
||||
|
||||
class DownloadConfig(ContextualModel):
|
||||
include_all_matching_shipping_options:bool = Field(
|
||||
default = False,
|
||||
description = "if true, all shipping options matching the package size will be included"
|
||||
)
|
||||
excluded_shipping_options:list[str] = Field(
|
||||
default_factory = list,
|
||||
description = "list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']"
|
||||
)
|
||||
folder_name_max_length:int = Field(
|
||||
default = 100,
|
||||
ge = 10,
|
||||
le = 255,
|
||||
description = "maximum length for folder names when downloading ads (default: 100)"
|
||||
)
|
||||
rename_existing_folders:bool = Field(
|
||||
default = False,
|
||||
description = "if true, rename existing folders without titles to include titles (default: false)"
|
||||
)
|
||||
include_all_matching_shipping_options: bool = Field(default=False, description="if true, all shipping options matching the package size will be included")
|
||||
excluded_shipping_options: list[str] = Field(default_factory=list, description="list of shipping options to exclude, e.g. ['DHL_2', 'DHL_5']")
|
||||
folder_name_max_length: int = Field(default=100, ge=10, le=255, description="maximum length for folder names when downloading ads (default: 100)")
|
||||
rename_existing_folders: bool = Field(default=False, description="if true, rename existing folders without titles to include titles (default: false)")
|
||||
|
||||
|
||||
class BrowserConfig(ContextualModel):
|
||||
arguments:list[str] = Field(
|
||||
default_factory = lambda: ["--user-data-dir=.temp/browser-profile"],
|
||||
description = "See https://peter.sh/experiments/chromium-command-line-switches/"
|
||||
arguments: list[str] = Field(
|
||||
default_factory=list,
|
||||
description=(
|
||||
"See https://peter.sh/experiments/chromium-command-line-switches/. "
|
||||
"Browser profile path is auto-configured based on installation mode (portable/XDG)."
|
||||
),
|
||||
)
|
||||
binary_location:str | None = Field(
|
||||
default = None,
|
||||
description = "path to custom browser executable, if not specified will be looked up on PATH"
|
||||
binary_location: str | None = Field(default=None, description="path to custom browser executable, if not specified will be looked up on PATH")
|
||||
extensions: list[str] = Field(default_factory=list, description="a list of .crx extension files to be loaded")
|
||||
use_private_window: bool = True
|
||||
user_data_dir: str | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md. "
|
||||
"If not specified, defaults to XDG cache directory in XDG mode or .temp/browser-profile in portable mode."
|
||||
),
|
||||
)
|
||||
extensions:list[str] = Field(
|
||||
default_factory = list,
|
||||
description = "a list of .crx extension files to be loaded"
|
||||
)
|
||||
use_private_window:bool = True
|
||||
user_data_dir:str | None = Field(
|
||||
default = ".temp/browser-profile",
|
||||
description = "See https://github.com/chromium/chromium/blob/main/docs/user_data_dir.md"
|
||||
)
|
||||
profile_name:str | None = None
|
||||
profile_name: str | None = None
|
||||
|
||||
|
||||
class LoginConfig(ContextualModel):
|
||||
username:str = Field(..., min_length = 1)
|
||||
password:str = Field(..., min_length = 1)
|
||||
username: str = Field(..., min_length=1)
|
||||
password: str = Field(..., min_length=1)
|
||||
|
||||
|
||||
class PublishingConfig(ContextualModel):
|
||||
delete_old_ads:Literal["BEFORE_PUBLISH", "AFTER_PUBLISH", "NEVER"] | None = "AFTER_PUBLISH"
|
||||
delete_old_ads_by_title:bool = Field(default = True, description = "only works if delete_old_ads is set to BEFORE_PUBLISH")
|
||||
delete_old_ads: Literal["BEFORE_PUBLISH", "AFTER_PUBLISH", "NEVER"] | None = "AFTER_PUBLISH"
|
||||
delete_old_ads_by_title: bool = Field(default=True, description="only works if delete_old_ads is set to BEFORE_PUBLISH")
|
||||
|
||||
|
||||
class CaptchaConfig(ContextualModel):
|
||||
auto_restart:bool = False
|
||||
restart_delay:str = "6h"
|
||||
auto_restart: bool = False
|
||||
restart_delay: str = "6h"
|
||||
|
||||
|
||||
class TimeoutConfig(ContextualModel):
|
||||
multiplier:float = Field(
|
||||
default = 1.0,
|
||||
ge = 0.1,
|
||||
description = "Global multiplier applied to all timeout values."
|
||||
)
|
||||
default:float = Field(default = 5.0, ge = 0.0, description = "Baseline timeout for DOM interactions.")
|
||||
page_load:float = Field(default = 15.0, ge = 1.0, description = "Page load timeout for web_open.")
|
||||
captcha_detection:float = Field(default = 2.0, ge = 0.1, description = "Timeout for captcha iframe detection.")
|
||||
sms_verification:float = Field(default = 4.0, ge = 0.1, description = "Timeout for SMS verification prompts.")
|
||||
gdpr_prompt:float = Field(default = 10.0, ge = 1.0, description = "Timeout for GDPR/consent dialogs.")
|
||||
login_detection:float = Field(default = 10.0, ge = 1.0, description = "Timeout for detecting existing login session via DOM elements.")
|
||||
publishing_result:float = Field(default = 300.0, ge = 10.0, description = "Timeout for publishing result checks.")
|
||||
publishing_confirmation:float = Field(default = 20.0, ge = 1.0, description = "Timeout for publish confirmation redirect.")
|
||||
image_upload:float = Field(default = 30.0, ge = 5.0, description = "Timeout for image upload and server-side processing.")
|
||||
pagination_initial:float = Field(default = 10.0, ge = 1.0, description = "Timeout for initial pagination lookup.")
|
||||
pagination_follow_up:float = Field(default = 5.0, ge = 1.0, description = "Timeout for subsequent pagination navigation.")
|
||||
quick_dom:float = Field(default = 2.0, ge = 0.1, description = "Generic short timeout for transient UI.")
|
||||
update_check:float = Field(default = 10.0, ge = 1.0, description = "Timeout for GitHub update checks.")
|
||||
chrome_remote_probe:float = Field(default = 2.0, ge = 0.1, description = "Timeout for local remote-debugging probes.")
|
||||
chrome_remote_debugging:float = Field(default = 5.0, ge = 1.0, description = "Timeout for remote debugging API calls.")
|
||||
chrome_binary_detection:float = Field(default = 10.0, ge = 1.0, description = "Timeout for chrome --version subprocesses.")
|
||||
retry_enabled:bool = Field(default = True, description = "Enable built-in retry/backoff for DOM operations.")
|
||||
retry_max_attempts:int = Field(default = 2, ge = 1, description = "Max retry attempts when retry is enabled.")
|
||||
retry_backoff_factor:float = Field(default = 1.5, ge = 1.0, description = "Exponential factor applied per retry attempt.")
|
||||
multiplier: float = Field(default=1.0, ge=0.1, description="Global multiplier applied to all timeout values.")
|
||||
default: float = Field(default=5.0, ge=0.0, description="Baseline timeout for DOM interactions.")
|
||||
page_load: float = Field(default=15.0, ge=1.0, description="Page load timeout for web_open.")
|
||||
captcha_detection: float = Field(default=2.0, ge=0.1, description="Timeout for captcha iframe detection.")
|
||||
sms_verification: float = Field(default=4.0, ge=0.1, description="Timeout for SMS verification prompts.")
|
||||
gdpr_prompt: float = Field(default=10.0, ge=1.0, description="Timeout for GDPR/consent dialogs.")
|
||||
login_detection: float = Field(default=10.0, ge=1.0, description="Timeout for detecting existing login session via DOM elements.")
|
||||
publishing_result: float = Field(default=300.0, ge=10.0, description="Timeout for publishing result checks.")
|
||||
publishing_confirmation: float = Field(default=20.0, ge=1.0, description="Timeout for publish confirmation redirect.")
|
||||
image_upload: float = Field(default=30.0, ge=5.0, description="Timeout for image upload and server-side processing.")
|
||||
pagination_initial: float = Field(default=10.0, ge=1.0, description="Timeout for initial pagination lookup.")
|
||||
pagination_follow_up: float = Field(default=5.0, ge=1.0, description="Timeout for subsequent pagination navigation.")
|
||||
quick_dom: float = Field(default=2.0, ge=0.1, description="Generic short timeout for transient UI.")
|
||||
update_check: float = Field(default=10.0, ge=1.0, description="Timeout for GitHub update checks.")
|
||||
chrome_remote_probe: float = Field(default=2.0, ge=0.1, description="Timeout for local remote-debugging probes.")
|
||||
chrome_remote_debugging: float = Field(default=5.0, ge=1.0, description="Timeout for remote debugging API calls.")
|
||||
chrome_binary_detection: float = Field(default=10.0, ge=1.0, description="Timeout for chrome --version subprocesses.")
|
||||
retry_enabled: bool = Field(default=True, description="Enable built-in retry/backoff for DOM operations.")
|
||||
retry_max_attempts: int = Field(default=2, ge=1, description="Max retry attempts when retry is enabled.")
|
||||
retry_backoff_factor: float = Field(default=1.5, ge=1.0, description="Exponential factor applied per retry attempt.")
|
||||
|
||||
def resolve(self, key:str = "default", override:float | None = None) -> float:
|
||||
def resolve(self, key: str = "default", override: float | None = None) -> float:
|
||||
"""
|
||||
Return the base timeout (seconds) for the given key without applying modifiers.
|
||||
"""
|
||||
@@ -212,16 +171,16 @@ class TimeoutConfig(ContextualModel):
|
||||
|
||||
return float(self.default)
|
||||
|
||||
def effective(self, key:str = "default", override:float | None = None, *, attempt:int = 0) -> float:
|
||||
def effective(self, key: str = "default", override: float | None = None, *, attempt: int = 0) -> float:
|
||||
"""
|
||||
Return the effective timeout (seconds) with multiplier/backoff applied.
|
||||
"""
|
||||
base = self.resolve(key, override)
|
||||
backoff = self.retry_backoff_factor ** attempt if attempt > 0 else 1.0
|
||||
backoff = self.retry_backoff_factor**attempt if attempt > 0 else 1.0
|
||||
return base * self.multiplier * backoff
|
||||
|
||||
|
||||
def _validate_glob_pattern(v:str) -> str:
|
||||
def _validate_glob_pattern(v: str) -> str:
|
||||
if not v.strip():
|
||||
raise ValueError("must be a non-empty, non-blank glob pattern")
|
||||
return v
|
||||
@@ -231,21 +190,20 @@ GlobPattern = Annotated[str, AfterValidator(_validate_glob_pattern)]
|
||||
|
||||
|
||||
class Config(ContextualModel):
|
||||
ad_files:list[GlobPattern] = Field(
|
||||
default_factory = lambda: ["./**/ad_*.{json,yml,yaml}"],
|
||||
min_items = 1,
|
||||
description = """
|
||||
ad_files: list[GlobPattern] = Field(
|
||||
default_factory=lambda: ["./**/ad_*.{json,yml,yaml}"],
|
||||
min_items=1,
|
||||
description="""
|
||||
glob (wildcard) patterns to select ad configuration files
|
||||
if relative paths are specified, then they are relative to this configuration file
|
||||
"""
|
||||
""",
|
||||
) # type: ignore[call-overload]
|
||||
|
||||
ad_defaults:AdDefaults = Field(
|
||||
default_factory = AdDefaults,
|
||||
description = "Default values for ads, can be overwritten in each ad configuration file"
|
||||
)
|
||||
ad_defaults: AdDefaults = Field(default_factory=AdDefaults, description="Default values for ads, can be overwritten in each ad configuration file")
|
||||
|
||||
categories:dict[str, str] = Field(default_factory = dict, description = """
|
||||
categories: dict[str, str] = Field(
|
||||
default_factory=dict,
|
||||
description="""
|
||||
additional name to category ID mappings, see default list at
|
||||
https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
|
||||
|
||||
@@ -253,17 +211,16 @@ Example:
|
||||
categories:
|
||||
Elektronik > Notebooks: 161/278
|
||||
Jobs > Praktika: 102/125
|
||||
""")
|
||||
""",
|
||||
)
|
||||
|
||||
download:DownloadConfig = Field(default_factory = DownloadConfig)
|
||||
publishing:PublishingConfig = Field(default_factory = PublishingConfig)
|
||||
browser:BrowserConfig = Field(default_factory = BrowserConfig, description = "Browser configuration")
|
||||
login:LoginConfig = Field(default_factory = LoginConfig.model_construct, description = "Login credentials")
|
||||
captcha:CaptchaConfig = Field(default_factory = CaptchaConfig)
|
||||
update_check:UpdateCheckConfig = Field(default_factory = UpdateCheckConfig, description = "Update check configuration")
|
||||
timeouts:TimeoutConfig = Field(default_factory = TimeoutConfig, description = "Centralized timeout configuration.")
|
||||
download: DownloadConfig = Field(default_factory=DownloadConfig)
|
||||
publishing: PublishingConfig = Field(default_factory=PublishingConfig)
|
||||
browser: BrowserConfig = Field(default_factory=BrowserConfig, description="Browser configuration")
|
||||
login: LoginConfig = Field(default_factory=LoginConfig.model_construct, description="Login credentials")
|
||||
captcha: CaptchaConfig = Field(default_factory=CaptchaConfig)
|
||||
update_check: UpdateCheckConfig = Field(default_factory=UpdateCheckConfig, description="Update check configuration")
|
||||
timeouts: TimeoutConfig = Field(default_factory=TimeoutConfig, description="Centralized timeout configuration.")
|
||||
|
||||
def with_values(self, values:dict[str, Any]) -> Config:
|
||||
return Config.model_validate(
|
||||
dicts.apply_defaults(copy.deepcopy(values), defaults = self.model_dump())
|
||||
)
|
||||
def with_values(self, values: dict[str, Any]) -> Config:
|
||||
return Config.model_validate(dicts.apply_defaults(copy.deepcopy(values), defaults=self.model_dump()))
|
||||
|
||||
@@ -112,6 +112,11 @@ kleinanzeigen_bot/__init__.py:
|
||||
" -> FAILED: Timeout while extending ad '%s': %s": " -> FEHLER: Zeitüberschreitung beim Verlängern der Anzeige '%s': %s"
|
||||
" -> FAILED: Could not persist extension for ad '%s': %s": " -> FEHLER: Verlängerung der Anzeige '%s' konnte nicht gespeichert werden: %s"
|
||||
|
||||
finalize_installation_mode:
|
||||
"Config file: %s": "Konfigurationsdatei: %s"
|
||||
"First run detected, prompting user for installation mode": "Erster Start erkannt, frage Benutzer nach Installationsmodus"
|
||||
"Installation mode: %s": "Installationsmodus: %s"
|
||||
|
||||
publish_ads:
|
||||
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
|
||||
"Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist"
|
||||
@@ -240,7 +245,7 @@ kleinanzeigen_bot/__init__.py:
|
||||
kleinanzeigen_bot/extract.py:
|
||||
#################################################
|
||||
download_ad:
|
||||
"Ensured ads directory exists at ./%s.": "Verzeichnis [%s] für Anzeige vorhanden."
|
||||
"Using download directory: %s": "Verwende Download-Verzeichnis: %s"
|
||||
|
||||
_download_and_save_image_sync:
|
||||
"Failed to download image %s: %s": "Fehler beim Herunterladen des Bildes %s: %s"
|
||||
|
||||
@@ -7,7 +7,6 @@ from __future__ import annotations
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from gettext import gettext as _
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import colorama
|
||||
@@ -22,6 +21,7 @@ except ImportError:
|
||||
__version__ = "unknown"
|
||||
|
||||
from kleinanzeigen_bot.model.update_check_state import UpdateCheckState
|
||||
from kleinanzeigen_bot.utils import xdg_paths
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -31,15 +31,16 @@ colorama.init()
|
||||
class UpdateChecker:
|
||||
"""Checks for updates to the bot."""
|
||||
|
||||
def __init__(self, config:"Config") -> None:
|
||||
def __init__(self, config: "Config", installation_mode: str | xdg_paths.InstallationMode = "portable") -> None:
|
||||
"""Initialize the update checker.
|
||||
|
||||
Args:
|
||||
config: The bot configuration.
|
||||
installation_mode: Installation mode (portable/xdg).
|
||||
"""
|
||||
self.config = config
|
||||
self.state_file = Path(".temp") / "update_check_state.json"
|
||||
self.state_file.parent.mkdir(exist_ok = True) # Ensure .temp directory exists
|
||||
self.state_file = xdg_paths.get_update_check_state_path(installation_mode)
|
||||
# Note: xdg_paths handles directory creation
|
||||
self.state = UpdateCheckState.load(self.state_file)
|
||||
|
||||
def get_local_version(self) -> str | None:
|
||||
@@ -54,7 +55,7 @@ class UpdateChecker:
|
||||
"""Return the effective timeout for HTTP calls."""
|
||||
return self.config.timeouts.effective("update_check")
|
||||
|
||||
def _get_commit_hash(self, version:str) -> str | None:
|
||||
def _get_commit_hash(self, version: str) -> str | None:
|
||||
"""Extract the commit hash from a version string.
|
||||
|
||||
Args:
|
||||
@@ -67,7 +68,7 @@ class UpdateChecker:
|
||||
return version.split("+")[1]
|
||||
return None
|
||||
|
||||
def _resolve_commitish(self, commitish:str) -> tuple[str | None, datetime | None]:
|
||||
def _resolve_commitish(self, commitish: str) -> tuple[str | None, datetime | None]:
|
||||
"""Resolve a commit-ish to a full commit hash and date.
|
||||
|
||||
Args:
|
||||
@@ -79,7 +80,7 @@ class UpdateChecker:
|
||||
try:
|
||||
response = requests.get(
|
||||
f"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/commits/{commitish}",
|
||||
timeout = self._request_timeout()
|
||||
timeout=self._request_timeout(),
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
@@ -95,7 +96,7 @@ class UpdateChecker:
|
||||
logger.warning(_("Could not resolve commit '%s': %s"), commitish, e)
|
||||
return None, None
|
||||
|
||||
def _get_short_commit_hash(self, commit:str) -> str:
|
||||
def _get_short_commit_hash(self, commit: str) -> str:
|
||||
"""Get the short version of a commit hash.
|
||||
|
||||
Args:
|
||||
@@ -106,7 +107,7 @@ class UpdateChecker:
|
||||
"""
|
||||
return commit[:7]
|
||||
|
||||
def _commits_match(self, local_commit:str, release_commit:str) -> bool:
|
||||
def _commits_match(self, local_commit: str, release_commit: str) -> bool:
|
||||
"""Determine whether two commits refer to the same hash.
|
||||
|
||||
This accounts for short vs. full hashes (e.g. 7 chars vs. 40 chars).
|
||||
@@ -119,7 +120,7 @@ class UpdateChecker:
|
||||
return True
|
||||
return len(release_commit) < len(local_commit) and local_commit.startswith(release_commit)
|
||||
|
||||
def check_for_updates(self, *, skip_interval_check:bool = False) -> None:
|
||||
def check_for_updates(self, *, skip_interval_check: bool = False) -> None:
|
||||
"""Check for updates to the bot.
|
||||
|
||||
Args:
|
||||
@@ -146,24 +147,16 @@ class UpdateChecker:
|
||||
try:
|
||||
if self.config.update_check.channel == "latest":
|
||||
# Use /releases/latest endpoint for stable releases
|
||||
response = requests.get(
|
||||
"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases/latest",
|
||||
timeout = self._request_timeout()
|
||||
)
|
||||
response = requests.get("https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases/latest", timeout=self._request_timeout())
|
||||
response.raise_for_status()
|
||||
release = response.json()
|
||||
# Defensive: ensure it's not a prerelease
|
||||
if release.get("prerelease", False):
|
||||
logger.warning(
|
||||
_("Latest release from GitHub is a prerelease, but 'latest' channel expects a stable release.")
|
||||
)
|
||||
logger.warning(_("Latest release from GitHub is a prerelease, but 'latest' channel expects a stable release."))
|
||||
return
|
||||
elif self.config.update_check.channel == "preview":
|
||||
# Use /releases endpoint and select the most recent prerelease
|
||||
response = requests.get(
|
||||
"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases",
|
||||
timeout = self._request_timeout()
|
||||
)
|
||||
response = requests.get("https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases", timeout=self._request_timeout())
|
||||
response.raise_for_status()
|
||||
releases = response.json()
|
||||
# Find the most recent prerelease
|
||||
@@ -199,7 +192,7 @@ class UpdateChecker:
|
||||
_("You are on the latest version: %s (compared to %s in channel %s)"),
|
||||
local_version,
|
||||
self._get_short_commit_hash(release_commit),
|
||||
self.config.update_check.channel
|
||||
self.config.update_check.channel,
|
||||
)
|
||||
self.state.update_last_check()
|
||||
self.state.save(self.state_file)
|
||||
@@ -212,7 +205,7 @@ class UpdateChecker:
|
||||
release_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
local_version,
|
||||
local_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
self.config.update_check.channel
|
||||
self.config.update_check.channel,
|
||||
)
|
||||
if release.get("body"):
|
||||
logger.info(_("Release notes:\n%s"), release["body"])
|
||||
@@ -227,7 +220,7 @@ class UpdateChecker:
|
||||
self._get_short_commit_hash(local_commit),
|
||||
local_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
self._get_short_commit_hash(release_commit),
|
||||
release_commit_date.strftime("%Y-%m-%d %H:%M:%S")
|
||||
release_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
)
|
||||
|
||||
# Update the last check time
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user