feat: update check (#561)

feat(update-check): add robust update check with interval support, state management, and CLI integration

- Implement version and interval-based update checks with configurable settings
- Add CLI command `kleinanzeigen-bot update-check` for manual checks
- Introduce state file with versioning, UTC timestamps, and migration logic
- Validate and normalize intervals (1d–30d) with fallback for invalid values
- Ensure correct handling of timezones and elapsed checks
- Improve error handling, logging, and internationalization (i18n)
- Add comprehensive test coverage for config, interval logic, migration, and CLI
- Align default config, translations, and schema with new functionality
- Improve help command UX by avoiding config/log loading for `--help`
- Update documentation and README with full feature overview
This commit is contained in:
Jens Bergmann
2025-06-27 07:52:40 +02:00
committed by GitHub
parent 4d4f3b4093
commit 5430f5cdc6
13 changed files with 1358 additions and 26 deletions

View File

@@ -15,6 +15,7 @@ from . import extract, resources
from ._version import __version__
from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial
from .model.config_model import Config
from .update_checker import UpdateChecker
from .utils import dicts, error_handlers, loggers, misc
from .utils.exceptions import CaptchaEncountered
from .utils.files import abspath
@@ -75,18 +76,30 @@ class KleinanzeigenBot(WebScrapingMixin):
match self.command:
case "help":
self.show_help()
return
case "version":
print(self.get_version())
case "verify":
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker.check_for_updates()
self.load_ads()
LOG.info("############################################")
LOG.info("DONE: No configuration errors found.")
LOG.info("############################################")
case "update-check":
self.configure_file_logging()
self.load_config()
checker = UpdateChecker(self.config)
checker.check_for_updates(skip_interval_check = True)
case "update-content-hash":
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker.check_for_updates()
self.ads_selector = "all"
if ads := self.load_ads(exclude_ads_with_id = False):
self.update_content_hashes(ads)
@@ -97,6 +110,9 @@ class KleinanzeigenBot(WebScrapingMixin):
case "publish":
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker.check_for_updates()
if not (self.ads_selector in {"all", "new", "due", "changed"} or
any(selector in self.ads_selector.split(",") for selector in ("all", "new", "due", "changed")) or
@@ -134,6 +150,9 @@ class KleinanzeigenBot(WebScrapingMixin):
case "delete":
self.configure_file_logging()
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker.check_for_updates()
if ads := self.load_ads():
await self.create_browser_session()
await self.login()
@@ -149,6 +168,9 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.warning('You provided no ads selector. Defaulting to "new".')
self.ads_selector = "new"
self.load_config()
# Check for updates on startup
checker = UpdateChecker(self.config)
checker.check_for_updates()
await self.create_browser_session()
await self.login()
await self.download_ads()
@@ -177,6 +199,7 @@ class KleinanzeigenBot(WebScrapingMixin):
delete - Löscht Anzeigen
update - Aktualisiert bestehende Anzeigen
download - Lädt eine oder mehrere Anzeigen herunter
update-check - Prüft auf verfügbare Updates
update-content-hash - Berechnet den content_hash aller Anzeigen anhand der aktuellen ad_defaults neu;
nach Änderungen an den config.yaml/ad_defaults verhindert es, dass alle Anzeigen als
"geändert" gelten und neu veröffentlicht werden.
@@ -220,7 +243,8 @@ class KleinanzeigenBot(WebScrapingMixin):
delete - deletes ads
update - updates published ads
download - downloads one or multiple ads
update-content-hash recalculates each ads content_hash based on the current ad_defaults;
update-check - checks for available updates
update-content-hash recalculates each ad's content_hash based on the current ad_defaults;
use this after changing config.yaml/ad_defaults to avoid every ad being marked "changed" and republished
--
help - displays this help (default command)
@@ -498,8 +522,8 @@ class KleinanzeigenBot(WebScrapingMixin):
if not os.path.exists(self.config_file_path):
LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
default_config = Config.model_construct()
default_config.login.username = ""
default_config.login.password = ""
default_config.login.username = "changeme" # noqa: S105 placeholder for default config, not a real username
default_config.login.password = "changeme" # noqa: S105 placeholder for default config, not a real password
dicts.save_dict(self.config_file_path, default_config.model_dump(exclude_none = True, exclude = {
"ad_defaults": {
"description" # deprecated

View File

@@ -9,6 +9,7 @@ from typing import Annotated, Any, List, Literal
from pydantic import AfterValidator, Field, model_validator
from typing_extensions import deprecated
from kleinanzeigen_bot.model.update_check_model import UpdateCheckConfig
from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils.misc import get_attr
from kleinanzeigen_bot.utils.pydantics import ContextualModel
@@ -142,6 +143,7 @@ Example:
browser:BrowserConfig = Field(default_factory = BrowserConfig, description = "Browser configuration")
login:LoginConfig = Field(default_factory = LoginConfig.model_construct, description = "Login credentials")
captcha:CaptchaConfig = Field(default_factory = CaptchaConfig)
update_check:UpdateCheckConfig = Field(default_factory = UpdateCheckConfig, description = "Update check configuration")
def with_values(self, values:dict[str, Any]) -> Config:
return Config.model_validate(

View File

@@ -0,0 +1,27 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from __future__ import annotations
from typing import Literal
from kleinanzeigen_bot.utils.pydantics import ContextualModel
class UpdateCheckConfig(ContextualModel):
    """Configuration for update checking functionality.

    Attributes:
        enabled: Whether update checking is enabled.
        channel: Which release channel to check ('latest' for stable, 'preview' for prereleases).
        interval: How often to check for updates, as a duration string (e.g. '7d', '1d').
            If the interval is invalid, too short (<1d), or too long (>30d),
            the bot will log a warning and use a default interval for this run:
              - 1d for 'preview' channel
              - 7d for 'latest' channel
            The config file is not changed automatically; please fix your config to avoid repeated warnings.
    """

    # Update checks run unless explicitly disabled.
    enabled:bool = True
    # Only the two literal channel names are accepted by the type annotation.
    channel:Literal["latest", "preview"] = "latest"
    # Kept as a plain string here; range checks happen when the interval is evaluated, not in this model.
    interval:str = "7d"  # Default interval of 7 days

View File

@@ -0,0 +1,194 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from __future__ import annotations
import datetime
import json
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from pathlib import Path
from kleinanzeigen_bot.utils import dicts, loggers, misc
from kleinanzeigen_bot.utils.pydantics import ContextualModel
# Module-level logger obtained through the project's logger helper.
LOG = loggers.get_logger(__name__)
# Current version of the state file format; state files reporting a lower version are migrated on load.
CURRENT_STATE_VERSION = 1
# Maximum allowed interval in days; longer configured intervals trigger the fallback to the channel default.
MAX_INTERVAL_DAYS = 30
class UpdateCheckState(ContextualModel):
    """Persisted state of the update check.

    Attributes:
        version: Format version of the state file (see CURRENT_STATE_VERSION).
        last_check: Time of the last completed update check, or None if no check has been recorded.
    """

    version:int = CURRENT_STATE_VERSION
    last_check:datetime.datetime | None = None

    @classmethod
    def _parse_timestamp(cls, timestamp_str:str) -> datetime.datetime | None:
        """Parse an ISO-8601 timestamp string and normalize it to UTC.

        Args:
            timestamp_str: The timestamp string to parse.

        Returns:
            The parsed timestamp in UTC, or None if the value cannot be parsed.
        """
        try:
            timestamp = datetime.datetime.fromisoformat(timestamp_str)
        except (TypeError, ValueError) as e:
            # TypeError covers non-string values from a hand-edited or damaged state file;
            # fromisoformat() raises it instead of ValueError in that case.
            LOG.warning("Invalid timestamp format in state file: %s", e)
            return None

        if timestamp.tzinfo is None:
            # If no timezone info, assume UTC
            return timestamp.replace(tzinfo = datetime.timezone.utc)
        # Convert to UTC if in a different timezone (no-op for timestamps already in UTC)
        return timestamp.astimezone(datetime.timezone.utc)

    @classmethod
    def load(cls, state_file:Path) -> UpdateCheckState:
        """Load the update check state from a file.

        A missing, empty or unreadable state file yields a fresh default state
        instead of an error, so a damaged file never blocks the bot.

        Args:
            state_file: The path to the state file.

        Returns:
            The loaded state, or a default state if nothing usable could be loaded.
        """
        if not state_file.exists() or state_file.stat().st_size == 0:
            return cls()

        try:
            data = dicts.load_dict(str(state_file))
            if not data:
                return cls()

            # Handle version migration
            version = data.get("version", 0)
            if version < CURRENT_STATE_VERSION:
                LOG.info("Migrating update check state from version %d to %d", version, CURRENT_STATE_VERSION)
                data = cls._migrate_state(data, version)

            # Parse last_check timestamp; an explicit null means "never checked" and is kept as None
            if data.get("last_check") is not None:
                data["last_check"] = cls._parse_timestamp(data["last_check"])

            return cls.model_validate(data)
        except (json.JSONDecodeError, ValueError, TypeError) as e:
            # TypeError: e.g. a non-numeric "version" entry that cannot be compared above
            LOG.warning("Failed to load update check state: %s", e)
            return cls()

    @classmethod
    def _migrate_state(cls, data:dict[str, Any], from_version:int) -> dict[str, Any]:
        """Migrate state data from an older version to the current version.

        Args:
            data: The state data to migrate (modified in place).
            from_version: The version of the state data.

        Returns:
            The migrated state data.
        """
        # Version 0 to 1: Add version field
        if from_version == 0:
            data["version"] = CURRENT_STATE_VERSION
            LOG.debug("Migrated state from version 0 to 1: Added version field")
        return data

    def save(self, state_file:Path) -> None:
        """Save the update check state to a file.

        Failures are logged as warnings and never raised.

        Args:
            state_file: The path to the state file.
        """
        try:
            data = self.model_dump()
            last_check = data["last_check"]
            if last_check:
                # Ensure timestamp is in UTC before saving.
                # Note: astimezone() interprets a naive datetime as local time.
                if last_check.tzinfo != datetime.timezone.utc:
                    last_check = last_check.astimezone(datetime.timezone.utc)
                data["last_check"] = last_check.isoformat()
            dicts.save_dict(str(state_file), data)
        except PermissionError:
            LOG.warning("Permission denied when saving update check state to %s", state_file)
        except Exception as e:
            LOG.warning("Failed to save update check state: %s", e)

    def update_last_check(self) -> None:
        """Update the last check time to now in UTC."""
        self.last_check = datetime.datetime.now(datetime.timezone.utc)

    def _validate_update_interval(self, interval:str) -> tuple[datetime.timedelta, bool, str]:
        """Validate the update check interval string.

        Only zero and negative durations are rejected here; the 1d..MAX_INTERVAL_DAYS
        range is enforced by should_check().

        Args:
            interval: The interval string from the configuration.

        Returns:
            A tuple (timedelta, is_valid, reason); reason is empty when the interval is valid.
        """
        td = misc.parse_duration(interval)
        # Accept explicit zero (e.g. "0d", "0h", "0m", "0s", "0") as invalid, but distinguish from typos
        if td.total_seconds() == 0:
            if interval.strip() in {"0d", "0h", "0m", "0s", "0"}:
                return td, False, "Interval is zero, which is not allowed."
            return td, False, "Invalid interval format or unsupported unit."
        if td.total_seconds() < 0:
            return td, False, "Negative interval is not allowed."
        return td, True, ""

    def should_check(self, interval:str, channel:str = "latest") -> bool:
        """Determine if an update check should be performed based on the provided interval.

        Args:
            interval: The interval string (e.g. '7d', '1d 12h', etc.)
            channel: The update channel ('latest' or 'preview') for fallback default interval.

        Returns:
            True if an update check should be performed, False otherwise.

        Notes:
            - If interval is invalid, negative, zero, below 1d or above the maximum,
              the default interval for the channel is used (1d for 'preview', otherwise 7d).
            - With no recorded last check the result is always True.
            - Otherwise True only if more than the interval has passed since last_check.
            - Always compares in UTC.
        """
        td, is_valid, reason = self._validate_update_interval(interval)
        total_days = td.total_seconds() / 86400
        epsilon = 1e-6  # tolerance so that exactly 1d / 30d are not rejected by float rounding
        fallback = False

        if not is_valid:
            if reason == "Interval is zero, which is not allowed.":
                LOG.warning("Interval is zero: %s. Minimum interval is 1d. Using default interval for this run.", interval)
            elif reason == "Invalid interval format or unsupported unit.":
                LOG.warning("Invalid interval format or unsupported unit: %s. Using default interval for this run.", interval)
            elif reason == "Negative interval is not allowed.":
                LOG.warning("Negative interval: %s. Minimum interval is 1d. Using default interval for this run.", interval)
            fallback = True
        elif total_days > MAX_INTERVAL_DAYS + epsilon:
            LOG.warning("Interval too long: %s. Maximum interval is 30d. Using default interval for this run.", interval)
            fallback = True
        elif total_days < 1 - epsilon:
            LOG.warning("Interval too short: %s. Minimum interval is 1d. Using default interval for this run.", interval)
            fallback = True

        if fallback:
            # Fallback to default interval based on channel
            if channel == "preview":
                td = misc.parse_duration("1d")
                LOG.warning("Falling back to default interval: 1d (preview channel). Please fix your config to avoid this warning.")
            else:
                td = misc.parse_duration("7d")
                LOG.warning("Falling back to default interval: 7d (latest channel). Please fix your config to avoid this warning.")

        if not self.last_check:
            return True

        # Normalize to an aware UTC value first: subtracting a naive datetime from an aware one
        # raises TypeError. A naive value is interpreted as local time, matching save().
        last_check_utc = self.last_check.astimezone(datetime.timezone.utc)
        now = datetime.datetime.now(datetime.timezone.utc)
        elapsed = now - last_check_utc

        # Compare using integer seconds to avoid microsecond-level flakiness
        return int(elapsed.total_seconds()) > int(td.total_seconds())

View File

@@ -393,3 +393,51 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
web_request:
" -> HTTP %s [%s]...": " -> HTTP %s [%s]..."
#################################################
kleinanzeigen_bot/update_checker.py:
#################################################
_get_commit_date:
"Could not get commit date: %s": "Konnte Commit-Datum nicht ermitteln: %s"
_get_release_commit:
"Could not get release commit: %s": "Konnte Release-Commit nicht ermitteln: %s"
check_for_updates:
"A new version is available: %s from %s (current: %s from %s, channel: %s)": "Eine neue Version ist verfügbar: %s vom %s (aktuell: %s vom %s, Kanal: %s)"
"Could not determine commit dates for comparison.": "Konnte Commit-Daten für den Vergleich nicht ermitteln."
"Could not determine local commit hash.": "Konnte lokalen Commit-Hash nicht ermitteln."
"Could not determine local version.": "Konnte lokale Version nicht ermitteln."
"Could not determine release commit hash.": "Konnte Release-Commit-Hash nicht ermitteln."
"Could not get releases: %s": "Konnte Releases nicht abrufen: %s"
"Failed to get commit dates: %s": "Fehler beim Abrufen der Commit-Daten: %s"
"Failed to get release commit: %s": "Fehler beim Abrufen des Release-Commits: %s"
? "Release notes:\n%s"
: "Release-Notizen:\n%s"
"You are on the latest version: %s (compared to %s in channel %s)": "Sie verwenden die neueste Version: %s (verglichen mit %s im Kanal %s)"
"Latest release from GitHub is a prerelease, but 'latest' channel expects a stable release.": "Die neueste GitHub-Version ist eine Vorabversion, aber der 'latest'-Kanal erwartet eine stabile Version."
"No prerelease found for 'preview' channel.": "Keine Vorabversion für den 'preview'-Kanal gefunden."
"Unknown update channel: %s": "Unbekannter Update-Kanal: %s"
? "You are on a different commit than the release for channel '%s' (tag: %s). This may mean you are ahead, behind, or on a different branch. Local commit: %s (%s), Release commit: %s (%s)"
: "Sie befinden sich auf einem anderen Commit als das Release für Kanal '%s' (Tag: %s). Dies kann bedeuten, dass Sie voraus, hinterher oder auf einem anderen Branch sind. Lokaler Commit: %s (%s), Release-Commit: %s (%s)"
#################################################
kleinanzeigen_bot/model/update_check_state.py:
#################################################
_parse_timestamp:
"Invalid timestamp format in state file: %s": "Ungültiges Zeitstempel-Format in der Statusdatei: %s"
load:
"Failed to load update check state: %s": "Fehler beim Laden des Update-Prüfstatus: %s"
"Migrating update check state from version %d to %d": "Migriere Update-Prüfstatus von Version %d zu %d"
save:
"Failed to save update check state: %s": "Fehler beim Speichern des Update-Prüfstatus: %s"
"Permission denied when saving update check state to %s": "Keine Berechtigung zum Speichern des Update-Prüfstatus in %s"
should_check:
"Falling back to default interval: 1d (preview channel). Please fix your config to avoid this warning.": "Falle auf das Standardintervall zurück: 1 Tag (Vorschaukanal). Bitte korrigieren Sie Ihre Konfiguration, um diese Warnung zu vermeiden."
"Falling back to default interval: 7d (latest channel). Please fix your config to avoid this warning.": "Falle auf das Standardintervall zurück: 7 Tage (Stabiler Kanal). Bitte korrigieren Sie Ihre Konfiguration, um diese Warnung zu vermeiden."
"Interval is zero: %s. Minimum interval is 1d. Using default interval for this run.": "Intervall ist null: %s. Das Mindestintervall beträgt 1 Tag. Es wird das Standardintervall für diesen Durchlauf verwendet."
"Interval too long: %s. Maximum interval is 30d. Using default interval for this run.": "Intervall zu lang: %s. Das maximale Intervall beträgt 30 Tage. Es wird das Standardintervall für diesen Durchlauf verwendet."
"Interval too short: %s. Minimum interval is 1d. Using default interval for this run.": "Intervall zu kurz: %s. Das Mindestintervall beträgt 1 Tag. Es wird das Standardintervall für diesen Durchlauf verwendet."
"Invalid interval format or unsupported unit: %s. Using default interval for this run.": "Ungültiges Intervallformat oder nicht unterstützte Einheit: %s. Es wird das Standardintervall für diesen Durchlauf verwendet."
"Negative interval: %s. Minimum interval is 1d. Using default interval for this run.": "Negatives Intervall: %s. Das Mindestintervall beträgt 1 Tag. Es wird das Standardintervall für diesen Durchlauf verwendet."

View File

@@ -0,0 +1,233 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from __future__ import annotations
import logging
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING
import colorama
import requests
if TYPE_CHECKING:
from kleinanzeigen_bot.model.config_model import Config
try:
from kleinanzeigen_bot._version import __version__
except ImportError:
__version__ = "unknown"
from kleinanzeigen_bot.model.update_check_state import UpdateCheckState
# Module-level logger (standard-library logging) used by all update check messages in this module.
logger = logging.getLogger(__name__)
# Initialise colorama once at import time.
# NOTE(review): presumably so coloured console output also works on Windows terminals — confirm.
colorama.init()
class UpdateChecker:
"""Checks for updates to the bot."""
def __init__(self, config:"Config") -> None:
"""Initialize the update checker.
Args:
config: The bot configuration.
"""
self.config = config
self.state_file = Path(".temp") / "update_check_state.json"
self.state_file.parent.mkdir(exist_ok = True) # Ensure .temp directory exists
self.state = UpdateCheckState.load(self.state_file)
def get_local_version(self) -> str | None:
"""Get the local version of the bot.
Returns:
The local version string, or None if it cannot be determined.
"""
return __version__
def _get_commit_hash(self, version:str) -> str | None:
"""Extract the commit hash from a version string.
Args:
version: The version string to extract the commit hash from.
Returns:
The commit hash, or None if it cannot be extracted.
"""
if "+" in version:
return version.split("+")[1]
return None
def _get_release_commit(self, tag_name:str) -> str | None:
"""Get the commit hash for a release tag.
Args:
tag_name: The release tag name (e.g. 'latest').
Returns:
The commit hash, or None if it cannot be determined.
"""
try:
response = requests.get(
f"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases/tags/{tag_name}",
timeout = 10
)
response.raise_for_status()
data = response.json()
if isinstance(data, dict) and "target_commitish" in data:
return str(data["target_commitish"])
return None
except Exception as e:
logger.warning("Could not get release commit: %s", e)
return None
def _get_commit_date(self, commit:str) -> datetime | None:
"""Get the commit date for a commit hash.
Args:
commit: The commit hash.
Returns:
The commit date, or None if it cannot be determined.
"""
try:
response = requests.get(
f"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/commits/{commit}",
timeout = 10
)
response.raise_for_status()
data = response.json()
if isinstance(data, dict) and "commit" in data and "author" in data["commit"] and "date" in data["commit"]["author"]:
return datetime.fromisoformat(data["commit"]["author"]["date"].replace("Z", "+00:00"))
return None
except Exception as e:
logger.warning("Could not get commit date: %s", e)
return None
def _get_short_commit_hash(self, commit:str) -> str:
"""Get the short version of a commit hash.
Args:
commit: The full commit hash.
Returns:
The short commit hash (first 7 characters).
"""
return commit[:7]
def check_for_updates(self, *, skip_interval_check:bool = False) -> None:
"""Check for updates to the bot.
Args:
skip_interval_check: If True, bypass the interval check and force an update check.
"""
if not self.config.update_check.enabled:
return
# Check if we should perform an update check based on the interval
if not skip_interval_check and not self.state.should_check(self.config.update_check.interval, self.config.update_check.channel):
return
local_version = self.get_local_version()
if not local_version:
logger.warning("Could not determine local version.")
return
local_commit = self._get_commit_hash(local_version)
if not local_commit:
logger.warning("Could not determine local commit hash.")
return
# --- Fetch release info from GitHub using correct endpoint per channel ---
try:
if self.config.update_check.channel == "latest":
# Use /releases/latest endpoint for stable releases
response = requests.get(
"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases/latest",
timeout = 10
)
response.raise_for_status()
release = response.json()
# Defensive: ensure it's not a prerelease
if release.get("prerelease", False):
logger.warning("Latest release from GitHub is a prerelease, but 'latest' channel expects a stable release.")
return
elif self.config.update_check.channel == "preview":
# Use /releases endpoint and select the most recent prerelease
response = requests.get(
"https://api.github.com/repos/Second-Hand-Friends/kleinanzeigen-bot/releases",
timeout = 10
)
response.raise_for_status()
releases = response.json()
# Find the most recent prerelease
release = next((r for r in releases if r.get("prerelease", False)), None)
if not release:
logger.warning("No prerelease found for 'preview' channel.")
return
else:
logger.warning("Unknown update channel: %s", self.config.update_check.channel)
return
except Exception as e:
logger.warning("Could not get releases: %s", e)
return
# Get release commit
try:
release_commit = self._get_release_commit(release["tag_name"])
except Exception as e:
logger.warning("Failed to get release commit: %s", e)
return
if not release_commit:
logger.warning("Could not determine release commit hash.")
return
# Get commit dates
try:
local_commit_date = self._get_commit_date(local_commit)
release_commit_date = self._get_commit_date(release_commit)
except Exception as e:
logger.warning("Failed to get commit dates: %s", e)
return
if not local_commit_date or not release_commit_date:
logger.warning("Could not determine commit dates for comparison.")
return
if local_commit == release_commit:
logger.info(
"You are on the latest version: %s (compared to %s in channel %s)",
local_version,
self._get_short_commit_hash(release_commit),
self.config.update_check.channel
)
# We cannot reliably determine ahead/behind without git. Use commit dates as a weak heuristic, but clarify in the log.
elif local_commit_date < release_commit_date:
logger.warning(
"A new version is available: %s from %s (current: %s from %s, channel: %s)",
self._get_short_commit_hash(release_commit),
release_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
local_version,
local_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
self.config.update_check.channel
)
if release.get("body"):
logger.info("Release notes:\n%s", release["body"])
else:
logger.info(
"You are on a different commit than the release for channel '%s' (tag: %s). This may mean you are ahead, behind, or on a different branch. "
"Local commit: %s (%s), Release commit: %s (%s)",
self.config.update_check.channel,
release.get("tag_name", "unknown"),
self._get_short_commit_hash(local_commit),
local_commit_date.strftime("%Y-%m-%d %H:%M:%S"),
self._get_short_commit_hash(release_commit),
release_commit_date.strftime("%Y-%m-%d %H:%M:%S")
)
# Update the last check time
self.state.update_last_check()
self.state.save(self.state_file)