fix: Separate 'changed' and 'due' ad selectors (#442)

This commit implements a new 'changed' selector for the --ads option that
publishes only ads that have been modified since their last publication.
The 'due' selector now only republishes ads based on the time interval,
without considering content changes.

The implementation allows combining selectors with commas (e.g., --ads=changed,due)
to publish both changed and due ads. Documentation and translations have been
updated accordingly.

Fixes #411
This commit is contained in:
Jens Bergmann
2025-02-28 20:53:53 +01:00
committed by GitHub
parent 6b3da5bc0a
commit 772326003f
4 changed files with 199 additions and 44 deletions

View File

@@ -192,12 +192,14 @@ Commands:
version - displays the application version version - displays the application version
Options: Options:
--ads=all|due|new|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due) --ads=all|due|new|changed|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due)
Possible values: Possible values:
* all: (re-)publish all ads ignoring republication_interval * all: (re-)publish all ads ignoring republication_interval
* due: publish all new ads and republish ads according the republication_interval * due: publish all new ads and republish ads according the republication_interval
* new: only publish new ads (i.e. ads that have no id in the config file) * new: only publish new ads (i.e. ads that have no id in the config file)
* changed: only publish ads that have been modified since last publication
* <id(s)>: provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval * <id(s)>: provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval
* Combinations: You can combine multiple selectors with commas, e.g. "--ads=changed,due" to publish both changed and due ads
--ads=all|new|<id(s)> (download) - specifies which ads to download (DEFAULT: new) --ads=all|new|<id(s)> (download) - specifies which ads to download (DEFAULT: new)
Possible values: Possible values:
* all: downloads all ads from your profile * all: downloads all ads from your profile

View File

@@ -86,7 +86,9 @@ class KleinanzeigenBot(WebScrapingMixin):
self.configure_file_logging() self.configure_file_logging()
self.load_config() self.load_config()
if not (self.ads_selector in {'all', 'new', 'due'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)): if not (self.ads_selector in {'all', 'new', 'due', 'changed'} or
any(selector in self.ads_selector.split(',') for selector in ('all', 'new', 'due', 'changed')) or
re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
LOG.warning('You provided no ads selector. Defaulting to "due".') LOG.warning('You provided no ads selector. Defaulting to "due".')
self.ads_selector = 'due' self.ads_selector = 'due'
@@ -148,12 +150,14 @@ class KleinanzeigenBot(WebScrapingMixin):
version - Zeigt die Version der Anwendung an version - Zeigt die Version der Anwendung an
Optionen: Optionen:
--ads=all|due|new|<id(s)> (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due) --ads=all|due|new|changed|<id(s)> (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due)
Mögliche Werte: Mögliche Werte:
* all: Veröffentlicht alle Anzeigen erneut, ignoriert republication_interval * all: Veröffentlicht alle Anzeigen erneut, ignoriert republication_interval
* due: Veröffentlicht alle neuen Anzeigen und erneut entsprechend dem republication_interval * due: Veröffentlicht alle neuen Anzeigen und erneut entsprechend dem republication_interval
* new: Veröffentlicht nur neue Anzeigen (d.h. Anzeigen ohne ID in der Konfigurationsdatei) * new: Veröffentlicht nur neue Anzeigen (d.h. Anzeigen ohne ID in der Konfigurationsdatei)
* changed: Veröffentlicht nur Anzeigen, die seit der letzten Veröffentlichung geändert wurden
* <id(s)>: Gibt eine oder mehrere Anzeigen-IDs an, die veröffentlicht werden sollen, z. B. "--ads=1,2,3", ignoriert republication_interval * <id(s)>: Gibt eine oder mehrere Anzeigen-IDs an, die veröffentlicht werden sollen, z. B. "--ads=1,2,3", ignoriert republication_interval
* Kombinationen: Sie können mehrere Selektoren mit Kommas kombinieren, z. B. "--ads=changed,due" um sowohl geänderte als auch fällige Anzeigen zu veröffentlichen
--ads=all|new|<id(s)> (download) - Gibt an, welche Anzeigen heruntergeladen werden sollen (STANDARD: new) --ads=all|new|<id(s)> (download) - Gibt an, welche Anzeigen heruntergeladen werden sollen (STANDARD: new)
Mögliche Werte: Mögliche Werte:
* all: Lädt alle Anzeigen aus Ihrem Profil herunter * all: Lädt alle Anzeigen aus Ihrem Profil herunter
@@ -180,12 +184,14 @@ class KleinanzeigenBot(WebScrapingMixin):
version - displays the application version version - displays the application version
Options: Options:
--ads=all|due|new|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due) --ads=all|due|new|changed|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due)
Possible values: Possible values:
* all: (re-)publish all ads ignoring republication_interval * all: (re-)publish all ads ignoring republication_interval
* due: publish all new ads and republish ads according the republication_interval * due: publish all new ads and republish ads according the republication_interval
* new: only publish new ads (i.e. ads that have no id in the config file) * new: only publish new ads (i.e. ads that have no id in the config file)
* changed: only publish ads that have been modified since last publication
* <id(s)>: provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval * <id(s)>: provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval
* Combinations: You can combine multiple selectors with commas, e.g. "--ads=changed,due" to publish both changed and due ads
--ads=all|new|<id(s)> (download) - specifies which ads to download (DEFAULT: new) --ads=all|new|<id(s)> (download) - specifies which ads to download (DEFAULT: new)
Possible values: Possible values:
* all: downloads all ads from your profile * all: downloads all ads from your profile
@@ -261,10 +267,12 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info("App version: %s", self.get_version()) LOG.info("App version: %s", self.get_version())
LOG.info("Python version: %s", sys.version) LOG.info("Python version: %s", sys.version)
def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool: def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_file_relative: str) -> bool:
""" """
Check if an ad needs to be republished based on changes and republication interval. Check if an ad needs to be republished based on republication interval.
Returns True if the ad should be republished. Returns True if the ad should be republished based on the interval.
Note: This method no longer checks for content changes. Use __check_ad_changed for that.
""" """
if ad_cfg["updated_on"]: if ad_cfg["updated_on"]:
last_updated_on = parse_datetime(ad_cfg["updated_on"]) last_updated_on = parse_datetime(ad_cfg["updated_on"])
@@ -276,35 +284,44 @@ class KleinanzeigenBot(WebScrapingMixin):
if not last_updated_on: if not last_updated_on:
return True return True
# Check for changes first # Check republication interval
if ad_cfg["id"]: ad_age = datetime.utcnow() - last_updated_on
# Calculate hash on original config to match what was stored if ad_age.days <= ad_cfg["republication_interval"]:
current_hash = calculate_content_hash(ad_cfg_orig) LOG.info(
stored_hash = ad_cfg_orig.get("content_hash") " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days",
ad_file_relative,
LOG.debug("Hash comparison for [%s]:", ad_file_relative) ad_age.days,
LOG.debug(" Stored hash: %s", stored_hash) ad_cfg["republication_interval"]
LOG.debug(" Current hash: %s", current_hash) )
return False
if stored_hash and current_hash == stored_hash:
# No changes - check republication interval
ad_age = datetime.utcnow() - last_updated_on
if ad_age.days <= ad_cfg["republication_interval"]:
LOG.info(
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days",
ad_file_relative,
ad_age.days,
ad_cfg["republication_interval"]
)
return False
else:
LOG.info("Changes detected in ad [%s], will republish", ad_file_relative)
# Update hash in original configuration
ad_cfg_orig["content_hash"] = current_hash
return True
return True return True
def __check_ad_changed(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool:
"""
Check if an ad has been changed since last publication.
Returns True if the ad has been changed.
"""
if not ad_cfg["id"]:
# New ads are not considered "changed"
return False
# Calculate hash on original config to match what was stored
current_hash = calculate_content_hash(ad_cfg_orig)
stored_hash = ad_cfg_orig.get("content_hash")
LOG.debug("Hash comparison for [%s]:", ad_file_relative)
LOG.debug(" Stored hash: %s", stored_hash)
LOG.debug(" Current hash: %s", current_hash)
if stored_hash and current_hash != stored_hash:
LOG.info("Changes detected in ad [%s], will republish", ad_file_relative)
# Update hash in original configuration
ad_cfg_orig["content_hash"] = current_hash
return True
return False
def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, dict[str, Any], dict[str, Any]]]: def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, dict[str, Any], dict[str, Any]]]:
LOG.info("Searching for ad config files...") LOG.info("Searching for ad config files...")
@@ -320,6 +337,8 @@ class KleinanzeigenBot(WebScrapingMixin):
ids = [] ids = []
use_specific_ads = False use_specific_ads = False
selectors = self.ads_selector.split(',')
if re.compile(r'\d+[,\d+]*').search(self.ads_selector): if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
ids = [int(n) for n in self.ads_selector.split(',')] ids = [int(n) for n in self.ads_selector.split(',')]
use_specific_ads = True use_specific_ads = True
@@ -343,13 +362,31 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file_relative) LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file_relative)
continue continue
else: else:
if self.ads_selector == "new" and ad_cfg["id"] and check_id: # Check if ad should be included based on selectors
LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative) should_include = False
continue
if self.ads_selector == "due": # Check for 'changed' selector
if not self.__check_ad_republication(ad_cfg, ad_cfg_orig, ad_file_relative): if "changed" in selectors and self.__check_ad_changed(ad_cfg, ad_cfg_orig, ad_file_relative):
continue should_include = True
# Check for 'new' selector
if "new" in selectors and (not ad_cfg["id"] or not check_id):
should_include = True
elif "new" in selectors and ad_cfg["id"] and check_id:
LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative)
# Check for 'due' selector
if "due" in selectors:
# For 'due' selector, check if the ad is due for republication based on interval
if self.__check_ad_republication(ad_cfg, ad_file_relative):
should_include = True
# Check for 'all' selector (always include)
if "all" in selectors:
should_include = True
if not should_include:
continue
# Get description with prefix/suffix from ad config if present, otherwise use defaults # Get description with prefix/suffix from ad config if present, otherwise use defaults
ad_cfg["description"] = self.__get_description_with_affixes(ad_cfg) ad_cfg["description"] = self.__get_description_with_affixes(ad_cfg)

View File

@@ -24,6 +24,12 @@ kleinanzeigen_bot/__init__.py:
"App version: %s": "App Version: %s" "App version: %s": "App Version: %s"
"Python version: %s": "Python Version: %s" "Python version: %s": "Python Version: %s"
__check_ad_changed:
"Hash comparison for [%s]:": "Hash-Vergleich für [%s]:"
" Stored hash: %s": " Gespeicherter Hash: %s"
" Current hash: %s": " Aktueller Hash: %s"
"Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird erneut veröffentlicht"
load_ads: load_ads:
"Searching for ad config files...": "Suche nach Anzeigendateien..." "Searching for ad config files...": "Suche nach Anzeigendateien..."
" -> found %s": "-> %s gefunden" " -> found %s": "-> %s gefunden"
@@ -90,11 +96,8 @@ kleinanzeigen_bot/__init__.py:
" -> uploading image [%s]": " -> Lade Bild [%s] hoch" " -> uploading image [%s]": " -> Lade Bild [%s] hoch"
__check_ad_republication: __check_ad_republication:
"Hash comparison for [%s]:": "Hash-Vergleich für [%s]:" "Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird erneut veröffentlicht"
" Stored hash: %s": " Gespeicherter Hash: %s" " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days": " -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Erneute Veröffentlichung ist erst nach %s Tagen erforderlich"
" Current hash: %s": " Aktueller Hash: %s"
"Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird neu veröffentlicht"
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days": " -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Eine erneute Veröffentlichung ist nur alle %s Tage erforderlich"
__set_special_attributes: __set_special_attributes:
"Found %i special attributes": "%i spezielle Attribute gefunden" "Found %i special attributes": "%i spezielle Attribute gefunden"

View File

@@ -199,6 +199,9 @@ class TestKleinanzeigenBotCommandLine:
(["publish", "--keep-old"], "publish", "due", True), (["publish", "--keep-old"], "publish", "due", True),
(["publish", "--ads=all", "--keep-old"], "publish", "all", True), (["publish", "--ads=all", "--keep-old"], "publish", "all", True),
(["download", "--ads=new"], "download", "new", False), (["download", "--ads=new"], "download", "new", False),
(["publish", "--ads=changed"], "publish", "changed", False),
(["publish", "--ads=changed,due"], "publish", "changed,due", False),
(["publish", "--ads=changed,new"], "publish", "changed,new", False),
(["version"], "version", "due", False), (["version"], "version", "due", False),
]) ])
def test_parse_args_handles_valid_arguments( def test_parse_args_handles_valid_arguments(
@@ -1180,3 +1183,113 @@ class TestKleinanzeigenBotDescriptionHandling:
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg)
assert description == "Contact: test(at)example.com" assert description == "Contact: test(at)example.com"
class TestKleinanzeigenBotChangedAds:
"""Tests for the 'changed' ads selector functionality."""
def test_load_ads_with_changed_selector(self, test_bot: KleinanzeigenBot, base_ad_config: dict[str, Any]) -> None:
"""Test that only changed ads are loaded when using the 'changed' selector."""
# Set up the bot with the 'changed' selector
test_bot.ads_selector = "changed"
test_bot.config["ad_defaults"] = {
"description": {
"prefix": "",
"suffix": ""
}
}
# Create a changed ad
changed_ad = create_ad_config(
base_ad_config,
id="12345",
title="Changed Ad",
updated_on="2024-01-01T00:00:00",
created_on="2024-01-01T00:00:00",
active=True
)
# Calculate hash for changed_ad and add it to the config
# Then modify the ad to simulate a change
changed_hash = calculate_content_hash(changed_ad)
changed_ad["content_hash"] = changed_hash
# Now modify the ad to make it "changed"
changed_ad["title"] = "Changed Ad - Modified"
# Create temporary directory and file
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
ad_dir = temp_path / "ads"
ad_dir.mkdir()
# Write the ad file
yaml = YAML()
changed_file = ad_dir / "changed_ad.yaml"
with open(changed_file, "w", encoding="utf-8") as f:
yaml.dump(changed_ad, f)
# Set config file path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config['ad_files'] = ["ads/*.yaml"]
# Mock the loading of the ad configuration
with patch('kleinanzeigen_bot.utils.dicts.load_dict', side_effect=[
changed_ad, # First call returns the changed ad
{} # Second call for ad_fields.yaml
]):
ads_to_publish = test_bot.load_ads()
# The changed ad should be loaded
assert len(ads_to_publish) == 1
assert ads_to_publish[0][1]["title"] == "Changed Ad - Modified"
def test_load_ads_with_due_selector_includes_all_due_ads(self, test_bot: KleinanzeigenBot, base_ad_config: dict[str, Any]) -> None:
"""Test that 'due' selector includes all ads that are due for republication, regardless of changes."""
# Set up the bot with the 'due' selector
test_bot.ads_selector = "due"
test_bot.config["ad_defaults"] = {
"description": {
"prefix": "",
"suffix": ""
}
}
# Create a changed ad that is also due for republication
current_time = datetime.utcnow()
old_date = (current_time - timedelta(days=10)).isoformat() # Past republication interval
changed_ad = create_ad_config(
base_ad_config,
id="12345",
title="Changed Ad",
updated_on=old_date,
created_on=old_date,
republication_interval=7, # Due for republication after 7 days
active=True
)
# Create temporary directory and file
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
ad_dir = temp_path / "ads"
ad_dir.mkdir()
# Write the ad file
yaml = YAML()
ad_file = ad_dir / "changed_ad.yaml"
with open(ad_file, "w", encoding="utf-8") as f:
yaml.dump(changed_ad, f)
# Set config file path and use relative path for ad_files
test_bot.config_file_path = str(temp_path / "config.yaml")
test_bot.config['ad_files'] = ["ads/*.yaml"]
# Mock the loading of the ad configuration
with patch('kleinanzeigen_bot.utils.dicts.load_dict', side_effect=[
changed_ad, # First call returns the changed ad
{} # Second call for ad_fields.yaml
]):
ads_to_publish = test_bot.load_ads()
# The changed ad should be loaded with 'due' selector because it's due for republication
assert len(ads_to_publish) == 1