diff --git a/README.md b/README.md index 06fb371..cd76d51 100644 --- a/README.md +++ b/README.md @@ -192,12 +192,14 @@ Commands: version - displays the application version Options: - --ads=all|due|new| (publish) - specifies which ads to (re-)publish (DEFAULT: due) + --ads=all|due|new|changed| (publish) - specifies which ads to (re-)publish (DEFAULT: due) Possible values: * all: (re-)publish all ads ignoring republication_interval * due: publish all new ads and republish ads according the republication_interval * new: only publish new ads (i.e. ads that have no id in the config file) + * changed: only publish ads that have been modified since last publication * : provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval + * Combinations: You can combine multiple selectors with commas, e.g. "--ads=changed,due" to publish both changed and due ads --ads=all|new| (download) - specifies which ads to download (DEFAULT: new) Possible values: * all: downloads all ads from your profile diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index afc0917..d6dfac0 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -86,7 +86,9 @@ class KleinanzeigenBot(WebScrapingMixin): self.configure_file_logging() self.load_config() - if not (self.ads_selector in {'all', 'new', 'due'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)): + if not (self.ads_selector in {'all', 'new', 'due', 'changed'} or + any(selector in self.ads_selector.split(',') for selector in ('all', 'new', 'due', 'changed')) or + re.compile(r'\d+[,\d+]*').search(self.ads_selector)): LOG.warning('You provided no ads selector. Defaulting to "due".') self.ads_selector = 'due' @@ -148,12 +150,14 @@ class KleinanzeigenBot(WebScrapingMixin): version - Zeigt die Version der Anwendung an Optionen: - --ads=all|due|new| (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due) + --ads=all|due|new|changed| (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due) Mögliche Werte: * all: Veröffentlicht alle Anzeigen erneut, ignoriert republication_interval * due: Veröffentlicht alle neuen Anzeigen und erneut entsprechend dem republication_interval * new: Veröffentlicht nur neue Anzeigen (d.h. Anzeigen ohne ID in der Konfigurationsdatei) + * changed: Veröffentlicht nur Anzeigen, die seit der letzten Veröffentlichung geändert wurden * : Gibt eine oder mehrere Anzeigen-IDs an, die veröffentlicht werden sollen, z. B. "--ads=1,2,3", ignoriert republication_interval + * Kombinationen: Sie können mehrere Selektoren mit Kommas kombinieren, z. B. "--ads=changed,due" um sowohl geänderte als auch fällige Anzeigen zu veröffentlichen --ads=all|new| (download) - Gibt an, welche Anzeigen heruntergeladen werden sollen (STANDARD: new) Mögliche Werte: * all: Lädt alle Anzeigen aus Ihrem Profil herunter @@ -180,12 +184,14 @@ class KleinanzeigenBot(WebScrapingMixin): version - displays the application version Options: - --ads=all|due|new| (publish) - specifies which ads to (re-)publish (DEFAULT: due) + --ads=all|due|new|changed| (publish) - specifies which ads to (re-)publish (DEFAULT: due) Possible values: * all: (re-)publish all ads ignoring republication_interval * due: publish all new ads and republish ads according the republication_interval * new: only publish new ads (i.e. ads that have no id in the config file) + * changed: only publish ads that have been modified since last publication * : provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval + * Combinations: You can combine multiple selectors with commas, e.g. "--ads=changed,due" to publish both changed and due ads --ads=all|new| (download) - specifies which ads to download (DEFAULT: new) Possible values: * all: downloads all ads from your profile @@ -261,10 +267,12 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.info("App version: %s", self.get_version()) LOG.info("Python version: %s", sys.version) - def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool: + def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_file_relative: str) -> bool: """ - Check if an ad needs to be republished based on changes and republication interval. - Returns True if the ad should be republished. + Check if an ad needs to be republished based on republication interval. + Returns True if the ad should be republished based on the interval. + + Note: This method no longer checks for content changes. Use __check_ad_changed for that. """ if ad_cfg["updated_on"]: last_updated_on = parse_datetime(ad_cfg["updated_on"]) @@ -276,35 +284,44 @@ class KleinanzeigenBot(WebScrapingMixin): if not last_updated_on: return True - # Check for changes first - if ad_cfg["id"]: - # Calculate hash on original config to match what was stored - current_hash = calculate_content_hash(ad_cfg_orig) - stored_hash = ad_cfg_orig.get("content_hash") - - LOG.debug("Hash comparison for [%s]:", ad_file_relative) - LOG.debug(" Stored hash: %s", stored_hash) - LOG.debug(" Current hash: %s", current_hash) - - if stored_hash and current_hash == stored_hash: - # No changes - check republication interval - ad_age = datetime.utcnow() - last_updated_on - if ad_age.days <= ad_cfg["republication_interval"]: - LOG.info( - " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days", - ad_file_relative, - ad_age.days, - ad_cfg["republication_interval"] - ) - return False - else: - LOG.info("Changes detected in ad [%s], will republish", ad_file_relative) - # Update hash in original configuration - ad_cfg_orig["content_hash"] = current_hash - return True + # Check republication interval + ad_age = datetime.utcnow() - last_updated_on + if ad_age.days <= ad_cfg["republication_interval"]: + LOG.info( + " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days", + ad_file_relative, + ad_age.days, + ad_cfg["republication_interval"] + ) + return False return True + def __check_ad_changed(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool: + """ + Check if an ad has been changed since last publication. + Returns True if the ad has been changed. + """ + if not ad_cfg["id"]: + # New ads are not considered "changed" + return False + + # Calculate hash on original config to match what was stored + current_hash = calculate_content_hash(ad_cfg_orig) + stored_hash = ad_cfg_orig.get("content_hash") + + LOG.debug("Hash comparison for [%s]:", ad_file_relative) + LOG.debug(" Stored hash: %s", stored_hash) + LOG.debug(" Current hash: %s", current_hash) + + if stored_hash and current_hash != stored_hash: + LOG.info("Changes detected in ad [%s], will republish", ad_file_relative) + # Update hash in original configuration + ad_cfg_orig["content_hash"] = current_hash + return True + + return False + def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, dict[str, Any], dict[str, Any]]]: LOG.info("Searching for ad config files...") @@ -320,6 +337,8 @@ class KleinanzeigenBot(WebScrapingMixin): ids = [] use_specific_ads = False + selectors = self.ads_selector.split(',') + if re.compile(r'\d+[,\d+]*').search(self.ads_selector): ids = [int(n) for n in self.ads_selector.split(',')] use_specific_ads = True @@ -343,13 +362,31 @@ class KleinanzeigenBot(WebScrapingMixin): LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file_relative) continue else: - if self.ads_selector == "new" and ad_cfg["id"] and check_id: - LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative) - continue + # Check if ad should be included based on selectors + should_include = False - if self.ads_selector == "due": - if not self.__check_ad_republication(ad_cfg, ad_cfg_orig, ad_file_relative): - continue + # Check for 'changed' selector + if "changed" in selectors and self.__check_ad_changed(ad_cfg, ad_cfg_orig, ad_file_relative): + should_include = True + + # Check for 'new' selector + if "new" in selectors and (not ad_cfg["id"] or not check_id): + should_include = True + elif "new" in selectors and ad_cfg["id"] and check_id: + LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative) + + # Check for 'due' selector + if "due" in selectors: + # For 'due' selector, check if the ad is due for republication based on interval + if self.__check_ad_republication(ad_cfg, ad_file_relative): + should_include = True + + # Check for 'all' selector (always include) + if "all" in selectors: + should_include = True + + if not should_include: + continue # Get description with prefix/suffix from ad config if present, otherwise use defaults ad_cfg["description"] = self.__get_description_with_affixes(ad_cfg) diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 127e575..0c8b5c8 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -24,6 +24,12 @@ kleinanzeigen_bot/__init__.py: "App version: %s": "App Version: %s" "Python version: %s": "Python Version: %s" + __check_ad_changed: + "Hash comparison for [%s]:": "Hash-Vergleich für [%s]:" + " Stored hash: %s": " Gespeicherter Hash: %s" + " Current hash: %s": " Aktueller Hash: %s" + "Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird erneut veröffentlicht" + load_ads: "Searching for ad config files...": "Suche nach Anzeigendateien..." " -> found %s": "-> %s gefunden" @@ -90,11 +96,8 @@ kleinanzeigen_bot/__init__.py: " -> uploading image [%s]": " -> Lade Bild [%s] hoch" __check_ad_republication: - "Hash comparison for [%s]:": "Hash-Vergleich für [%s]:" - " Stored hash: %s": " Gespeicherter Hash: %s" - " Current hash: %s": " Aktueller Hash: %s" - "Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird neu veröffentlicht" - " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days": " -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Eine erneute Veröffentlichung ist nur alle %s Tage erforderlich" + "Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird erneut veröffentlicht" + " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days": " -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Erneute Veröffentlichung ist erst nach %s Tagen erforderlich" __set_special_attributes: "Found %i special attributes": "%i spezielle Attribute gefunden" diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index fe9d492..a760895 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -199,6 +199,9 @@ class TestKleinanzeigenBotCommandLine: (["publish", "--keep-old"], "publish", "due", True), (["publish", "--ads=all", "--keep-old"], "publish", "all", True), (["download", "--ads=new"], "download", "new", False), + (["publish", "--ads=changed"], "publish", "changed", False), + (["publish", "--ads=changed,due"], "publish", "changed,due", False), + (["publish", "--ads=changed,new"], "publish", "changed,new", False), (["version"], "version", "due", False), ]) def test_parse_args_handles_valid_arguments( @@ -1180,3 +1183,113 @@ class TestKleinanzeigenBotDescriptionHandling: description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) assert description == "Contact: test(at)example.com" + + +class TestKleinanzeigenBotChangedAds: + """Tests for the 'changed' ads selector functionality.""" + + def test_load_ads_with_changed_selector(self, test_bot: KleinanzeigenBot, base_ad_config: dict[str, Any]) -> None: + """Test that only changed ads are loaded when using the 'changed' selector.""" + # Set up the bot with the 'changed' selector + test_bot.ads_selector = "changed" + test_bot.config["ad_defaults"] = { + "description": { + "prefix": "", + "suffix": "" + } + } + + # Create a changed ad + changed_ad = create_ad_config( + base_ad_config, + id="12345", + title="Changed Ad", + updated_on="2024-01-01T00:00:00", + created_on="2024-01-01T00:00:00", + active=True + ) + + # Calculate hash for changed_ad and add it to the config + # Then modify the ad to simulate a change + changed_hash = calculate_content_hash(changed_ad) + changed_ad["content_hash"] = changed_hash + # Now modify the ad to make it "changed" + changed_ad["title"] = "Changed Ad - Modified" + + # Create temporary directory and file + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + ad_dir = temp_path / "ads" + ad_dir.mkdir() + + # Write the ad file + yaml = YAML() + changed_file = ad_dir / "changed_ad.yaml" + with open(changed_file, "w", encoding="utf-8") as f: + yaml.dump(changed_ad, f) + + # Set config file path and use relative path for ad_files + test_bot.config_file_path = str(temp_path / "config.yaml") + test_bot.config['ad_files'] = ["ads/*.yaml"] + + # Mock the loading of the ad configuration + with patch('kleinanzeigen_bot.utils.dicts.load_dict', side_effect=[ + changed_ad, # First call returns the changed ad + {} # Second call for ad_fields.yaml + ]): + ads_to_publish = test_bot.load_ads() + + # The changed ad should be loaded + assert len(ads_to_publish) == 1 + assert ads_to_publish[0][1]["title"] == "Changed Ad - Modified" + + def test_load_ads_with_due_selector_includes_all_due_ads(self, test_bot: KleinanzeigenBot, base_ad_config: dict[str, Any]) -> None: + """Test that 'due' selector includes all ads that are due for republication, regardless of changes.""" + # Set up the bot with the 'due' selector + test_bot.ads_selector = "due" + test_bot.config["ad_defaults"] = { + "description": { + "prefix": "", + "suffix": "" + } + } + + # Create a changed ad that is also due for republication + current_time = datetime.utcnow() + old_date = (current_time - timedelta(days=10)).isoformat() # Past republication interval + + changed_ad = create_ad_config( + base_ad_config, + id="12345", + title="Changed Ad", + updated_on=old_date, + created_on=old_date, + republication_interval=7, # Due for republication after 7 days + active=True + ) + + # Create temporary directory and file + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + ad_dir = temp_path / "ads" + ad_dir.mkdir() + + # Write the ad file + yaml = YAML() + ad_file = ad_dir / "changed_ad.yaml" + with open(ad_file, "w", encoding="utf-8") as f: + yaml.dump(changed_ad, f) + + # Set config file path and use relative path for ad_files + test_bot.config_file_path = str(temp_path / "config.yaml") + test_bot.config['ad_files'] = ["ads/*.yaml"] + + # Mock the loading of the ad configuration + with patch('kleinanzeigen_bot.utils.dicts.load_dict', side_effect=[ + changed_ad, # First call returns the changed ad + {} # Second call for ad_fields.yaml + ]): + ads_to_publish = test_bot.load_ads() + + # The changed ad should be loaded with 'due' selector because it's due for republication + assert len(ads_to_publish) == 1