From 868f81239aa83e04ab20473a7423449dbd6c0b26 Mon Sep 17 00:00:00 2001 From: Torsten Liermann Date: Sat, 14 Mar 2026 08:37:30 +0100 Subject: [PATCH] fix: prevent duplicate listings during publish retry loop (#875) --- src/kleinanzeigen_bot/__init__.py | 30 +++++++++++++++ .../resources/translations.de.yaml | 3 ++ tests/unit/test_init.py | 38 ++++++++++++++++++- 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 0339159..1ca0048 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -1572,6 +1572,14 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 success = False # Retry loop only for publish_ad (before submission completes) + # Fetch a fresh baseline right before the retry loop to avoid stale state + # from earlier successful publishes in multi-ad runs (see #874) + try: + pre_publish_ads = await self._fetch_published_ads() + ads_before_publish:set[str] = {str(x["id"]) for x in pre_publish_ads if x.get("id")} + except Exception as ex: # noqa: BLE001 + LOG.warning("Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.", ad_cfg.title, ex) + ads_before_publish = {str(x["id"]) for x in published_ads if x.get("id")} for attempt in range(1, max_retries + 1): try: await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads, AdUpdateStrategy.REPLACE) @@ -1582,6 +1590,28 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 except (TimeoutError, ProtocolException) as ex: await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex) if attempt < max_retries: + # Before retrying, check if the ad was already created despite the error. + # A partially successful submission followed by a retry would create a duplicate listing, + # which violates kleinanzeigen.de terms of service and can lead to account suspension. + try: + current_ads = await self._fetch_published_ads() + current_ad_ids = {str(x["id"]) for x in current_ads if x.get("id")} + new_ad_ids = current_ad_ids - ads_before_publish + if new_ad_ids: + LOG.warning( + "Attempt %s/%s failed for '%s': %s. " + "However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.", + attempt, max_retries, ad_cfg.title, ex, ", ".join(new_ad_ids) + ) + failed_count += 1 + break + except Exception as verify_ex: # noqa: BLE001 + LOG.warning( + "Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.", + ad_cfg.title, verify_ex, + ) + failed_count += 1 + break LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex) await self.web_sleep(2) # Wait before retry else: diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 8d99c07..d38e905 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -154,6 +154,9 @@ kleinanzeigen_bot/__init__.py: "Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist" " -> Could not confirm publishing for '%s', but ad may be online": " -> Veröffentlichung für '%s' konnte nicht bestätigt werden, aber Anzeige ist möglicherweise online" "Attempt %s/%s failed for '%s': %s. Retrying...": "Versuch %s/%s fehlgeschlagen für '%s': %s. Erneuter Versuch..." + "Attempt %s/%s failed for '%s': %s. However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.": "Versuch %s/%s fehlgeschlagen für '%s': %s. Jedoch wurde eine neue Anzeige erkannt (ID: %s) -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden." + "Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.": "Konnte keine aktuelle Anzeigen-Baseline für '%s' abrufen: %s. Verwende initialen Snapshot." + "Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.": "Veröffentlichte Anzeigen konnten nach fehlgeschlagenem Versuch für '%s' nicht geprüft werden: %s -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden." "All %s attempts failed for '%s': %s. Skipping ad.": "Alle %s Versuche fehlgeschlagen für '%s': %s. Überspringe Anzeige." "DONE: (Re-)published %s (%s failed after retries)": "FERTIG: %s (erneut) veröffentlicht (%s fehlgeschlagen nach Wiederholungen)" "DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht" diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index 18e90c3..b7546ef 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -1007,12 +1007,46 @@ class TestKleinanzeigenBotBasics: ): await test_bot.publish_ads(ad_cfgs) - # With pagination, the URL now includes pageNum parameter - web_request_mock.assert_awaited_once_with(f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + # web_request is called twice: once for initial fetch, once for pre-retry-loop baseline + expected_url = f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1" + assert web_request_mock.await_count == 2 + web_request_mock.assert_any_await(expected_url) publish_ad_mock.assert_awaited_once_with("ad.yaml", ad_cfgs[0][1], {}, [], AdUpdateStrategy.REPLACE) web_await_mock.assert_awaited_once() delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False) + @pytest.mark.asyncio + async def test_publish_ads_aborts_retry_on_duplicate_detection( + self, + test_bot:KleinanzeigenBot, + base_ad_config:dict[str, Any], + mock_page:MagicMock, + ) -> None: + """Ensure retries are aborted when a new ad is detected after a failed attempt to prevent duplicates.""" + test_bot.page = mock_page + + ad_cfg = Ad.model_validate(base_ad_config) + ad_cfg_orig = copy.deepcopy(base_ad_config) + ad_file = "ad.yaml" + + # 1st _fetch_published_ads call (initial, before loop): no ads + # 2nd call (fresh baseline, before retry loop): no ads + # 3rd call (after first failed attempt): a new ad appeared — duplicate detected + fetch_responses = [ + {"content": json.dumps({"ads": []})}, # initial fetch + {"content": json.dumps({"ads": []})}, # fresh baseline + {"content": json.dumps({"ads": [{"id": "99999", "state": "active"}]})}, # duplicate detected + ] + + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock, side_effect = fetch_responses), + patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout")) as publish_mock, + ): + await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) + + # publish_ad should have been called only once — retry was aborted due to duplicate detection + assert publish_mock.await_count == 1 + def test_get_root_url(self, test_bot:KleinanzeigenBot) -> None: """Test root URL retrieval.""" assert test_bot.root_url == "https://www.kleinanzeigen.de"